Visual Servoing Platform version 3.7.0
Loading...
Searching...
No Matches
tutorial-apriltag-detector-live-rgbd-realsense.cpp
1
2#include <visp3/core/vpConfig.h>
3#ifdef VISP_HAVE_MODULE_SENSOR
4#include <visp3/sensor/vpRealSense2.h>
5#endif
7#include <visp3/detection/vpDetectorAprilTag.h>
9#include <visp3/core/vpImageConvert.h>
10#include <visp3/gui/vpDisplayFactory.h>
11#include <visp3/vision/vpPose.h>
12
void usage(const char **argv, int error);

/*!
 * Print the command line synopsis and the description of every option on
 * the standard output.
 *
 * The defaults printed here mirror the initial values of the option
 * variables declared in main(); keep both in sync when changing one.
 *
 * \param argv : Command line arguments. argv[0] is printed as the program name.
 * \param error : When non-zero, index in \e argv of the argument that was not
 * recognized; that argument is reported after the help text. Use 0 when the
 * help is printed on request.
 */
void usage(const char **argv, int error)
{
  std::cout << "Synopsis" << std::endl
    << " " << argv[0]
    << " [--tag-size <size>]"
    << " [--tag-family <family>]"
    << " [--tag-decision-margin-threshold <threshold>]"
    << " [--tag-hamming-distance-threshold <threshold>]"
    << " [--tag-quad-decimate <factor>]"
    << " [--tag-n-threads <number>]"
    << " [--tag-z-aligned]"
    << " [--tag-pose-method <method>]"
#if defined(VISP_HAVE_DISPLAY)
    << " [--display-tag]"
    << " [--display-off]"
    << " [--color <id>]"
    << " [--thickness <thickness>]"
#endif
    << " [--verbose, -v]"
    << " [--help, -h]" << std::endl
    << std::endl;

  std::cout << "Description" << std::endl
    << " Compute the pose of an Apriltag in images acquired with a realsense camera." << std::endl
    << std::endl
    << " --tag-size <size>" << std::endl
    << " Apriltag size in [m]." << std::endl
    << " Default: 0.053" << std::endl
    << std::endl
    << " --tag-family <family>" << std::endl
    << " Apriltag family. Supported values are:" << std::endl
    << " 0: TAG_36h11" << std::endl
    << " 1: TAG_36h10 (DEPRECATED)" << std::endl
    << " 2: TAG_36ARTOOLKIT (DEPRECATED)" << std::endl
    << " 3: TAG_25h9" << std::endl
    << " 4: TAG_25h7 (DEPRECATED)" << std::endl
    << " 5: TAG_16h5" << std::endl
    << " 6: TAG_CIRCLE21h7" << std::endl
    << " 7: TAG_CIRCLE49h12" << std::endl
    << " 8: TAG_CUSTOM48h12" << std::endl
    << " 9: TAG_STANDARD41h12" << std::endl
    << " 10: TAG_STANDARD52h13" << std::endl
    << " 11: TAG_ARUCO_4x4_50" << std::endl
    << " 12: TAG_ARUCO_4x4_100" << std::endl
    << " 13: TAG_ARUCO_4x4_250" << std::endl
    << " 14: TAG_ARUCO_4x4_1000" << std::endl
    << " 15: TAG_ARUCO_5x5_50" << std::endl
    << " 16: TAG_ARUCO_5x5_100" << std::endl
    << " 17: TAG_ARUCO_5x5_250" << std::endl
    << " 18: TAG_ARUCO_5x5_1000" << std::endl
    << " 19: TAG_ARUCO_6x6_50" << std::endl
    << " 20: TAG_ARUCO_6x6_100" << std::endl
    << " 21: TAG_ARUCO_6x6_250" << std::endl
    << " 22: TAG_ARUCO_6x6_1000" << std::endl
    << " 23: TAG_ARUCO_7x7_50" << std::endl
    << " 24: TAG_ARUCO_7x7_100" << std::endl
    << " 25: TAG_ARUCO_7x7_250" << std::endl
    << " 26: TAG_ARUCO_7x7_1000" << std::endl
    << " 27: TAG_ARUCO_MIP_36h12" << std::endl
    << " Default: 0 (36h11)" << std::endl
    << std::endl
    << " --tag-decision-margin-threshold <threshold>" << std::endl
    << " Threshold used to discard low-confident detections. A typical value is " << std::endl
    << " around 100. The higher this value, the more false positives will be filtered" << std::endl
    << " out. When this value is set to -1, false positives are not filtered out." << std::endl
    << " Default: 50" << std::endl
    << std::endl
    << " --tag-hamming-distance-threshold <threshold>" << std::endl
    << " Threshold used to discard low-confident detections with corrected bits." << std::endl
    << " A typical value is between 0 and 3. The lower this value, the more false" << std::endl
    << " positives will be filtered out." << std::endl
    << " Default: 2" << std::endl
    << std::endl
    << " --tag-quad-decimate <factor>" << std::endl
    << " Decimation factor used to detect a tag. " << std::endl
    << " Default: 1" << std::endl
    << std::endl
    << " --tag-n-threads <number>" << std::endl
    << " Number of threads used to detect a tag." << std::endl
    << " Default: 1" << std::endl
    << std::endl
    << " --tag-z-aligned" << std::endl
    << " When enabled, tag z-axis and camera z-axis are aligned." << std::endl
    << " Default: false" << std::endl
    << std::endl
    << " --tag-pose-method <method>" << std::endl
    << " Algorithm used to compute the tag pose from its 4 corners." << std::endl
    << " Possible values are:" << std::endl
    << " 0: HOMOGRAPHY" << std::endl
    << " 1: HOMOGRAPHY_VIRTUAL_VS" << std::endl
    << " 2: DEMENTHON_VIRTUAL_VS" << std::endl
    << " 3: LAGRANGE_VIRTUAL_VS" << std::endl
    << " 4: BEST_RESIDUAL_VIRTUAL_VS" << std::endl
    << " 5: HOMOGRAPHY_ORTHOGONAL_ITERATION" << std::endl
    << " Default: 1 (HOMOGRAPHY_VIRTUAL_VS)" << std::endl
    << std::endl
#if defined(VISP_HAVE_DISPLAY)
    << " --display-tag" << std::endl
    << " Flag used to enable displaying the edges of a tag." << std::endl
    << " Default: disabled" << std::endl
    << std::endl
    << " --display-off" << std::endl
    << " Flag used to turn display off." << std::endl
    << " Default: enabled" << std::endl
    << std::endl
    << " --color <id>" << std::endl
    << " Color id used to display the frame over each tag." << std::endl
    << " Possible values are:" << std::endl
    << " -1: R-G-B colors for X, Y, Z axis respectively" << std::endl
    << " 0: all axis in black" << std::endl
    << " 1: all axis in white" << std::endl
    << " ..." << std::endl
    << " Default: -1" << std::endl
    << std::endl
    << " --thickness <thickness>" << std::endl
    << " Thickness of the drawings in overlay." << std::endl
    << " Default: 2" << std::endl
    << std::endl
#endif
    << " --verbose, -v" << std::endl
    << " Enable extra verbosity." << std::endl
    << std::endl
    << " --help, -h" << std::endl
    << " Print this helper message." << std::endl
    << std::endl;

  // A non-zero index designates the offending command line argument.
  if (error) {
    std::cout << "Error" << std::endl
      << " "
      << "Unsupported parameter " << argv[error] << std::endl;
  }
}
146
/*!
 * Detect AprilTags in the color stream of a Realsense camera and estimate
 * the pose of each tag twice: once with the detector's own pose estimation
 * method, and once with vpPose::computePlanarObjectPoseFromRGBD() using the
 * depth stream aligned on the color stream.
 *
 * Requires ViSP built with AprilTag and librealsense 2 support; otherwise a
 * message explaining what is missing is printed.
 *
 * \param argc : Number of command line arguments.
 * \param argv : Command line arguments, see usage().
 * \return EXIT_FAILURE when an unsupported argument is given, EXIT_SUCCESS
 * otherwise.
 */
int main(int argc, const char **argv)
{
#if defined(VISP_HAVE_APRILTAG) && defined(VISP_HAVE_REALSENSE2)
#ifdef ENABLE_VISP_NAMESPACE
  using namespace VISP_NAMESPACE_NAME;
#endif

  // Option defaults; usage() advertises 0 (36h11) and 1 (HOMOGRAPHY_VIRTUAL_VS)
  // for the tag family and the pose estimation method.
  vpDetectorAprilTag::vpAprilTagFamily opt_tag_family = vpDetectorAprilTag::TAG_36h11;
  vpDetectorAprilTag::vpPoseEstimationMethod opt_tag_pose_estimation_method = vpDetectorAprilTag::HOMOGRAPHY_VIRTUAL_VS;
  double opt_tag_size = 0.053;
  float opt_tag_quad_decimate = 1.0;
  float opt_tag_decision_margin_threshold = 50;
  int opt_tag_hamming_distance_threshold = 2;
  int opt_tag_nThreads = 1;
  bool opt_display_tag = false;
  int opt_color_id = -1;
  unsigned int opt_thickness = 2;
  bool opt_tag_z_align_frame = false;
  bool opt_verbose = false;

#if !(defined(VISP_HAVE_DISPLAY))
  bool opt_display_off = true;
  std::cout << "Warning: There is no 3rd party to display images..." << std::endl;
#else
  bool opt_display_off = false;
#endif

  // Command line parsing. Options that expect a value are only matched when
  // a value follows; anything unmatched is reported through usage().
  for (int i = 1; i < argc; ++i) {
    const std::string arg(argv[i]);
    const bool has_value = (i + 1 < argc);

    if (arg == "--tag-size" && has_value) {
      opt_tag_size = atof(argv[++i]);
    }
    else if (arg == "--tag-family" && has_value) {
      opt_tag_family = static_cast<vpDetectorAprilTag::vpAprilTagFamily>(atoi(argv[++i]));
    }
    else if (arg == "--tag-quad-decimate" && has_value) {
      opt_tag_quad_decimate = static_cast<float>(atof(argv[++i]));
    }
    else if (arg == "--tag-n-threads" && has_value) {
      opt_tag_nThreads = atoi(argv[++i]);
    }
    else if (arg == "--tag-z-aligned") {
      opt_tag_z_align_frame = true;
    }
    else if (arg == "--tag-pose-method" && has_value) {
      opt_tag_pose_estimation_method = static_cast<vpDetectorAprilTag::vpPoseEstimationMethod>(atoi(argv[++i]));
    }
    else if (arg == "--tag-decision-margin-threshold" && has_value) {
      opt_tag_decision_margin_threshold = static_cast<float>(atof(argv[++i]));
    }
    else if (arg == "--tag-hamming-distance-threshold" && has_value) {
      opt_tag_hamming_distance_threshold = atoi(argv[++i]);
    }
#if defined(VISP_HAVE_DISPLAY)
    else if (arg == "--display-tag") {
      opt_display_tag = true;
    }
    else if (arg == "--display-off") {
      opt_display_off = true;
    }
    else if (arg == "--color" && has_value) {
      opt_color_id = atoi(argv[++i]);
    }
    else if (arg == "--thickness" && has_value) {
      opt_thickness = static_cast<unsigned int>(atoi(argv[++i]));
    }
#endif
    else if (arg == "--verbose" || arg == "-v") {
      opt_verbose = true;
    }
    else if (arg == "--help" || arg == "-h") {
      usage(argv, 0);
      return EXIT_SUCCESS;
    }
    else {
      usage(argv, i);
      return EXIT_FAILURE;
    }
  }

  // Display handles are declared outside the try block so that the raw
  // pointer variant can be released after it.
#if (VISP_CXX_STANDARD >= VISP_CXX_STANDARD_11)
  std::shared_ptr<vpDisplay> d1, d2, d3;
#else
  vpDisplay *d1 = nullptr;
  vpDisplay *d2 = nullptr;
  vpDisplay *d3 = nullptr;
#endif

  try {
    std::cout << "Use Realsense 2 grabber" << std::endl;
    vpRealSense2 g;
    rs2::config config;
    unsigned int width = 640, height = 480;
    config.enable_stream(RS2_STREAM_COLOR, static_cast<int>(width), static_cast<int>(height), RS2_FORMAT_RGBA8, 30);
    config.enable_stream(RS2_STREAM_DEPTH, static_cast<int>(width), static_cast<int>(height), RS2_FORMAT_Z16, 30);
    config.enable_stream(RS2_STREAM_INFRARED, static_cast<int>(width), static_cast<int>(height), RS2_FORMAT_Y8, 30);

    vpImage<unsigned char> I;                     // Gray level image given to the detector
    vpImage<vpRGBa> I_color(height, width);       // Color image filled by the grabber
    vpImage<uint16_t> I_depth_raw(height, width); // Raw depth filled by the grabber
    vpImage<vpRGBa> I_depth;                      // Depth histogram, only used for display

    g.open(config);
    const float depth_scale = g.getDepthScale();
    std::cout << "I_color: " << I_color.getWidth() << " " << I_color.getHeight() << std::endl;
    std::cout << "I_depth_raw: " << I_depth_raw.getWidth() << " " << I_depth_raw.getHeight() << std::endl;

    // Depth is aligned on the color stream so that a pixel has the same
    // coordinates in both images.
    rs2::align align_to_color = RS2_STREAM_COLOR;
    g.acquire(reinterpret_cast<unsigned char *>(I_color.bitmap), reinterpret_cast<unsigned char *>(I_depth_raw.bitmap),
              nullptr, nullptr, &align_to_color);

    std::cout << "Read camera parameters from Realsense device" << std::endl;
    vpCameraParameters cam;
    cam = g.getCameraParameters(RS2_STREAM_COLOR, vpCameraParameters::perspectiveProjWithoutDistortion);

    std::cout << cam << std::endl;
    std::cout << "Tag detector settings" << std::endl;
    std::cout << " Tag size [m] : " << opt_tag_size << std::endl;
    std::cout << " Tag family : " << opt_tag_family << std::endl;
    std::cout << " Quad decimate : " << opt_tag_quad_decimate << std::endl;
    std::cout << " Decision margin threshold : " << opt_tag_decision_margin_threshold << std::endl;
    std::cout << " Hamming distance threshold: " << opt_tag_hamming_distance_threshold << std::endl;
    std::cout << " Num threads : " << opt_tag_nThreads << std::endl;
    std::cout << " Z aligned : " << opt_tag_z_align_frame << std::endl;
    std::cout << " Pose estimation : " << opt_tag_pose_estimation_method << std::endl;

    vpImage<vpRGBa> I_color2 = I_color;
    vpImage<float> depthMap;
    vpImageConvert::createDepthHistogram(I_depth_raw, I_depth);

    if (!opt_display_off) {
#if (VISP_CXX_STANDARD >= VISP_CXX_STANDARD_11)
      d1 = vpDisplayFactory::createDisplay(I_color, 100, 30, "Pose from Homography");
      d2 = vpDisplayFactory::createDisplay(I_color2, I_color.getWidth() + 120, 30, "Pose from RGBD fusion");
      d3 = vpDisplayFactory::createDisplay(I_depth, 100, I_color.getHeight() + 70, "Depth");
#else
      d1 = vpDisplayFactory::allocateDisplay(I_color, 100, 30, "Pose from Homography");
      d2 = vpDisplayFactory::allocateDisplay(I_color2, I_color.getWidth() + 120, 30, "Pose from RGBD fusion");
      d3 = vpDisplayFactory::allocateDisplay(I_depth, 100, I_color.getHeight() + 70, "Depth");
#endif
    }

    vpDetectorAprilTag detector(opt_tag_family);

    detector.setAprilTagQuadDecimate(opt_tag_quad_decimate);
    detector.setAprilTagPoseEstimationMethod(opt_tag_pose_estimation_method);
    detector.setAprilTagNbThreads(opt_tag_nThreads);
    detector.setDisplayTag(opt_display_tag, opt_color_id < 0 ? vpColor::none : vpColor::getColor(opt_color_id), opt_thickness);
    detector.setZAlignedWithCameraAxis(opt_tag_z_align_frame);
    detector.setAprilTagDecisionMarginThreshold(opt_tag_decision_margin_threshold);
    detector.setAprilTagHammingDistanceThreshold(opt_tag_hamming_distance_threshold);

    std::vector<double> time_vec, time_vec_detection;
    for (;;) {
      double t = vpTime::measureTimeMs();

      g.acquire(reinterpret_cast<unsigned char *>(I_color.bitmap),
                reinterpret_cast<unsigned char *>(I_depth_raw.bitmap), nullptr, nullptr, &align_to_color);

      I_color2 = I_color;
      vpImageConvert::convert(I_color, I);
      vpImageConvert::createDepthHistogram(I_depth_raw, I_depth);

      // Scale the raw depth with the sensor depth scale; pixels without a
      // measurement (raw value 0) stay at 0.
      depthMap.resize(I_depth_raw.getHeight(), I_depth_raw.getWidth());
#ifdef VISP_HAVE_OPENMP
#pragma omp parallel for
#endif
      for (int i = 0; i < static_cast<int>(I_depth_raw.getHeight()); i++) {
        for (int j = 0; j < static_cast<int>(I_depth_raw.getWidth()); j++) {
          if (I_depth_raw[i][j]) {
            float Z = I_depth_raw[i][j] * depth_scale;
            depthMap[i][j] = Z;
          }
          else {
            depthMap[i][j] = 0;
          }
        }
      }

      vpDisplay::display(I_color);
      vpDisplay::display(I_color2);
      vpDisplay::display(I_depth);

      // Time the detection alone, separately from the whole loop iteration.
      double t_detection = vpTime::measureTimeMs();
      std::vector<vpHomogeneousMatrix> cMo_vec;
      detector.detect(I, opt_tag_size, cam, cMo_vec);
      t_detection = vpTime::measureTimeMs() - t_detection;
      time_vec_detection.push_back(t_detection);

      // Display camera pose for each tag
      std::vector<std::vector<vpImagePoint> > tagsCorners = detector.getTagsCorners();
      detector.displayTags(I_color, tagsCorners, vpColor::none, opt_thickness);
      detector.displayFrames(I_color, cMo_vec, cam, opt_tag_size / 2, vpColor::none, opt_thickness);
      detector.displayTags(I_color2, tagsCorners, vpColor::none, opt_thickness);
      detector.displayFrames(I_color2, cMo_vec, cam, opt_tag_size / 2, vpColor::none, opt_thickness);

      // Pose from RGB-D fusion, computed for each detected tag
      std::vector<std::vector<vpImagePoint> > tags_corners = detector.getPolygon();
      std::vector<int> tags_id = detector.getTagsId();
      std::map<int, double> tags_size;
      tags_size[-1] = opt_tag_size; // Default tag size
      std::vector<std::vector<vpPoint> > tags_points3d = detector.getTagsPoints3D(tags_id, tags_size);
      for (size_t i = 0; i < tags_corners.size(); i++) {
        vpHomogeneousMatrix cMo;
        double confidence_index;
        if (vpPose::computePlanarObjectPoseFromRGBD(depthMap, tags_corners[i], cam, tags_points3d[i], cMo,
                                                    &confidence_index)) {
          // The frame color reflects the confidence index returned with the pose.
          if (confidence_index > 0.5) {
            vpDisplay::displayFrame(I_color2, cMo, cam, opt_tag_size / 2, vpColor::none, opt_thickness);
          }
          else if (confidence_index > 0.25) {
            vpDisplay::displayFrame(I_color2, cMo, cam, opt_tag_size / 2, vpColor::orange, opt_thickness);
          }
          else {
            vpDisplay::displayFrame(I_color2, cMo, cam, opt_tag_size / 2, vpColor::red, opt_thickness);
          }
          std::stringstream ss;
          ss << "Tag id " << tags_id[i] << " confidence: " << confidence_index;
          vpDisplay::displayText(I_color2, 35 + static_cast<int>(i) * 15, 20, ss.str(), vpColor::red);

          if (opt_verbose) {
            std::cout << ss.str() << std::endl;
            std::cout << "cMo[" << i << "]: \n" << cMo_vec[i] << std::endl;
            std::cout << "cMo[" << i << "] using depth: \n" << cMo << std::endl;
          }
        }
        else {
          vpDisplay::displayText(I_color2, 35, 20, "Unable to compute a valid pose from RGB-D fusion", vpColor::red);
          vpDisplay::displayText(I_color2, 50, 20, "Check your tag size parameter...", vpColor::red);
        }
      }

      vpDisplay::displayText(I_color, 20, 20, "Pose from homography + VVS", vpColor::red);
      vpDisplay::displayText(I_color2, 20, 20, "Pose from RGBD fusion", vpColor::red);
      vpDisplay::displayText(I_color, 35, 20, "Click to quit.", vpColor::red);

      // Turn the start timestamp into the elapsed time of this iteration
      // before recording it for the loop benchmark.
      t = vpTime::measureTimeMs() - t;
      time_vec.push_back(t);

      // The overlay is labelled as a detection time, so report the detection
      // measurement rather than the whole iteration.
      std::stringstream ss;
      ss << "Detection time: " << t_detection << " ms for " << detector.getNbObjects() << " tags";
      vpDisplay::displayText(I_color, 50, 20, ss.str(), vpColor::red);

      if (vpDisplay::getClick(I_color, false))
        break;

      vpDisplay::flush(I_color);
      vpDisplay::flush(I_color2);
      vpDisplay::flush(I_depth);
    }

    std::cout << "Benchmark loop processing time" << std::endl;
    std::cout << "Mean / Median / Std: " << vpMath::getMean(time_vec) << " ms"
      << " ; " << vpMath::getMedian(time_vec) << " ms"
      << " ; " << vpMath::getStdev(time_vec) << " ms" << std::endl;

    std::cout << "Benchmark detection processing time" << std::endl;
    std::cout << "Mean / Median / Std: " << vpMath::getMean(time_vec_detection) << " ms"
      << " ; " << vpMath::getMedian(time_vec_detection) << " ms"
      << " ; " << vpMath::getStdev(time_vec_detection) << " ms" << std::endl;
  }
  catch (const vpException &e) {
    std::cerr << "Catch an exception: " << e.getMessage() << std::endl;
  }

  // Displays obtained with allocateDisplay() are raw pointers that must be
  // released here; the shared_ptr variant releases them automatically.
#if (VISP_CXX_STANDARD < VISP_CXX_STANDARD_11)
  if (!opt_display_off) {
    if (d1 != nullptr) {
      delete d1;
    }
    if (d2 != nullptr) {
      delete d2;
    }
    if (d3 != nullptr) {
      delete d3;
    }
  }
#endif

  return EXIT_SUCCESS;
#else
  (void)argc;
  (void)argv;
#ifndef VISP_HAVE_APRILTAG
  std::cout << "Enable Apriltag support, configure and build ViSP to run this tutorial" << std::endl;
#else
  std::cout << "Install librealsense 3rd party, configure and build ViSP again to use this example" << std::endl;
#endif
#endif
  return EXIT_SUCCESS;
}
Generic class defining intrinsic camera parameters.
@ perspectiveProjWithoutDistortion
Perspective projection without distortion model.
static vpColor getColor(const unsigned int &i)
Definition vpColor.h:300
static const vpColor red
Definition vpColor.h:198
static const vpColor none
Definition vpColor.h:210
static const vpColor orange
Definition vpColor.h:208
@ TAG_36h11
AprilTag 36h11 pattern (recommended).
Class that defines generic functionalities for display.
Definition vpDisplay.h:171
static bool getClick(const vpImage< unsigned char > &I, bool blocking=true)
static void display(const vpImage< unsigned char > &I)
static void displayFrame(const vpImage< unsigned char > &I, const vpHomogeneousMatrix &cMo, const vpCameraParameters &cam, double size, const vpColor &color=vpColor::none, unsigned int thickness=1, const vpImagePoint &offset=vpImagePoint(0, 0), const std::string &frameName="", const vpColor &textColor=vpColor::black, const vpImagePoint &textOffset=vpImagePoint(15, 15))
static void flush(const vpImage< unsigned char > &I)
static void displayText(const vpImage< unsigned char > &I, const vpImagePoint &ip, const std::string &s, const vpColor &color)
Error that can be emitted by ViSP classes.
Definition vpException.h:60
Implementation of an homogeneous matrix and operations on such kind of matrices.
static void createDepthHistogram(const vpImage< uint16_t > &src_depth, vpImage< vpRGBa > &dest_rgba)
static void convert(const vpImage< unsigned char > &src, vpImage< vpRGBa > &dest)
Definition of the vpImage class member functions.
Definition vpImage.h:131
void resize(unsigned int h, unsigned int w)
Resize the image: image initialization.
Definition vpImage.h:544
static double getMedian(const std::vector< double > &v)
Definition vpMath.cpp:343
static double getStdev(const std::vector< double > &v, bool useBesselCorrection=false)
Definition vpMath.cpp:374
static double getMean(const std::vector< double > &v)
Definition vpMath.cpp:323
static bool computePlanarObjectPoseFromRGBD(const vpImage< float > &depthMap, const std::vector< vpImagePoint > &corners, const vpCameraParameters &colorIntrinsics, const std::vector< vpPoint > &point3d, vpHomogeneousMatrix &cMo, double *confidence_index=nullptr)
vpCameraParameters getCameraParameters(const rs2_stream &stream, vpCameraParameters::vpCameraParametersProjType type=vpCameraParameters::perspectiveProjWithDistortion, int index=-1) const
void acquire(vpImage< unsigned char > &grey, double *ts=nullptr)
bool open(const rs2::config &cfg=rs2::config())
float getDepthScale()
std::shared_ptr< vpDisplay > createDisplay()
Return a smart pointer vpDisplay specialization if a GUI library is available or nullptr otherwise.
vpDisplay * allocateDisplay()
Return a newly allocated vpDisplay specialization if a GUI library is available or nullptr otherwise.
VISP_EXPORT double measureTimeMs()