# Visual Servoing Platform (ViSP) version 3.7.0
# Example script: visp_python_xfeat_pose_estimation.py
# Pose estimation with XFeat keypoints and the ViSP Render-Based Tracker.
import argparse
from dataclasses import dataclass
from pathlib import Path
from typing import Generator

import numpy as np
import pyrealsense2 as rs

from visp.core import CameraParameters, ImageGray, ImageRGBa, ImageConvert, ImageFloat
from visp.core import Display, Color, HomogeneousMatrix, MouseButton
from visp.ar import Panda3DGeometryRenderer, Panda3DRendererSet, Panda3DRenderParameters
from visp.rbt import Panda3DDepthCannyFilter
from visp.rbt import RBTracker
from visp.python.rbt import PythonRBExtensions
from visp.python.rbt.xfeat import XFeatViewPointPoseEstimator, XFeatBackend
from visp.python.display_utils import get_display
18
19
@dataclass
class Frame:
    """Container for one acquisition: grayscale, color and depth views.

    NOTE(review): the three images presumably come from the same capture
    instant; the RealSense source below fills all three from a single,
    depth-aligned frameset.
    """
    I: ImageGray         # grayscale image
    IRGB: ImageRGBa      # color (RGBa) image
    I_depth: ImageFloat  # depth map (raw z16 values scaled by the sensor depth scale)
25
class FrameSource:
    """Abstract source of camera frames.

    NOTE(review): the class header was lost in the extraction; the name
    ``FrameSource`` is reconstructed from context — confirm against the
    upstream ViSP example. Subclasses must provide intrinsics and a frame
    generator.
    """

    def intrinsics(self) -> CameraParameters:
        """Return the camera intrinsics of this source."""
        raise NotImplementedError()

    def frames(self):
        """Return an iterable (generator) of Frame objects."""
        raise NotImplementedError()
31
class RealSenseSource:
    """Frame source backed by an Intel RealSense camera.

    NOTE(review): the class header was lost in the extraction; the name is
    confirmed by the call sites below. It presumably derived from the
    FrameSource base — duck typing keeps the script working either way.
    """

    def __init__(self, args):
        """Open the camera with color + depth streams sized per ``args``.

        ``args`` must provide ``height``, ``width`` and ``fps``.
        """
        # The redundant function-local `import pyrealsense2 as rs` was
        # removed: `rs` is already imported at module level.
        self.pipe = rs.pipeline()
        self.config = rs.config()
        self.h, self.w = args.height, args.width
        self.config.enable_stream(rs.stream.color, self.w, self.h, rs.format.rgb8, args.fps)
        self.config.enable_stream(rs.stream.depth, self.w, self.h, rs.format.z16, args.fps)

        self.cfg = self.pipe.start(self.config)
        self.profile = self.cfg.get_stream(rs.stream.color)
        # Downcast to video_stream_profile to fetch the color intrinsics.
        intr = self.profile.as_video_stream_profile().get_intrinsics()
        self.cam = CameraParameters(intr.fx, intr.fy, intr.ppx, intr.ppy)
        # Scale factor converting raw z16 depth units to metric depth.
        self.depth_scale = self.cfg.get_device().first_depth_sensor().get_depth_scale()
        print(f'Opened Realsense camera with intrinsics:\n{self.cam}')

    def intrinsics(self) -> CameraParameters:
        """Return the color stream intrinsics."""
        return self.cam

    def frames(self):
        """Return an infinite generator of depth-aligned Frame objects.

        The yielded images are reused buffers: every iteration overwrites
        the same ImageGray/ImageRGBa/ImageFloat instances in place, so
        consumers must copy a frame if they need to keep it.
        """
        def generator():
            align_to = rs.align(rs.stream.color)
            img = ImageGray(self.h, self.w)
            rgb = ImageRGBa(self.h, self.w)
            depth = ImageFloat(self.h, self.w)
            while True:
                frame = self.pipe.wait_for_frames()
                frame = align_to.process(frame)
                I_np = np.asanyarray(frame.get_color_frame().as_frame().get_data())
                I_depth_raw = np.asanyarray(frame.get_depth_frame().as_frame().get_data())
                # Scale raw z16 depth to metric float values.
                I_depth_float = I_depth_raw.astype(np.float32) * self.depth_scale
                ImageConvert.RGBToGrey(I_np, img)
                ImageConvert.RGBToRGBa(I_np, rgb)
                np.copyto(src=I_depth_float, dst=depth.numpy())

                yield Frame(img, rgb, depth)

        return generator()
72
73
class DisplayFrames:
    """Three side-by-side ViSP display windows: gray, color and depth.

    NOTE(review): the class header was lost in the extraction; the name is
    confirmed by the call sites below.
    """

    def __init__(self, h, w):
        """Allocate the three display buffers and open their windows."""
        self.h = h
        self.w = w
        pad = w // 5  # gap between the display windows, in pixels
        self.I_disp = ImageGray(h, w)
        self.rgb_disp = ImageRGBa(h, w)
        self.depth_disp = ImageGray(h, w)

        self.dI, self.dRGB, self.dDepth = [get_display() for _ in range(3)]
        self.dI.init(self.I_disp, 0, 0, 'Gray image')
        self.dRGB.init(self.rgb_disp, w + pad, 0, 'Color image')
        self.dDepth.init(self.depth_disp, 0, h + pad, 'Depth image')

    def display(self, I: ImageGray, RGB: ImageRGBa, depth: ImageFloat, max_depth: float = 1.0):
        """Copy the images into the display buffers and draw them.

        :param max_depth: depth value (same units as ``depth``, presumably
            meters) mapped to full white; larger values are clamped. The
            default of 1.0 preserves the previously hard-coded behavior.
        """
        assert I.getSize() == self.I_disp.getSize()
        assert RGB.getSize() == self.rgb_disp.getSize()
        assert depth.getSize() == self.depth_disp.getSize()

        np.copyto(src=I.numpy(), dst=self.I_disp.numpy())
        np.copyto(src=RGB.numpy(), dst=self.rgb_disp.numpy())
        # Normalize depth from [0, max_depth] to [0, 255] grayscale.
        depth_uint = (np.minimum(depth.numpy() / max_depth, 1.0) * 255.0).astype(np.uint8)
        np.copyto(src=depth_uint, dst=self.depth_disp.numpy())
        for f in [self.I_disp, self.rgb_disp, self.depth_disp]:
            Display.display(f)

    def flush(self):
        """Flush (refresh) all three display windows."""
        for f in [self.I_disp, self.rgb_disp, self.depth_disp]:
            Display.flush(f)
103
104
def main():
    """Entry point: record XFeat viewpoints if the database is missing,
    otherwise run interactive pose estimation against it."""
    parser = argparse.ArgumentParser(description='Pose estimation with XFeat')

    parser.add_argument('--database', type=str, required=True, help='Path where to save the keypoints and their descriptors. If it already exists, then it will be used to load keypoints.')
    parser.add_argument('--object', type=str, required=True, help='Path to the .obj file of the object for which to estimate the pose')
    parser.add_argument('--rbt-config', type=str, required=True, help='Path to the Render Based Tracker configuration. It is used to continuously estimate the pose in the learning step')
    parser.add_argument('--init-file', type=str, help='An initialization file, required to init tracker when XFeat points have not yet been learned.')

    parser.add_argument('--width', type=int, default=640, help='Width of the input images')
    parser.add_argument('--height', type=int, default=480, help='Height of the input images')
    parser.add_argument('--fps', type=int, default=60, help='Camera framerate')

    args = parser.parse_args()

    db_path = Path(args.database).absolute()
    # A missing database means the keypoints must be learned first.
    is_learning = not db_path.exists()

    object_path = Path(args.object).absolute()
    rbt_path = Path(args.rbt_config).absolute()

    assert object_path.exists()
    assert rbt_path.exists()

    pose_estimator = XFeatViewPointPoseEstimator(db_path, False, False, 2 ** 12, 0.5, 0.25, 10)
    pose_estimator.optim_params.gain = 0.5
    pose_estimator.optim_params.minImprovementFactor = 0.001

    frame_source = RealSenseSource(args)
    exts = PythonRBExtensions()

    if is_learning:
        # NOTE(review): indentation reconstructed from the extraction — the
        # render-based tracker is only needed for the learning phase.
        print('Database was not found, going into learning mode!')

        print('Setting up tracker..')
        tracker = RBTracker()
        tracker.loadConfigurationFile(str(rbt_path))
        exts.parse_python_extensions(tracker, rbt_path)
        tracker.setModelPath(str(object_path))
        tracker.setCameraParameters(frame_source.intrinsics(), frame_source.h, frame_source.w)
        tracker.startTracking()

        learn_loop(args, tracker, frame_source, pose_estimator)
    else:
        estimation_loop(object_path, frame_source, pose_estimator)
153
def tracker_click_init_loop(frames, tracker: RBTracker, init_path: Path, display_frames: DisplayFrames):
    """Show live frames until the user clicks, then init the tracker by click.

    Consumes frames from the shared generator; returns once the tracker has
    been initialized from ``init_path``.
    """
    for current in frames:
        display_frames.display(current.I, current.IRGB, current.I_depth)
        Display.displayText(display_frames.I_disp, 10, 10, 'Click to initialize by click', Color.red)
        display_frames.flush()

        # Non-blocking poll: keep streaming until a click arrives.
        if Display.getClick(display_frames.I_disp, blocking=False):
            tracker.initClick(display_frames.I_disp, str(init_path), True)
            break
165
def learn_loop(args, tracker: RBTracker, frame_source: RealSenseSource, pose_estimator: XFeatViewPointPoseEstimator):
    """Track the object and let the user record XFeat viewpoints.

    Left click records the current viewpoint, right click saves the database
    and exits, pressing 'r' re-initializes the tracker by click.

    :raises RuntimeError: if ``args.init_file`` is not provided.
    """
    frames = frame_source.frames()
    display_frames = DisplayFrames(frame_source.h, frame_source.w)
    print('Initializing tracker before recording keypoints...')
    # Initialize by click
    if args.init_file is None:
        raise RuntimeError('Init file is required to initialize the Render Base tracker before learning keypoints.')
    init_path = Path(args.init_file).absolute()

    tracker_click_init_loop(frames, tracker, init_path, display_frames)

    cMo = HomogeneousMatrix()
    tracker.getPose(cMo)
    print('Tracker was initialized with pose:\n', cMo)
    line_height = 20
    for frame in frames:
        # Track; the returned result is unused — the pose is queried below.
        tracker.track(frame.I, frame.IRGB, frame.I_depth)

        tracker.getPose(cMo)

        display_frames.display(frame.I, frame.IRGB, frame.I_depth)
        tracker.display(display_frames.I_disp, display_frames.rgb_disp, display_frames.depth_disp)

        Display.displayText(display_frames.I_disp, 10, 10, 'Left click to record a viewpoint', Color.red)
        Display.displayText(display_frames.I_disp, 10 + line_height, 10, 'Right click to stop', Color.red)
        Display.displayText(display_frames.I_disp, 10 + line_height * 2, 10, 'Press R to reinitialize', Color.red)

        button = MouseButton.MouseButtonType.none
        clicked = Display.getClick(display_frames.I_disp, button, blocking=False)
        key_pressed, key = Display.getKeyboardEventWithKey(display_frames.I_disp, blocking=False)

        if clicked:
            if button == MouseButton.button1:
                pose_estimator.record(tracker.getMostRecentFrame(), cMo)
                print('Recorded new viewpoint')
            elif button == MouseButton.button3:
                pose_estimator.save()
                print('Finished recording, exiting...')
                break
        # NOTE(review): per the original line numbering, flush runs every
        # iteration at loop level (skipped only by the break above) — confirm
        # against the upstream example.
        display_frames.flush()

        if key_pressed:
            if key.lower() == 'r':
                tracker_click_init_loop(frames, tracker, init_path, display_frames)
212
213
214
215
def estimation_loop(object_path: Path, frame_source: RealSenseSource, pose_estimator: XFeatViewPointPoseEstimator):
    """Interactively estimate the object pose and overlay the model silhouette.

    Left click triggers an XFeat pose estimation on the current frame; the
    estimated pose is used to render the CAD model's depth-Canny edges, which
    are then drawn in green on subsequent frames. Right click exits.
    """
    frames = frame_source.frames()
    display_frames = DisplayFrames(frame_source.h, frame_source.w)
    line_height = 20
    last_estimated_cMo = HomogeneousMatrix()
    h, w = frame_source.h, frame_source.w
    canny_or, canny_valid = ImageFloat(h, w), ImageGray(h, w)
    point_indices = None  # (rows, cols) of rendered model edge pixels, once available

    # Renderers: object normals feed a depth-based Canny filter of the model.
    geometry_renderer = Panda3DGeometryRenderer(Panda3DGeometryRenderer.RenderType.OBJECT_NORMALS, False)
    canny_renderer = Panda3DDepthCannyFilter('canny', geometry_renderer, True, 0.01)
    renderer = Panda3DRendererSet()

    renderer.addSubRenderer(geometry_renderer)
    renderer.addSubRenderer(canny_renderer)

    render_params = Panda3DRenderParameters(frame_source.intrinsics(), frame_source.h, frame_source.w, 0.001, 0.25)
    renderer.setRenderParameters(render_params)
    renderer.initFramework()
    renderer.addObjectToScene('object', str(object_path))
    for frame in frames:
        display_frames.display(frame.I, frame.IRGB, frame.I_depth)

        if point_indices is not None:
            # Overlay the silhouette from the last rendered estimate.
            for v, u in zip(*point_indices):
                Display.displayPoint(display_frames.I_disp, v, u, Color.green, thickness=2)
        Display.displayText(display_frames.I_disp, 10, 10, 'Left click to perform pose estimation', Color.red)
        Display.displayText(display_frames.I_disp, 10 + line_height, 10, 'Right click to stop', Color.red)
        button = MouseButton.MouseButtonType.none
        clicked = Display.getClick(display_frames.I_disp, button, blocking=False)

        if clicked:
            if button == MouseButton.button1:
                pose_ok, cMo = pose_estimator.estimate_pose(frame.IRGB, frame.I_depth, frame_source.intrinsics())
                if pose_ok:
                    print('Pose estimation was detected as successful!')
                # NOTE(review): indentation reconstructed — the render update
                # appears to run regardless of pose_ok (the flag only gates
                # the success message); confirm against the upstream example.
                last_estimated_cMo = cMo
                renderer.setCameraPose(cMo.inverse())
                nearV, farV = geometry_renderer.computeNearAndFarPlanesFromNode('object', True)
                params = Panda3DRenderParameters(frame_source.intrinsics(), frame_source.h, frame_source.w, nearV, farV)
                renderer.setRenderParameters(params)
                renderer.renderFrame()
                canny_renderer.getRender(canny_or, canny_valid)
                point_indices = np.nonzero(canny_valid.numpy())
            elif button == MouseButton.button3:
                print('Finished pose estimation test, exiting...')
                break

        # Draw the last estimated pose frame (5 cm axes) on every window.
        for frame_display in [display_frames.I_disp, display_frames.rgb_disp, display_frames.depth_disp]:
            Display.displayFrame(frame_display, last_estimated_cMo, frame_source.intrinsics(), 0.05)

        display_frames.flush()

    from visp.ar import Panda3DFrameworkManager
    Panda3DFrameworkManager.getInstance().exit()
273
274
275
276
277
# Script entry point.
if __name__ == '__main__':
    main()
# --- Extraction residue: documentation signature index (kept for reference) ---
# display(self, ImageGray I, ImageRGBa RGB, ImageFloat depth)
# learn_loop(args, RBTracker tracker, RealSenseSource frame_source, XFeatViewPointPoseEstimator pose_estimator)
# tracker_click_init_loop(frames, RBTracker tracker, Path init_path, DisplayFrames display_frames)
# estimation_loop(Path object_path, RealSenseSource frame_source, XFeatViewPointPoseEstimator pose_estimator)