Skip to content

Commit 3a8d483

Browse files
Moved ArUco and chessboard detectors to marker-detection (#158)
1 parent 8c48338 commit 3a8d483

File tree

4 files changed

+1009
-4
lines changed

4 files changed

+1009
-4
lines changed

packages/marker-detection/pyproject.toml

+15-4
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,22 @@ name = "marker-detection"
33
version = "0.1.0"
44
description = "Add your description here"
55
readme = "README.md"
6-
authors = [
7-
{ name = "Jan Smółka", email = "[email protected]" }
8-
]
6+
authors = [{ name = "Jan Smółka", email = "[email protected]" }]
97
requires-python = ">=3.12.7"
10-
dependencies = []
8+
dependencies = [
9+
"attrs>=25.1.0",
10+
"numpy>=1.26.4",
11+
"opencv-python>=4.11.0.86",
12+
"pyserde>=0.23.0",
13+
"scipy>=1.15.1",
14+
"video-io",
15+
]
16+
17+
[dependency-groups]
18+
dev = ["pytest>=8.3.4", "syrupy>=4.8.1"]
19+
20+
[tool.uv.sources]
21+
video-io = { workspace = true }
1122

1223
[build-system]
1324
requires = ["hatchling"]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
import typing
2+
from dataclasses import dataclass
3+
from enum import IntEnum
4+
from typing import TypedDict
5+
6+
import cv2 as opencv
7+
import cv2.aruco as opencv_aruco
8+
import numpy
9+
import scipy.spatial.transform as scipy_transform
10+
from jaxtyping import Float, Int
11+
from video_io import annotation
12+
from video_io.frame import ArrayRgbFrame
13+
14+
# type Array = NDArray[numpy.float32]
15+
16+
type IntrinsicsMatrix = Float[numpy.ndarray, '3 3']
17+
type DistortionCoefficients = Float[numpy.ndarray, '5']
18+
19+
20+
# TODO: Add serialization to `Dictionary`.
21+
22+
23+
class Dictionary(IntEnum):
24+
ARUCO_ORIGINAL = opencv_aruco.DICT_ARUCO_ORIGINAL
25+
26+
ARUCO_4X4_100 = opencv_aruco.DICT_4X4_100
27+
ARUCO_4X4_200 = opencv_aruco.DICT_4X4_250
28+
ARUCO_4X4_1000 = opencv_aruco.DICT_4X4_1000
29+
30+
ARUCO_5X5_50 = opencv_aruco.DICT_5X5_50
31+
ARUCO_5X5_100 = opencv_aruco.DICT_5X5_100
32+
ARUCO_5X5_250 = opencv_aruco.DICT_5X5_250
33+
ARUCO_5X5_1000 = opencv_aruco.DICT_5X5_1000
34+
35+
ARUCO_6X6_50 = opencv_aruco.DICT_6X6_50
36+
ARUCO_6X6_100 = opencv_aruco.DICT_6X6_100
37+
ARUCO_6X6_250 = opencv_aruco.DICT_6X6_250
38+
ARUCO_6X6_1000 = opencv_aruco.DICT_6X6_1000
39+
40+
ARUCO_7X7_50 = opencv_aruco.DICT_7X7_50
41+
ARUCO_7X7_100 = opencv_aruco.DICT_7X7_100
42+
ARUCO_7X7_250 = opencv_aruco.DICT_7X7_250
43+
ARUCO_7X7_1000 = opencv_aruco.DICT_7X7_1000
44+
45+
ARUCO_MIP_36H12 = opencv_aruco.DICT_ARUCO_MIP_36H12
46+
47+
APRIL_16H5 = opencv_aruco.DICT_APRILTAG_16H5
48+
APRIL_25H9 = opencv_aruco.DICT_APRILTAG_25H9
49+
APRIL_36H10 = opencv_aruco.DICT_APRILTAG_36H10
50+
APRIL_36H11 = opencv_aruco.DICT_APRILTAG_36H11
51+
52+
@classmethod
53+
def parse(cls, input: str) -> 'Dictionary | None':
54+
return _DICTIONARY_NAMES_TO_VARIANTS.get(input)
55+
56+
57+
_DICTIONARY_NAMES_TO_VARIANTS: dict[str, Dictionary] = {
58+
'aruco-original': Dictionary.ARUCO_ORIGINAL,
59+
'aruco-4x4-100': Dictionary.ARUCO_4X4_100,
60+
'aruco-4x4-200': Dictionary.ARUCO_4X4_200,
61+
'aruco-4x4-1000': Dictionary.ARUCO_4X4_1000,
62+
'aruco-5x5-50': Dictionary.ARUCO_5X5_50,
63+
'aruco-5x5-100': Dictionary.ARUCO_5X5_100,
64+
'aruco-5x5-250': Dictionary.ARUCO_5X5_250,
65+
'aruco-5x5-1000': Dictionary.ARUCO_5X5_1000,
66+
'aruco-6x6-50': Dictionary.ARUCO_6X6_50,
67+
'aruco-6x6-100': Dictionary.ARUCO_6X6_100,
68+
'aruco-6x6-250': Dictionary.ARUCO_6X6_250,
69+
'aruco-6x6-1000': Dictionary.ARUCO_6X6_1000,
70+
'aruco-7x7-50': Dictionary.ARUCO_7X7_50,
71+
'aruco-7x7-100': Dictionary.ARUCO_7X7_100,
72+
'aruco-7x7-250': Dictionary.ARUCO_7X7_250,
73+
'aruco-7x7-1000': Dictionary.ARUCO_7X7_1000,
74+
'aruco-mip-36h12': Dictionary.ARUCO_MIP_36H12,
75+
'april-16h5': Dictionary.APRIL_16H5,
76+
'april-25h9': Dictionary.APRIL_25H9,
77+
'april-36h10': Dictionary.APRIL_36H10,
78+
'april-36h11': Dictionary.APRIL_36H11,
79+
}
80+
81+
_DICTIONARY_VARIANTS_TO_NAMES: dict[Dictionary, str] = dict(
82+
map(lambda entry: (entry[1], entry[0]), _DICTIONARY_NAMES_TO_VARIANTS.items())
83+
)
84+
85+
86+
class RigidModel:
87+
square_size: float
88+
coordinates: Float[numpy.ndarray, '3 4']
89+
90+
def __init__(self, square_size: float, depth: float) -> None:
91+
self.square_size = square_size
92+
93+
self.coordinates = numpy.array(
94+
[
95+
[-square_size / 2.0, square_size / 2.0, depth],
96+
[square_size / 2.0, square_size / 2.0, depth],
97+
[square_size / 2.0, -square_size / 2.0, depth],
98+
[-square_size / 2.0, -square_size / 2.0, depth],
99+
],
100+
dtype=numpy.float32,
101+
)
102+
103+
104+
class VisualizationContext(TypedDict):
105+
intrinsics: IntrinsicsMatrix
106+
marker_draw_masks: bool
107+
marker_draw_ids: bool
108+
marker_draw_axes: bool
109+
marker_draw_angles: bool
110+
marker_mask_color: tuple[float, float, float, float]
111+
marker_axis_length: int
112+
marker_axis_thickness: int
113+
114+
115+
@dataclass(frozen=True, slots=True)
116+
class Transformation:
117+
rotation: Float[numpy.ndarray, '3 3']
118+
translation: Float[numpy.ndarray, '3']
119+
120+
def euler_angles(self) -> Float[numpy.ndarray, '3']:
121+
return ( # type: ignore[no-any-return]
122+
scipy_transform.Rotation.from_matrix(self.rotation).as_euler(
123+
'xyz',
124+
degrees=False,
125+
)
126+
)
127+
128+
129+
@dataclass(frozen=True)
130+
class Result:
131+
corners: Int[numpy.ndarray, 'n 4']
132+
ids: Int[numpy.ndarray, ' n']
133+
transformations: list[Transformation]
134+
135+
def draw(
136+
self,
137+
frame: ArrayRgbFrame,
138+
context: VisualizationContext,
139+
) -> ArrayRgbFrame:
140+
if len(self.ids) == 0:
141+
return frame
142+
143+
draw_boxes = context['marker_draw_masks']
144+
draw_ids = context['marker_draw_ids']
145+
draw_axes = context['marker_draw_axes']
146+
draw_angles = context['marker_draw_angles']
147+
148+
# TODO: Get rid of nested parts. It's better to have the same loop a few times.
149+
150+
if draw_boxes:
151+
r, g, b, _ = context['marker_mask_color']
152+
color = (int(r), int(g), int(b))
153+
154+
for marker_corners_raw in self.corners:
155+
marker_corners = marker_corners_raw.reshape(-1, 2).astype(numpy.int32)
156+
157+
annotation.draw_filled_polygon_with_opacity(
158+
frame,
159+
marker_corners,
160+
color=color,
161+
opacity=0.5,
162+
)
163+
164+
corner_pixels: list[tuple[int, int]] = list(map(tuple, marker_corners))
165+
upper_left, upper_right, lower_right, lower_left = corner_pixels
166+
167+
annotation.draw_point_with_description(
168+
frame,
169+
upper_left,
170+
'upper_left',
171+
point_radius=1,
172+
font_scale=0.4,
173+
text_location='above',
174+
)
175+
annotation.draw_point_with_description(
176+
frame,
177+
upper_right,
178+
'upper_right',
179+
point_radius=1,
180+
font_scale=0.4,
181+
text_location='above',
182+
)
183+
annotation.draw_point_with_description(
184+
frame,
185+
lower_right,
186+
'lower_right',
187+
point_radius=1,
188+
font_scale=0.4,
189+
)
190+
annotation.draw_point_with_description(
191+
frame,
192+
lower_left,
193+
'lower_left',
194+
point_radius=1,
195+
font_scale=0.4,
196+
)
197+
198+
if draw_axes:
199+
length = context['marker_axis_length']
200+
thickness = context['marker_axis_thickness']
201+
202+
x_color = (255, 0, 0)
203+
y_color = (0, 255, 0)
204+
z_color = (0, 0, 255)
205+
206+
intrinsics = context['intrinsics']
207+
208+
basis_points = numpy.array(
209+
[
210+
[0.0, 0.0, 0.0],
211+
[length, 0.0, 0.0],
212+
[0.0, length, 0.0],
213+
[0.0, 0.0, length],
214+
],
215+
dtype=numpy.float32,
216+
).T
217+
218+
for transformation in self.transformations:
219+
rotation = transformation.rotation
220+
translation = transformation.translation.reshape(-1, 1)
221+
222+
transformed_basis = rotation @ basis_points + translation
223+
transformed_basis /= transformed_basis[2]
224+
projected_basis = intrinsics @ transformed_basis
225+
226+
projected_basis_pixel_coordinates: list[tuple[int, int]] = (
227+
projected_basis.T[:, :2].astype(int).tolist()
228+
)
229+
o, x, y, z = projected_basis_pixel_coordinates
230+
231+
opencv.line(frame, o, x, x_color, thickness)
232+
opencv.line(frame, o, y, y_color, thickness)
233+
opencv.line(frame, o, z, z_color, thickness)
234+
235+
if draw_angles:
236+
angles: list[float] = (
237+
180.0 / numpy.pi * transformation.euler_angles()
238+
).tolist()
239+
240+
x_angle, y_angle, z_angle = angles
241+
242+
annotation.draw_point_with_description(
243+
frame,
244+
x,
245+
f'x: {x_angle:.2f}',
246+
font_scale=0.3,
247+
point_radius=1,
248+
point_color=x_color,
249+
)
250+
annotation.draw_point_with_description(
251+
frame,
252+
y,
253+
f'y: {y_angle:.2f}',
254+
font_scale=0.3,
255+
point_radius=1,
256+
point_color=y_color,
257+
)
258+
annotation.draw_point_with_description(
259+
frame,
260+
z,
261+
f'z: {z_angle:.2f}',
262+
font_scale=0.3,
263+
point_radius=1,
264+
point_color=z_color,
265+
)
266+
267+
if draw_ids:
268+
for id, marker_corners in zip(self.ids, self.corners):
269+
marker_corners = marker_corners.reshape(-1, 2)
270+
center: tuple[int, int] = (
271+
numpy.mean(marker_corners, axis=0).astype(int).tolist()
272+
)
273+
274+
annotation.draw_text_within_box(
275+
frame,
276+
f'marker {id}',
277+
center,
278+
font_scale=0.3,
279+
)
280+
281+
return frame
282+
283+
284+
class Detector:
285+
dictionary: Dictionary
286+
raw_dictionary: opencv_aruco.Dictionary
287+
detector: opencv_aruco.ArucoDetector
288+
289+
marker_size: float
290+
marker_local_coordinates: Float[numpy.ndarray, '3 4']
291+
292+
intrinsics: IntrinsicsMatrix
293+
distortion: DistortionCoefficients
294+
295+
def __init__(
296+
self,
297+
model: RigidModel,
298+
dictionary: Dictionary,
299+
detector_parameters: opencv_aruco.DetectorParameters,
300+
intrinsics: IntrinsicsMatrix,
301+
distortion: DistortionCoefficients,
302+
) -> None:
303+
self.dictionary = dictionary
304+
305+
raw_dictionary = opencv_aruco.getPredefinedDictionary(dictionary.value)
306+
self.raw_dictionary = raw_dictionary
307+
308+
self.detector = opencv_aruco.ArucoDetector(
309+
raw_dictionary,
310+
detector_parameters,
311+
)
312+
313+
self.model = model
314+
self.intrinsics = intrinsics
315+
self.distortion = distortion
316+
317+
def predict(self, frame: ArrayRgbFrame) -> Result | None:
318+
corners_dirty, ids_dirty, _rejected = self.detector.detectMarkers(frame)
319+
320+
corners = typing.cast(list[Float[numpy.ndarray, 'n 4']], corners_dirty)
321+
ids = typing.cast(Int[numpy.ndarray, ' n'], ids_dirty)
322+
323+
if len(corners) == 0 or len(ids) == 0:
324+
return None
325+
326+
intrinsics = self.intrinsics
327+
distortion = self.distortion
328+
329+
marker_rigid_coordinates = self.model.coordinates
330+
331+
results = [
332+
opencv.solvePnP(
333+
marker_rigid_coordinates,
334+
camera_coordinates,
335+
intrinsics,
336+
distortion,
337+
useExtrinsicGuess=True,
338+
flags=opencv.SOLVEPNP_IPPE_SQUARE,
339+
)
340+
for camera_coordinates in corners
341+
]
342+
343+
transformations = [
344+
Transformation(opencv.Rodrigues(rotation)[0], numpy.squeeze(translation))
345+
for success, rotation, translation in results
346+
if success
347+
]
348+
349+
return Result(numpy.stack(corners), ids.reshape(-1), transformations)

0 commit comments

Comments
 (0)