
Commit 0e26aa8

Implemented video-io
commit-id:319ebcfb
1 parent 4ef1eee commit 0e26aa8

15 files changed, +820 -1 lines changed

packages/video-io/pyproject.toml

+15 -1
@@ -7,8 +7,22 @@ authors = [
     { name = "Jan Smółka", email = "[email protected]" }
 ]
 requires-python = ">=3.12.7"
-dependencies = []
+dependencies = [
+    "annotated-types>=0.7.0",
+    "attrs>=25.1.0",
+    "jaxtyping>=0.2.37",
+    "more-itertools>=10.6.0",
+    "opencv-python>=4.11.0.86",
+    "torch>=2.5.1",
+    "torchcodec>=0.2.0",
+    "torchvision>=0.21.0",
+]
 
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"
+
+[dependency-groups]
+dev = [
+    "pytest-benchmark[histogram]>=5.1.0",
+]
@@ -1 +1,6 @@
+from . import annotation, frame
+from .metadata import Metadata
+from .reader import Reader
+from .writer import Writer
 
+__all__ = ['annotation', 'Metadata', 'Reader', 'Writer', 'frame']
@@ -0,0 +1,250 @@
from typing import Annotated, Literal, Protocol, Self

import cv2 as opencv
import numpy
from annotated_types import Ge, Le

type Byte = Annotated[int, Ge(0), Le(255)]
type Color = tuple[Byte, Byte, Byte]

type RgbFrame = numpy.ndarray[tuple[int, int, Literal[3]], numpy.dtype[numpy.uint8]]

WHITE: Color = (255, 255, 255)
GREEN: Color = (0, 255, 0)
DARK_GRAY: Color = (90, 90, 90)


def draw_point_with_description(
    frame: RgbFrame,
    point: tuple[int, int],
    text: str,
    *,
    point_radius: int = 1,
    point_color: Color = GREEN,
    text_location: Literal['above', 'below'] = 'above',
    text_from_point_offset: int = 10,
    font: int = opencv.FONT_HERSHEY_DUPLEX,
    font_scale: float = 1.0,
    font_thickness: int = 1,
    font_color: Color = WHITE,
    box_color: Color = DARK_GRAY,
    box_opacity: float = 0.7,
    box_margin: int = 4,
) -> RgbFrame:
    opencv.circle(frame, point, point_radius, point_color, point_radius * 2)

    frame_height, frame_width, _ = frame.shape

    (text_width, text_height), _ = opencv.getTextSize(
        text,
        font,
        font_scale,
        font_thickness,
    )

    match text_location:
        case 'above':
            text_y_offset = text_height - 2 * box_margin - text_from_point_offset
            y_min = text_height + box_margin
            y_max = frame_height

        case 'below':
            text_y_offset = text_height + 2 * box_margin + text_from_point_offset
            y_min = 0
            y_max = frame_height - (text_height + box_margin)

    x_min = text_width // 2
    x_max = frame_width - x_min

    text_x = __clip(point[0] - text_width // 2, x_min, x_max)
    text_y = __clip(point[1] + text_y_offset, y_min, y_max)

    draw_text_within_box(
        frame,
        text,
        (text_x, text_y),
        font=font,
        font_scale=font_scale,
        font_thickness=font_thickness,
        font_color=font_color,
        box_color=box_color,
        box_opacity=box_opacity,
        box_margin=box_margin,
    )

    return frame


def draw_text_within_box(
    frame: RgbFrame,
    text: str,
    position: tuple[int, int],
    *,
    font: int = opencv.FONT_HERSHEY_DUPLEX,
    font_scale: float = 1.0,
    font_thickness: int = 1,
    font_color: Color = WHITE,
    box_color: Color = DARK_GRAY,
    box_opacity: float = 0.7,
    box_margin: int = 4,
) -> RgbFrame:
    (text_width, text_height), _ = opencv.getTextSize(
        text,
        font,
        font_scale,
        font_thickness,
    )

    box_top_left = (
        max(text_height, position[0] - box_margin),
        max(0, position[1] - box_margin - text_height),
    )

    box_bottom_right = (
        box_top_left[0] + text_width + 2 * box_margin,
        box_top_left[1] + text_height + 2 * box_margin,
    )

    frame_height, frame_width, _ = frame.shape
    match box_bottom_right[0] >= frame_width, box_bottom_right[1] >= frame_height:
        case True, True:
            box_bottom_right = (frame_width - 1, frame_height - 1)
            box_top_left = (
                box_bottom_right[0] - text_width - 2 * box_margin,
                box_bottom_right[1] - text_height - 2 * box_margin,
            )

        case True, False:
            box_bottom_right = (frame_width - 1, box_bottom_right[1])
            box_top_left = (
                box_bottom_right[0] - text_width - 2 * box_margin,
                box_top_left[1],
            )

        case False, True:
            box_bottom_right = (box_bottom_right[0], frame_height - 1)
            box_top_left = (
                box_top_left[0],
                box_bottom_right[1] - text_height - 2 * box_margin,
            )

    box_sub_image = frame[
        box_top_left[1] : box_bottom_right[1],
        box_top_left[0] : box_bottom_right[0],
    ]

    rectangle_image = numpy.full(box_sub_image.shape, box_color, dtype=numpy.uint8)

    blended_image = opencv.addWeighted(
        box_sub_image,
        1 - box_opacity,
        rectangle_image,
        box_opacity,
        gamma=0.0,
    )

    frame[
        box_top_left[1] : box_bottom_right[1],
        box_top_left[0] : box_bottom_right[0],
    ] = blended_image

    opencv.putText(
        frame,
        text,
        position,
        font,
        font_scale,
        font_color,
        font_thickness,
        lineType=opencv.LINE_AA,
    )

    return frame


# TODO: Allow customising the text position
def draw_polygon_with_description(
    frame: RgbFrame,
    vertices: numpy.ndarray[tuple[int, Literal[2]], numpy.dtype[numpy.int32]],
    text: str,
    *,
    area_color: Color = GREEN,
    area_opacity: float = 0.5,
    font: int = opencv.FONT_HERSHEY_DUPLEX,
    font_color: Color = WHITE,
    font_scale: float = 1.0,
    font_thickness: int = 1,
    box_color: Color = DARK_GRAY,
    box_opacity: float = 0.7,
    box_margin: int = 4,
) -> RgbFrame:
    draw_filled_polygon_with_opacity(
        frame,
        vertices,
        color=area_color,
        opacity=area_opacity,
    )

    text_width = opencv.getTextSize(text, font, font_scale, font_thickness)[0][0]

    text_x: int
    text_y: int
    text_x, text_y = numpy.mean(vertices, axis=0).astype(int).tolist()

    text_x -= text_width // 2

    draw_text_within_box(
        frame,
        text,
        (text_x, text_y),
        font=font,
        font_scale=font_scale,
        font_thickness=font_thickness,
        font_color=font_color,
        box_color=box_color,
        box_opacity=box_opacity,
        box_margin=box_margin,
    )

    return frame


def draw_filled_polygon_with_opacity(
    frame: RgbFrame,
    vertices: numpy.ndarray[tuple[int, Literal[2]], numpy.dtype[numpy.int32]],
    *,
    color: Color = GREEN,
    opacity: float = 0.7,
) -> RgbFrame:
    solid_color = numpy.zeros_like(frame, dtype=numpy.uint8)
    solid_color[:] = numpy.array(color, dtype=numpy.uint8)

    mask = numpy.zeros_like(frame, dtype=numpy.uint8)
    opencv.fillPoly(mask, [vertices], (255, 255, 255))
    negative_mask = numpy.full_like(mask, 255) - mask

    colored_polygon = opencv.bitwise_and(solid_color, mask)
    polygon_on_frame = opencv.addWeighted(
        colored_polygon,
        opacity,
        frame,
        1 - opacity,
        0,
    )

    opencv.bitwise_or(
        opencv.bitwise_and(frame, negative_mask),
        opencv.bitwise_and(polygon_on_frame, mask),
        frame,
    )

    return frame


class Comparable(Protocol):
    def __lt__(self, _other: Self, /) -> bool: ...
    def __gt__(self, _other: Self, /) -> bool: ...


def __clip[T: Comparable](value: T, min_value: T, max_value: T) -> T:
    # Clamp `value` into the closed range [min_value, max_value].
    return max(min_value, min(value, max_value))
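
A minimal usage sketch for the drawing helpers above (not part of the commit; the `video_io` import path and the synthetic zero frame are assumptions):

import numpy

from video_io import annotation  # assumed package import path

# A blank 640x480 RGB frame stands in for a decoded video frame.
frame = numpy.zeros((480, 640, 3), dtype=numpy.uint8)

annotation.draw_point_with_description(frame, (320, 240), 'optical centre')
annotation.draw_polygon_with_description(
    frame,
    numpy.array([[100, 100], [220, 120], [160, 220]], dtype=numpy.int32),
    'region of interest',
)

Both helpers draw into the frame in place and also return it, so calls can be chained or used inline.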
@@ -0,0 +1,49 @@
from dataclasses import dataclass

import serde
import torch
from jaxtyping import Float, Float64


@serde.serde
@dataclass(slots=True)
class Calibration:
    focal_length: tuple[float, float]
    optical_center: tuple[float, float]
    distortion: tuple[float, float, float, float, float]

    def intrinsics_matrix(self) -> Float64[torch.Tensor, '3 3']:
        output = torch.zeros((3, 3), dtype=torch.float64)

        fx, fy = self.focal_length
        cx, cy = self.optical_center

        output[0, 0] = fx
        output[1, 1] = fy
        output[0, 2] = cx
        output[1, 2] = cy
        output[2, 2] = 1.0

        return output

    def distortion_vector(self) -> Float64[torch.Tensor, '5']:
        return torch.tensor(self.distortion, dtype=torch.float64)

    def unproject_depth(
        self,
        depth: Float[torch.Tensor, 'height width'],
    ) -> Float[torch.Tensor, '3 height width']:
        *_, height, width = depth.shape

        u = torch.arange(width)
        v = torch.arange(height)
        u, v = torch.meshgrid(u, v, indexing='xy')

        fx, fy = self.focal_length
        cx, cy = self.optical_center

        x = (u - cx) * depth / fx
        y = (v - cy) * depth / fy
        z = depth

        return torch.stack((x, y, z))
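
A rough usage sketch for Calibration (illustrative intrinsics and a dummy depth map, not values from the commit):

import torch

calibration = Calibration(
    focal_length=(525.0, 525.0),
    optical_center=(319.5, 239.5),
    distortion=(0.0, 0.0, 0.0, 0.0, 0.0),
)

depth = torch.ones(480, 640, dtype=torch.float64)  # placeholder depth map
points = calibration.unproject_depth(depth)        # camera-space XYZ, shape (3, 480, 640)
intrinsics = calibration.intrinsics_matrix()       # 3x3 pinhole matrix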
@@ -0,0 +1,11 @@
import numpy
import torch
from jaxtyping import UInt8
from numpy.typing import NDArray

type Array = NDArray[numpy.uint8]

type ArrayRgbFrame = UInt8[numpy.ndarray, 'height width 3']
type ArrayGrayFrame = UInt8[numpy.ndarray, 'height width']

type TensorRgbFrame = UInt8[torch.Tensor, '3 height width']
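
For illustration only (not in the commit): the aliases above just name memory layouts, and a decoded HWC numpy frame maps onto the CHW tensor layout with a permute:

import numpy
import torch

array_frame = numpy.zeros((480, 640, 3), dtype=numpy.uint8)    # ArrayRgbFrame layout
tensor_frame = torch.from_numpy(array_frame).permute(2, 0, 1)  # TensorRgbFrame layout

assert tensor_frame.shape == (3, 480, 640)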
@@ -0,0 +1,26 @@
from typing import Self

from attrs import frozen
from torchcodec.decoders import VideoStreamMetadata  # type: ignore[attr-defined]


@frozen
class Metadata:
    fps: float
    frames: int
    width: int
    height: int

    @classmethod
    def from_stream_metadata(cls, stream_metadata: VideoStreamMetadata) -> Self:
        fps = stream_metadata.average_fps
        frames = stream_metadata.num_frames
        width = stream_metadata.width
        height = stream_metadata.height

        assert fps is not None
        assert frames is not None
        assert width is not None
        assert height is not None

        return cls(fps, frames, width, height)
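
A hedged sketch of constructing Metadata from a decoder; 'example.mp4' is a placeholder, and `VideoDecoder.metadata` exposing a VideoStreamMetadata is assumed from torchcodec 0.2:

from torchcodec.decoders import VideoDecoder

decoder = VideoDecoder('example.mp4')
metadata = Metadata.from_stream_metadata(decoder.metadata)

print(metadata.fps, metadata.frames, metadata.width, metadata.height)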
