From 41c9bfe4e5507d2ed5c6038ee7c9f4086ffeed79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Sm=C3=B3=C5=82ka?= Date: Sat, 15 Feb 2025 01:01:59 +0100 Subject: [PATCH] Implemented `video-io` commit-id:319ebcfb --- packages/video-io/pyproject.toml | 17 +- packages/video-io/src/video_io/__init__.py | 15 ++ packages/video-io/src/video_io/annotation.py | 250 ++++++++++++++++++ packages/video-io/src/video_io/calibration.py | 49 ++++ packages/video-io/src/video_io/frame.py | 11 + packages/video-io/src/video_io/metadata.py | 26 ++ packages/video-io/src/video_io/reader.py | 94 +++++++ packages/video-io/src/video_io/visualizer.py | 63 +++++ packages/video-io/src/video_io/writer.py | 108 ++++++++ packages/video-io/tests/__init__.py | 1 + packages/video-io/tests/helpers.py | 18 ++ .../video-io/tests/performance/__init__.py | 0 .../tests/performance/opencv_baseline.py | 33 +++ .../video-io/tests/performance/test_reader.py | 50 ++++ uv.lock | 101 +++++++ 15 files changed, 835 insertions(+), 1 deletion(-) create mode 100644 packages/video-io/src/video_io/annotation.py create mode 100644 packages/video-io/src/video_io/calibration.py create mode 100644 packages/video-io/src/video_io/frame.py create mode 100644 packages/video-io/src/video_io/metadata.py create mode 100644 packages/video-io/src/video_io/reader.py create mode 100644 packages/video-io/src/video_io/visualizer.py create mode 100644 packages/video-io/src/video_io/writer.py create mode 100644 packages/video-io/tests/__init__.py create mode 100644 packages/video-io/tests/helpers.py create mode 100644 packages/video-io/tests/performance/__init__.py create mode 100644 packages/video-io/tests/performance/opencv_baseline.py create mode 100644 packages/video-io/tests/performance/test_reader.py diff --git a/packages/video-io/pyproject.toml b/packages/video-io/pyproject.toml index 5461022..a64f682 100644 --- a/packages/video-io/pyproject.toml +++ b/packages/video-io/pyproject.toml @@ -7,8 +7,23 @@ authors = [ { name = "Jan Smółka", email = "jp.smolka@gmail.com" } ] requires-python = ">=3.12.7" -dependencies = [] +dependencies = [ + "annotated-types>=0.7.0", + "attrs>=25.1.0", + "jaxtyping>=0.2.37", + "more-itertools>=10.6.0", + "opencv-python>=4.11.0.86", + "torch>=2.5.1", + "torchcodec>=0.2.0", + "torchvision>=0.21.0", +] [build-system] requires = ["hatchling"] build-backend = "hatchling.build" + +[dependency-groups] +dev = [ + "icecream>=2.1.4", + "pytest-benchmark[histogram]>=5.1.0", +] diff --git a/packages/video-io/src/video_io/__init__.py b/packages/video-io/src/video_io/__init__.py index 8b13789..a5c2c1e 100644 --- a/packages/video-io/src/video_io/__init__.py +++ b/packages/video-io/src/video_io/__init__.py @@ -1 +1,16 @@ +from . 
import annotation, frame
+from .calibration import Calibration
+from .metadata import Metadata
+from .reader import Reader
+from .visualizer import Visualizer
+from .writer import Writer
+
+__all__ = [
+    'annotation',
+    'Calibration',
+    'Metadata',
+    'Reader',
+    'Visualizer',
+    'Writer',
+    'frame',
+]
diff --git a/packages/video-io/src/video_io/annotation.py b/packages/video-io/src/video_io/annotation.py
new file mode 100644
index 0000000..a1f57ab
--- /dev/null
+++ b/packages/video-io/src/video_io/annotation.py
@@ -0,0 +1,250 @@
+from typing import Annotated, Literal, Protocol, Self
+
+import cv2 as opencv
+import numpy
+from annotated_types import Ge, Lt
+
+# `Lt(256)` rather than `Lt(255)`: 255 is a valid byte value (see `WHITE` below).
+type Byte = Annotated[int, Ge(0), Lt(256)]
+type Color = tuple[Byte, Byte, Byte]
+
+type RgbFrame = numpy.ndarray[tuple[int, int, Literal[3]], numpy.dtype[numpy.uint8]]
+
+WHITE: Color = (255, 255, 255)
+GREEN: Color = (0, 255, 0)
+DARK_GRAY: Color = (90, 90, 90)
+
+
+def draw_point_with_description(
+    frame: RgbFrame,
+    point: tuple[int, int],
+    text: str,
+    *,
+    point_radius: int = 1,
+    point_color: Color = GREEN,
+    text_location: Literal['above', 'below'] = 'above',
+    text_from_point_offset: int = 10,
+    font: int = opencv.FONT_HERSHEY_DUPLEX,
+    font_scale: float = 1.0,
+    font_thickness: int = 1,
+    font_color: Color = WHITE,
+    box_color: Color = DARK_GRAY,
+    box_opacity: float = 0.7,
+    box_margin: int = 4,
+) -> RgbFrame:
+    opencv.circle(frame, point, point_radius, point_color, point_radius * 2)
+
+    frame_height, frame_width, _ = frame.shape
+
+    (text_width, text_height), _ = opencv.getTextSize(
+        text,
+        font,
+        font_scale,
+        font_thickness,
+    )
+
+    match text_location:
+        case 'above':
+            text_y_offset = text_height - 2 * box_margin - text_from_point_offset
+            y_min = text_height + box_margin
+            y_max = frame_height
+
+        case 'below':
+            text_y_offset = text_height + 2 * box_margin + text_from_point_offset
+            y_min = 0
+            y_max = frame_height - (text_height + box_margin)
+
+    x_min = text_width // 2
+    x_max = frame_width - x_min
+
+    text_x = __clip(point[0] - text_width // 2, x_min, x_max)
+    text_y = __clip(point[1] + text_y_offset, y_min, y_max)
+
+    draw_text_within_box(
+        frame,
+        text,
+        (text_x, text_y),
+        font=font,
+        font_scale=font_scale,
+        font_thickness=font_thickness,
+        font_color=font_color,
+        box_color=box_color,
+        box_opacity=box_opacity,
+        box_margin=box_margin,
+    )
+
+    return frame
+
+
+def draw_text_within_box(
+    frame: RgbFrame,
+    text: str,
+    position: tuple[int, int],
+    *,
+    font: int = opencv.FONT_HERSHEY_DUPLEX,
+    font_scale: float = 1.0,
+    font_thickness: int = 1,
+    font_color: Color = WHITE,
+    box_color: Color = DARK_GRAY,
+    box_opacity: float = 0.7,
+    box_margin: int = 4,
+) -> RgbFrame:
+    (text_width, text_height), _ = opencv.getTextSize(
+        text,
+        font,
+        font_scale,
+        font_thickness,
+    )
+
+    box_top_left = (
+        max(text_height, position[0] - box_margin),
+        max(0, position[1] - box_margin - text_height),
+    )
+
+    box_bottom_right = (
+        box_top_left[0] + text_width + 2 * box_margin,
+        box_top_left[1] + text_height + 2 * box_margin,
+    )
+
+    frame_height, frame_width, _ = frame.shape
+    match box_bottom_right[0] >= frame_width, box_bottom_right[1] >= frame_height:
+        case True, True:
+            box_bottom_right = (frame_width - 1, frame_height - 1)
+            box_top_left = (
+                box_bottom_right[0] - text_width - 2 * box_margin,
+                box_bottom_right[1] - text_height - 2 * box_margin,
+            )
+
+        case True, False:
+            box_bottom_right = (frame_width - 1, box_bottom_right[1])
+            box_top_left = (
+                box_bottom_right[0] - text_width - 2 * box_margin,
+                box_top_left[1],
+            )
+
+        case False, True:
+            box_bottom_right = (box_bottom_right[0], frame_height - 1)
+            box_top_left = (
+                box_top_left[0],
+                box_bottom_right[1] - text_height - 2 * box_margin,
+            )
+
+    box_sub_image = frame[
+        box_top_left[1] : box_bottom_right[1],
+        box_top_left[0] : box_bottom_right[0],
+    ]
+
+    rectangle_image = numpy.full(box_sub_image.shape, box_color, dtype=numpy.uint8)
+
+    blended_image = opencv.addWeighted(
+        box_sub_image,
+        1 - box_opacity,
+        rectangle_image,
+        box_opacity,
+        gamma=0.0,
+    )
+
+    frame[
+        box_top_left[1] : box_bottom_right[1],
+        box_top_left[0] : box_bottom_right[0],
+    ] = blended_image
+
+    opencv.putText(
+        frame,
+        text,
+        position,
+        font,
+        font_scale,
+        font_color,
+        font_thickness,
+        lineType=opencv.LINE_AA,
+    )
+
+    return frame
+
+
+# TODO: Allow customising the text position
+def draw_polygon_with_description(
+    frame: RgbFrame,
+    vertices: numpy.ndarray[tuple[int, Literal[2]], numpy.dtype[numpy.int32]],
+    text: str,
+    *,
+    area_color: Color = GREEN,
+    area_opacity: float = 0.5,
+    font: int = opencv.FONT_HERSHEY_DUPLEX,
+    font_color: Color = WHITE,
+    font_scale: float = 1.0,
+    font_thickness: int = 1,
+    box_color: Color = DARK_GRAY,
+    box_opacity: float = 0.7,
+    box_margin: int = 4,
+) -> RgbFrame:
+    draw_filled_polygon_with_opacity(
+        frame,
+        vertices,
+        color=area_color,
+        opacity=area_opacity,
+    )
+
+    text_width = opencv.getTextSize(text, font, font_scale, font_thickness)[0][0]
+
+    text_x: int
+    text_y: int
+    text_x, text_y = numpy.mean(vertices, axis=0).astype(int).tolist()
+
+    text_x -= text_width // 2
+
+    draw_text_within_box(
+        frame,
+        text,
+        (text_x, text_y),
+        font=font,
+        font_scale=font_scale,
+        font_thickness=font_thickness,
+        font_color=font_color,
+        box_color=box_color,
+        box_opacity=box_opacity,
+        box_margin=box_margin,
+    )
+
+    return frame
+
+
+def draw_filled_polygon_with_opacity(
+    frame: RgbFrame,
+    vertices: numpy.ndarray[tuple[int, Literal[2]], numpy.dtype[numpy.int32]],
+    *,
+    color: Color = GREEN,
+    opacity: float = 0.7,
+) -> RgbFrame:
+    solid_color = numpy.zeros_like(frame, dtype=numpy.uint8)
+    solid_color[:] = numpy.array(color, dtype=numpy.uint8)
+
+    mask = numpy.zeros_like(frame, dtype=numpy.uint8)
+    opencv.fillPoly(mask, [vertices], (255, 255, 255))
+    negative_mask = numpy.full_like(mask, 255) - mask
+
+    colored_polygon = opencv.bitwise_and(solid_color, mask)
+    polygon_on_frame = opencv.addWeighted(
+        colored_polygon,
+        opacity,
+        frame,
+        1 - opacity,
+        0,
+    )
+
+    opencv.bitwise_or(
+        opencv.bitwise_and(frame, negative_mask),
+        opencv.bitwise_and(polygon_on_frame, mask),
+        frame,
+    )
+
+    return frame
+
+
+class Comparable(Protocol):
+    def __lt__(self, _other: Self, /) -> bool: ...
+    def __gt__(self, _other: Self, /) -> bool: ...
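+
+
+# A minimal usage sketch (illustrative only; the canvas, coordinates and labels
+# below are made up and not part of the package):
+#
+#   canvas: RgbFrame = numpy.zeros((480, 640, 3), dtype=numpy.uint8)
+#   draw_point_with_description(canvas, (320, 240), 'marker 1')
+#   draw_polygon_with_description(
+#       canvas,
+#       numpy.array([[50, 50], [150, 50], [100, 150]], dtype=numpy.int32),
+#       'zone A',
+#   )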
+
+
+def __clip[T: Comparable](value: T, min_value: T, max_value: T) -> T:
+    # Clamp `value` to the closed interval [`min_value`, `max_value`].
+    return max(min_value, min(value, max_value))
diff --git a/packages/video-io/src/video_io/calibration.py b/packages/video-io/src/video_io/calibration.py
new file mode 100644
index 0000000..176c7ff
--- /dev/null
+++ b/packages/video-io/src/video_io/calibration.py
@@ -0,0 +1,49 @@
+from dataclasses import dataclass
+
+import serde
+import torch
+from jaxtyping import Float, Float64
+
+
+@serde.serde
+@dataclass(slots=True)
+class Calibration:
+    focal_length: tuple[float, float]
+    optical_center: tuple[float, float]
+    distortion: tuple[float, float, float, float, float]
+
+    def intrinsics_matrix(self) -> Float64[torch.Tensor, '3 3']:
+        output = torch.zeros((3, 3), dtype=torch.float64)
+
+        fx, fy = self.focal_length
+        cx, cy = self.optical_center
+
+        output[0, 0] = fx
+        output[1, 1] = fy
+        output[0, 2] = cx
+        output[1, 2] = cy
+        output[2, 2] = 1.0
+
+        return output
+
+    def distortion_vector(self) -> Float64[torch.Tensor, '5']:
+        return torch.tensor(self.distortion, dtype=torch.float64)
+
+    def unproject_depth(
+        self,
+        depth: Float[torch.Tensor, 'height width'],
+    ) -> Float[torch.Tensor, '3 height width']:
+        *_, height, width = depth.shape
+
+        u = torch.arange(width)
+        v = torch.arange(height)
+        u, v = torch.meshgrid(u, v, indexing='xy')
+
+        fx, fy = self.focal_length
+        cx, cy = self.optical_center
+
+        x = (u - cx) * depth / fx
+        y = (v - cy) * depth / fy
+        z = depth
+
+        return torch.stack((x, y, z))
diff --git a/packages/video-io/src/video_io/frame.py b/packages/video-io/src/video_io/frame.py
new file mode 100644
index 0000000..84f3d4b
--- /dev/null
+++ b/packages/video-io/src/video_io/frame.py
@@ -0,0 +1,11 @@
+import numpy
+import torch
+from jaxtyping import UInt8
+from numpy.typing import NDArray
+
+type Array = NDArray[numpy.uint8]
+
+type ArrayRgbFrame = UInt8[numpy.ndarray, 'height width 3']
+type ArrayGrayFrame = UInt8[numpy.ndarray, 'height width']
+
+type TensorRgbFrame = UInt8[torch.Tensor, '3 height width']
diff --git a/packages/video-io/src/video_io/metadata.py b/packages/video-io/src/video_io/metadata.py
new file mode 100644
index 0000000..d3a0d1e
--- /dev/null
+++ b/packages/video-io/src/video_io/metadata.py
@@ -0,0 +1,26 @@
+from typing import Self
+
+from attrs import frozen
+from torchcodec.decoders import VideoStreamMetadata  # type: ignore[attr-defined]
+
+
+@frozen
+class Metadata:
+    fps: float
+    frames: int
+    width: int
+    height: int
+
+    @classmethod
+    def from_stream_metadata(cls, stream_metadata: VideoStreamMetadata) -> Self:
+        fps = stream_metadata.average_fps_from_header
+        frames = stream_metadata.num_frames
+        width = stream_metadata.width
+        height = stream_metadata.height
+
+        assert fps is not None
+        assert frames is not None
+        assert width is not None
+        assert height is not None
+
+        return cls(fps, frames, width, height)
diff --git a/packages/video-io/src/video_io/reader.py b/packages/video-io/src/video_io/reader.py
new file mode 100644
index 0000000..2ed0415
--- /dev/null
+++ b/packages/video-io/src/video_io/reader.py
@@ -0,0 +1,94 @@
+from collections.abc import Generator
+from pathlib import Path
+from typing import Annotated, Final, cast
+
+import torch
+from annotated_types import Gt
+from jaxtyping import UInt8
+from more_itertools import take
+from torchcodec.decoders import (  # type: ignore[attr-defined]
+    VideoDecoder,
+    VideoStreamMetadata,
+)
+from torchvision.transforms import Compose, Resize
+
+from . import Metadata
+
+
+class Reader:
+    __decoder: VideoDecoder
+    __transformation: Compose
+    __frame_indices: Generator[int, None, None]
+
+    metadata: Final[Metadata]
+    device: Final[torch.device]
+
+    def __init__(
+        self,
+        source: Path,
+        device: torch.device = torch.device('cpu'),
+        fps: float | None = None,
+        width: int | None = None,
+        height: int | None = None,
+    ) -> None:
+        self.device = device
+
+        self.__decoder = VideoDecoder(
+            source,
+            # Decoding on MPS is not supported; fall back to the CPU in that case.
+            device='cpu' if device == torch.device('mps') else str(device),
+        )
+
+        self.metadata = metadata = Metadata.from_stream_metadata(
+            # `VideoDecoder.metadata` has strange typing which forces a manual downcast ;v
+            cast(VideoStreamMetadata, self.__decoder.metadata)
+        )
+
+        if fps is None:
+            self.__frame_indices = (i for i in range(metadata.frames))
+        else:
+            frames = metadata.frames
+            interpolated_length = int(frames * fps / metadata.fps)
+
+            # Subsample frame indices evenly; the endpoint is `frames - 1`
+            # to stay within the valid index range.
+            self.__frame_indices = (
+                int(i)
+                for i in torch.linspace(
+                    0,
+                    frames - 1,
+                    steps=interpolated_length,
+                    dtype=torch.int,
+                )
+            )
+
+        match height, width:
+            case None, None:
+                self.__transformation = Compose(())  # type: ignore[no-untyped-call]
+
+            case None, int(w):
+                self.__transformation = Compose([Resize((metadata.height, w))])  # type: ignore[no-untyped-call]
+
+            case int(h), None:
+                self.__transformation = Compose([Resize((h, metadata.width))])  # type: ignore[no-untyped-call]
+
+            case int(h), int(w):
+                self.__transformation = Compose([Resize((h, w))])  # type: ignore[no-untyped-call]
+
+    def read(self) -> UInt8[torch.Tensor, '3 height width'] | None:
+        match next(self.__frame_indices, None):
+            case None:
+                return None
+
+            case index:
+                # Apply the same device placement and resizing as `read_batch`.
+                frame = self.__decoder.get_frame_at(index).data.to(self.device)
+                return cast(torch.Tensor, self.__transformation(frame))
+
+    def read_batch(
+        self,
+        size: Annotated[int, Gt(0)],
+    ) -> UInt8[torch.Tensor, 'size 3 height width'] | None:
+        assert size > 0, 'Expected positive batch size'
+
+        indices = take(size, self.__frame_indices)
+        if len(indices) == 0:
+            return None
+
+        frames = self.__decoder.get_frames_at(indices).data.to(self.device)
+        return cast(torch.Tensor, self.__transformation(frames))
diff --git a/packages/video-io/src/video_io/visualizer.py b/packages/video-io/src/video_io/visualizer.py
new file mode 100644
index 0000000..8ed81db
--- /dev/null
+++ b/packages/video-io/src/video_io/visualizer.py
@@ -0,0 +1,63 @@
+from collections.abc import Iterable, Mapping
+from typing import Any, Literal, Protocol, TypedDict
+
+import numpy
+from attrs import frozen
+
+type RgbFrame = numpy.ndarray[tuple[int, int, Literal[3]], numpy.dtype[numpy.uint8]]
+
+
+class Visualizable[Context: Mapping[str, Any]](Protocol):
+    def draw(
+        self,
+        frame: RgbFrame,
+        context: Context,
+    ) -> RgbFrame: ...
+
+
+@frozen
+class Visualizer[Context: Mapping[str, Any]]:
+    context: Context
+
+    def annotate(
+        self,
+        frame: RgbFrame,
+        items: Iterable[Visualizable[Context]],
+    ) -> RgbFrame:
+        context = self.context
+
+        for item in items:
+            item.draw(frame, context)
+
+        return frame
+
+    def annotate_batch(
+        self,
+        frames: Iterable[RgbFrame],
+        items: Iterable[Iterable[Visualizable[Context]]],
+    ) -> list[RgbFrame]:
+        return [self.annotate(frame, items) for frame, items in zip(frames, items)]
+
+
+# A simple example of how to use 'contexts' in a type-safe way.
+if __name__ == '__main__':
+
+    class SampleContext(TypedDict):
+        x: int
+        y: float
+
+    class ExtendedContext(TypedDict):
+        x: int
+        y: float
+        z: str
+
+    @frozen
+    class A:
+        x: int
+
+        def draw(self, frame: RgbFrame, context: SampleContext) -> RgbFrame:
+            return frame
+
+    vis = Visualizer[ExtendedContext]({'x': 1, 'y': 1.0, 'z': ''})
+
+    vis.annotate(numpy.array(()), [A(10)])
diff --git a/packages/video-io/src/video_io/writer.py b/packages/video-io/src/video_io/writer.py
new file mode 100644
index 0000000..0fcae2d
--- /dev/null
+++ b/packages/video-io/src/video_io/writer.py
@@ -0,0 +1,108 @@
+from collections.abc import Iterable, Mapping
+from pathlib import Path
+from typing import cast
+
+import cv2 as opencv
+import torch
+from jaxtyping import UInt8
+
+from video_io.metadata import Metadata
+from video_io.visualizer import Visualizable, Visualizer
+
+
+class Writer[Context: Mapping[str, object]]:
+    __visualizer: Visualizer[Context]
+    __encoder: opencv.VideoWriter
+
+    def __init__(
+        self,
+        destination: Path,
+        metadata: Metadata,
+        # A trick to make passing the visualizer optional while keeping type safety.
+        # It works as long as `Visualizer` is immutable.
+        visualizer: Visualizer[Context] = Visualizer(cast(Context, {})),
+    ) -> None:
+        if destination.exists():
+            raise FileExistsError(
+                f'Destination file "{destination.absolute()}" already exists'
+            )
+
+        self.__visualizer = visualizer
+
+        self.__encoder = opencv.VideoWriter(
+            str(destination),
+            fourcc=self.__codec(destination.suffix),
+            fps=metadata.fps,
+            frameSize=(metadata.width, metadata.height),
+            isColor=True,
+        )
+
+    def write(
+        self,
+        frame: UInt8[torch.Tensor, '3 height width'],
+        annotations: Iterable[Visualizable[Context]] | None = None,
+    ) -> None:
+        self.write_batch(
+            frame.unsqueeze(0),
+            [annotations] if annotations is not None else None,
+        )
+
+    def write_batch(
+        self,
+        frames: UInt8[torch.Tensor, 'batch 3 height width'],
+        annotations: Iterable[Iterable[Visualizable[Context]]] | None = None,
+    ) -> None:
+        raw_frames = frames.permute(0, 2, 3, 1).cpu().detach().numpy()
+
+        if annotations is not None:
+            self.__visualizer.annotate_batch(raw_frames, annotations)
+
+        encoder = self.__encoder
+
+        for frame in raw_frames:
+            # `VideoWriter` expects BGR input, while the decoded tensors are RGB.
+            encoder.write(opencv.cvtColor(frame, opencv.COLOR_RGB2BGR))
+
+    @staticmethod
+    def __codec(file_extension: str) -> int:
+        match file_extension:
+            case '.avi':
+                # FourCC codes are case-sensitive; Motion JPEG is 'MJPG'.
+                return opencv.VideoWriter.fourcc(*'MJPG')
+
+            case '.mp4':
+                return opencv.VideoWriter.fourcc(*'mp4v')
+
+            case _:
+                raise UnsupportedFormatException(
+                    f'File extension "{file_extension}" is not supported'
+                )
+
+
+class UnsupportedFormatException(Exception): ...
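+
+
+# A minimal round-trip sketch (hypothetical file names; assumes `input.mp4`
+# exists and is readable): decode batches with `Reader` and re-encode them.
+#
+#   from video_io.reader import Reader
+#
+#   reader = Reader(Path('input.mp4'))
+#   writer = Writer(Path('output.mp4'), reader.metadata)
+#
+#   while (batch := reader.read_batch(32)) is not None:
+#       writer.write_batch(batch)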
+ + +if __name__ == '__main__': + from typing import Literal, TypedDict + + import numpy + from attrs import define + + class ContextA(TypedDict): + z: float + + @define + class A: + x: int + + def draw( + self, + frame: numpy.ndarray[tuple[int, int, Literal[3]], numpy.dtype[numpy.uint8]], + context: ContextA, + ) -> numpy.ndarray[tuple[int, int, Literal[3]], numpy.dtype[numpy.uint8]]: + return frame + + # w = Writer(Path('nothing.mp4'), Metadata(0.0, 0, 0, 0)) + # w.write(torch.tensor(()), [A(10)]) # typing error + + v = Visualizer[ContextA]({'z': 10.0}) + w = Writer(Path('nothing2.mp4'), Metadata(0.0, 0, 0, 0), v) + w.write(torch.tensor(()), [A(10)]) # ok diff --git a/packages/video-io/tests/__init__.py b/packages/video-io/tests/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/packages/video-io/tests/__init__.py @@ -0,0 +1 @@ + diff --git a/packages/video-io/tests/helpers.py b/packages/video-io/tests/helpers.py new file mode 100644 index 0000000..d53dcd9 --- /dev/null +++ b/packages/video-io/tests/helpers.py @@ -0,0 +1,18 @@ +from pathlib import Path + +from more_itertools import first_true + + +def __workspace_root() -> Path: + for parent in Path(__file__).parents: + content = parent.glob('*') + + if first_true(content, None, lambda file: file.name == 'uv.lock') is not None: + return parent + + assert False, 'unreachable' + + +WORKSPACE_ROOT = __workspace_root() +DEVELOPMENT_DIRECTORY = WORKSPACE_ROOT / 'development' +TEST_DATA_DIRECTORY = DEVELOPMENT_DIRECTORY / 'test_data' diff --git a/packages/video-io/tests/performance/__init__.py b/packages/video-io/tests/performance/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/packages/video-io/tests/performance/opencv_baseline.py b/packages/video-io/tests/performance/opencv_baseline.py new file mode 100644 index 0000000..d2733c3 --- /dev/null +++ b/packages/video-io/tests/performance/opencv_baseline.py @@ -0,0 +1,33 @@ +from pathlib import Path +from typing import Literal + +import cv2 as opencv +import numpy + + +class PoorMansReader: + __decoder: opencv.VideoCapture + __height: int + __width: int + + def __init__(self, source: Path) -> None: + assert source.is_file() + + self.__decoder = decoder = opencv.VideoCapture(str(source)) + self.__height = int(decoder.get(opencv.CAP_PROP_FRAME_HEIGHT)) + self.__width = int(decoder.get(opencv.CAP_PROP_FRAME_WIDTH)) + + def read_batch( + self, + size: int, + ) -> numpy.ndarray[tuple[int, int, int, Literal[3]], numpy.dtype[numpy.uint8]] | None: + batch = numpy.empty((size, self.__height, self.__width, 3), dtype=numpy.uint8) + + decoder = self.__decoder + + for i in range(size): + success, _ = decoder.read(batch[i, ...]) + if not success: + return None + + return batch diff --git a/packages/video-io/tests/performance/test_reader.py b/packages/video-io/tests/performance/test_reader.py new file mode 100644 index 0000000..60bcbb1 --- /dev/null +++ b/packages/video-io/tests/performance/test_reader.py @@ -0,0 +1,50 @@ +from pathlib import Path +from typing import Literal, Protocol + +import pytest +import torch +from pytest_benchmark.fixture import BenchmarkFixture +from video_io.reader import Reader + +from ..helpers import TEST_DATA_DIRECTORY +from .opencv_baseline import PoorMansReader + + +@pytest.fixture(scope='module') +def calibration_video() -> Path: + return TEST_DATA_DIRECTORY / 'calibration' / 'lab_ceiling.avi' + + +@pytest.mark.benchmark( + group='reader-benchmark', + disable_gc=True, + min_rounds=10, +) +@pytest.mark.skip(reason='Performance test is 
not a part of the standard suite.') +@pytest.mark.usefixtures('calibration_video') +@pytest.mark.parametrize('batch_size', [10, 30, 50]) +@pytest.mark.parametrize('reader_type', ['torch', 'opencv']) +def test_reader( + benchmark: BenchmarkFixture, + calibration_video: Path, + batch_size: int, + reader_type: Literal['torch', 'opencv'], +) -> None: + reader = ( + Reader(calibration_video, torch.device('cpu')) + if reader_type == 'torch' + else PoorMansReader(calibration_video) + ) + benchmark(lambda: read_whole_video(reader, batch_size)) + + +class AnyReader(Protocol): + def read_batch(self, size: int) -> object | None: ... + + +def read_whole_video(reader: AnyReader, batch_size: int) -> None: + while True: + batch = reader.read_batch(batch_size) + + if batch is None: + break diff --git a/uv.lock b/uv.lock index e5f59be..5af24e2 100644 --- a/uv.lock +++ b/uv.lock @@ -22,6 +22,15 @@ members = [ "vpc", ] +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, +] + [[package]] name = "asttokens" version = "3.0.0" @@ -377,6 +386,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 }, ] +[[package]] +name = "jaxtyping" +version = "0.2.38" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wadler-lindig" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/34/a5/83fbf2ed24f8bd9af80536b3139e9c9cb8fb096d6ceeb28965b847fae9ae/jaxtyping-0.2.38.tar.gz", hash = "sha256:84d509341437189e82d7dbb59a2970435724851ca79fd8550e886cd37c048333", size = 45785 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/7e/da7b57a1f3af7303a0f3c8594d820fc0d3a9bbe3810a357eb21eb166e76b/jaxtyping-0.2.38-py3-none-any.whl", hash = "sha256:bc209ab8ec29917b6f0c7dec4a8ea1fc276f7d94f25b71c01d1243ec2b21ae12", size = 56375 }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -806,6 +827,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335 }, ] +[[package]] +name = "pygal" +version = "3.0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "importlib-metadata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/7b/8f50821a0f1585881ef40ae13ecb7603b0d81ef99fedf992ec35e6b6f7d5/pygal-3.0.5.tar.gz", hash = "sha256:c0a0f34e5bc1c01975c2bfb8342ad521e293ad42e525699dd00c4d7a52c14b71", size = 80489 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/7d/b5d656dbeb73f488ce7409a75108a775f6cf8e20624ed8025a9476cbc1bb/pygal-3.0.5-py3-none-any.whl", hash = "sha256:a3268a5667b470c8fbbb0eca7e987561a7321caeba589d40e4c1bc16dbe71393", size = 129548 }, +] + 
+[[package]] +name = "pygaljs" +version = "1.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/75/19/3a53f34232a9e6ddad665e71c83693c5db9a31f71785105905c5bc9fbbba/pygaljs-1.0.2.tar.gz", hash = "sha256:0b71ee32495dcba5fbb4a0476ddbba07658ad65f5675e4ad409baf154dec5111", size = 89711 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/49/6f/07dab31ca496feda35cf3455b9e9380c43b5c685bb54ad890831c790da38/pygaljs-1.0.2-py2.py3-none-any.whl", hash = "sha256:d75e18cb21cc2cda40c45c3ee690771e5e3d4652bf57206f20137cf475c0dbe8", size = 91111 }, +] + [[package]] name = "pygments" version = "2.19.1" @@ -852,6 +894,13 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/d6/b41653199ea09d5969d4e385df9bbfd9a100f28ca7e824ce7c0a016e3053/pytest_benchmark-5.1.0-py3-none-any.whl", hash = "sha256:922de2dfa3033c227c96da942d1878191afa135a29485fb942e85dff1c592c89", size = 44259 }, ] +[package.optional-dependencies] +histogram = [ + { name = "pygal" }, + { name = "pygaljs" }, + { name = "setuptools" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1101,6 +1150,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/b4/605ae4173aa37fb5aa14605d100ff31f4f5d49f617928c9f486bb3aaec08/torch-2.6.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:9a610afe216a85a8b9bc9f8365ed561535c93e804c2a317ef7fabcc5deda0989", size = 66532538 }, ] +[[package]] +name = "torchcodec" +version = "0.2.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/fc/ad0931351b084c1a9840e018543d1316a1dbf6cc8b776c1d81045c2284fc/TorchCodec-0.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d4b194bfd3f8cc77986e327c8a13d4eb86ef1eba860096e81117cd6b9cc64960", size = 3035140 }, + { url = "https://files.pythonhosted.org/packages/2c/e8/16093552d6381bc943bd1bfb0f27aa5c0382d8787449124d78b06213d96b/TorchCodec-0.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0735917480efe7c7b7ce3f1a7ccc9832faf43d085cce75cdd031fd7f8f14cbb9", size = 766567 }, +] + [[package]] name = "torchvision" version = "0.21.0" @@ -1249,12 +1307,55 @@ wheels = [ name = "video-io" version = "0.1.0" source = { editable = "packages/video-io" } +dependencies = [ + { name = "annotated-types" }, + { name = "attrs" }, + { name = "jaxtyping" }, + { name = "more-itertools" }, + { name = "opencv-python" }, + { name = "torch" }, + { name = "torchcodec" }, + { name = "torchvision" }, +] + +[package.dev-dependencies] +dev = [ + { name = "icecream" }, + { name = "pytest-benchmark", extra = ["histogram"] }, +] + +[package.metadata] +requires-dist = [ + { name = "annotated-types", specifier = ">=0.7.0" }, + { name = "attrs", specifier = ">=25.1.0" }, + { name = "jaxtyping", specifier = ">=0.2.37" }, + { name = "more-itertools", specifier = ">=10.6.0" }, + { name = "opencv-python", specifier = ">=4.11.0.86" }, + { name = "torch", specifier = ">=2.5.1" }, + { name = "torchcodec", specifier = ">=0.2.0" }, + { name = "torchvision", specifier = ">=0.21.0" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "icecream", specifier = ">=2.1.4" }, + { name = "pytest-benchmark", extras = ["histogram"], specifier = ">=5.1.0" }, +] [[package]] name = "vpc" version = "0.1.0" source = { editable = "packages/vpc" } +[[package]] +name = "wadler-lindig" +version = "0.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/39/7a/fea25d7985211556bbe2511d42e07453b484bf8e0d5d6109aabb08f52784/wadler_lindig-0.1.4.tar.gz", hash = "sha256:75aa3ddd384573c41d5c910fd990e655c2a641e5093cf5081650d0229daf87ad", size = 15356 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/69/cfb1af44622044d4db0cad65721d283a921a4795f0ad121616b9eaa6ccd7/wadler_lindig-0.1.4-py3-none-any.whl", hash = "sha256:5c463aeb1f4ddc4acc12c3708d22ae21bcfc3e19e7c4d7aeef6642ea57b1a8b8", size = 20126 }, +] + [[package]] name = "zipp" version = "3.21.0"