Skip to content

Commit 462b1f0

Browse files
EldiesSpecLadzhiltsov-max
authored
Updating ffmpeg to 8.0 (#9552)
- Updated FFmpeg to 8.0, OpenH264 to 2.6.0 and av to 15.1.0 - Added an error resolution script to help users in the case of issues with the updated version in old tasks --------- Co-authored-by: Roman Donchenko <[email protected]> Co-authored-by: Maxim Zhiltsov <[email protected]>
1 parent 4849a5e commit 462b1f0

File tree

11 files changed

+181
-56
lines changed

11 files changed

+181
-56
lines changed

Dockerfile

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ FROM build-image-base AS build-image-av
3737
ARG PREFIX=/opt/ffmpeg
3838
ARG PKG_CONFIG_PATH=${PREFIX}/lib/pkgconfig
3939

40-
ENV FFMPEG_VERSION=4.3.1 \
41-
OPENH264_VERSION=2.1.1
40+
ENV FFMPEG_VERSION=8.0 \
41+
OPENH264_VERSION=2.6.0
4242

4343
WORKDIR /tmp/openh264
4444
RUN curl -sL https://github.com/cisco/openh264/archive/v${OPENH264_VERSION}.tar.gz --output - | \
@@ -61,11 +61,8 @@ COPY utils/dataset_manifest/requirements.txt /tmp/utils/dataset_manifest/require
6161
RUN grep -q '^av==' /tmp/utils/dataset_manifest/requirements.txt
6262
RUN sed -i '/^av==/!d' /tmp/utils/dataset_manifest/requirements.txt
6363

64-
# Work around https://github.com/PyAV-Org/PyAV/issues/1140
65-
RUN pip install setuptools wheel 'cython<3'
66-
6764
RUN --mount=type=cache,target=/root/.cache/pip/http-v2 \
68-
python3 -m pip wheel --no-binary=av --no-build-isolation \
65+
python3 -m pip wheel --no-binary=av \
6966
-r /tmp/utils/dataset_manifest/requirements.txt \
7067
-w /tmp/wheelhouse
7168

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
### Changed
2+
3+
- FFmpeg updated to 8.0
4+
(<https://github.com/cvat-ai/cvat/pull/9552>)

cvat/apps/engine/media_extractors.py

Lines changed: 23 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,10 @@
1515
from abc import ABC, abstractmethod
1616
from bisect import bisect
1717
from collections.abc import Generator, Iterable, Iterator, Sequence
18-
from contextlib import AbstractContextManager, ExitStack, closing, contextmanager
18+
from contextlib import ExitStack, closing
1919
from dataclasses import dataclass
2020
from enum import IntEnum
21+
from fractions import Fraction
2122
from random import shuffle
2223
from typing import Any, Callable, Optional, Protocol, TypeVar, Union
2324

@@ -571,28 +572,14 @@ def extract(self):
571572
os.remove(self._zip_source.filename)
572573

573574
class _AvVideoReading:
574-
@contextmanager
575575
def read_av_container(
576576
self, source: Union[str, io.BytesIO]
577-
) -> Generator[av.container.InputContainer, None, None]:
577+
) -> av.container.InputContainer:
578578
if isinstance(source, io.BytesIO):
579579
source.seek(0) # required for re-reading
580580

581-
container = av.open(source)
582-
try:
583-
yield container
584-
finally:
585-
# fixes a memory leak in input container closing
586-
# https://github.com/PyAV-Org/PyAV/issues/1117
587-
for stream in container.streams:
588-
context = stream.codec_context
589-
if context and context.is_open:
590-
# Currently, context closing may get stuck on some videos for an unknown reason,
591-
# so the thread_type == 'AUTO' setting is disabled for future investigation
592-
context.close()
593-
594-
if container.open_files:
595-
container.close()
581+
return av.open(source)
582+
596583

597584
def decode_stream(
598585
self, container: av.container.Container, video_stream: av.video.stream.VideoStream
@@ -679,13 +666,10 @@ def iterate_frames(
679666
with closing(self._decode_stream(container, video_stream)) as stream_decoder:
680667
for frame, frame_number in zip(stream_decoder, frame_counter):
681668
if frame_number == next_frame_filter_frame:
682-
if video_stream.metadata.get('rotate'):
669+
if frame.rotation:
683670
pts = frame.pts
684671
frame = av.VideoFrame().from_ndarray(
685-
rotate_image(
686-
frame.to_ndarray(format='bgr24'),
687-
360 - int(video_stream.metadata.get('rotate'))
688-
),
672+
rotate_image(frame.to_ndarray(format='bgr24'), frame.rotation),
689673
format ='bgr24'
690674
)
691675
frame.pts = pts
@@ -707,7 +691,7 @@ def get_progress(self, pos):
707691
duration = self._get_duration()
708692
return pos / duration if duration else None
709693

710-
def _read_av_container(self) -> AbstractContextManager[av.container.InputContainer]:
694+
def _read_av_container(self) -> av.container.InputContainer:
711695
return _AvVideoReading().read_av_container(self._source_path[0])
712696

713697
def _decode_stream(
@@ -798,7 +782,7 @@ def __init__(self, manifest_path: str, source_path: str, *, allow_threading: boo
798782

799783
self.allow_threading = allow_threading
800784

801-
def _read_av_container(self) -> AbstractContextManager[av.container.InputContainer]:
785+
def _read_av_container(self) -> av.container.InputContainer:
802786
return _AvVideoReading().read_av_container(self.source_path)
803787

804788
def _decode_stream(
@@ -843,12 +827,9 @@ def iterate_frames(self, *, frame_filter: Iterable[int]) -> Iterable[av.VideoFra
843827
with closing(self._decode_stream(container, video_stream)) as stream_decoder:
844828
for frame, frame_number in zip(stream_decoder, frame_counter):
845829
if frame_number == next_frame_filter_frame:
846-
if video_stream.metadata.get('rotate'):
830+
if frame.rotation:
847831
frame = av.VideoFrame().from_ndarray(
848-
rotate_image(
849-
frame.to_ndarray(format='bgr24'),
850-
360 - int(video_stream.metadata.get('rotate'))
851-
),
832+
rotate_image(frame.to_ndarray(format='bgr24'), frame.rotation),
852833
format ='bgr24'
853834
)
854835

@@ -1050,7 +1031,7 @@ def __init__(self, quality=67):
10501031
"preset": "ultrafast",
10511032
}
10521033

1053-
def _add_video_stream(self, container: av.container.OutputContainer, w, h, rate, options):
1034+
def _add_video_stream(self, container: av.container.OutputContainer, w, h, rate, options) -> av.video.stream.VideoStream:
10541035
# x264 requires width and height must be divisible by 2 for yuv420p
10551036
if h % 2:
10561037
h += 1
@@ -1067,6 +1048,14 @@ def _add_video_stream(self, container: av.container.OutputContainer, w, h, rate,
10671048
video_stream.pix_fmt = "yuv420p"
10681049
video_stream.width = w
10691050
video_stream.height = h
1051+
1052+
if "profile" in options:
1053+
video_stream.profile = options["profile"]
1054+
if "qmin" in options:
1055+
video_stream.codec_context.qmin = int(options["qmin"])
1056+
video_stream.codec_context.qmax = int(options["qmax"])
1057+
options = {k: options[k] for k in options if k not in ("profile", "qmin", "qmax")}
1058+
10701059
video_stream.options = options
10711060

10721061
return video_stream
@@ -1103,8 +1092,7 @@ def save_as_chunk(
11031092
options=self._codec_opts,
11041093
)
11051094

1106-
with closing(output_v_stream):
1107-
self._encode_images(images, output_container, output_v_stream)
1095+
self._encode_images(images, output_container, output_v_stream)
11081096

11091097
return [(input_w, input_h)]
11101098

@@ -1115,7 +1103,7 @@ def _encode_images(
11151103
for frame, _, _ in images:
11161104
# let libav set the correct pts and time_base
11171105
frame.pts = None
1118-
frame.time_base = None
1106+
frame.time_base = Fraction(0, 1)
11191107

11201108
for packet in stream.encode(frame):
11211109
container.mux(packet)
@@ -1160,8 +1148,7 @@ def save_as_chunk(self, images, chunk_path):
11601148
options=self._codec_opts,
11611149
)
11621150

1163-
with closing(output_v_stream):
1164-
self._encode_images(images, output_container, output_v_stream)
1151+
self._encode_images(images, output_container, output_v_stream)
11651152

11661153
return [(input_w, input_h)]
11671154

cvat/requirements/base.in

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,7 @@
22

33
attrs==21.4.0
44

5-
# This is the last version of av that supports ffmpeg we depend on.
6-
# Changing ffmpeg is undesirable, as there might be video decoding differences
7-
# between versions.
8-
# TODO: try to move to the newer version
9-
av==9.2.0
5+
av==15.1.0
106

117
azure-storage-blob==12.13.0
128
boto3~=1.37

site/content/en/docs/administration/advanced/upgrade_guide.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,13 @@ To upgrade CVAT, follow these steps:
5656
docker logs cvat_server -f
5757
```
5858

59+
## How to upgrade CVAT from v2.46.0 to v2.47.0
60+
61+
In version 2.47.0, CVAT upgraded the FFmpeg library it uses to split videos into frames from 4.3.1 to 8.0.
62+
There is a small chance that some video files may be processed differently by the new FFmpeg version.
63+
64+
If one of your tasks is affected, follow the guide in ./utils/ffmpeg_compatibility/README.md
65+
5966
## Upgrade CVAT after v2.26.0
6067

6168
In version 2.26.0, CVAT changed the location where the export cache is stored.

utils/dataset_manifest/core.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,13 @@ def __init__(self, source_path, chunk_size, force):
3636
for packet in container.demux(video_stream):
3737
for frame in packet.decode():
3838
# check type of first frame
39-
if not frame.pict_type.name == "I":
39+
if frame.pict_type != av.video.frame.PictureType.I:
4040
raise InvalidVideoError("The first frame is not a key frame")
4141

4242
# get video resolution
43-
if video_stream.metadata.get("rotate"):
43+
if frame.rotation:
4444
frame = av.VideoFrame().from_ndarray(
45-
rotate_image(
46-
frame.to_ndarray(format="bgr24"),
47-
360 - int(container.streams.video[0].metadata.get("rotate")),
48-
),
45+
rotate_image(frame.to_ndarray(format="bgr24"), frame.rotation),
4946
format="bgr24",
5047
)
5148
self.height, self.width = (frame.height, frame.width)

utils/dataset_manifest/requirements.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
av==9.2.0 # Pinned for the whole CVAT
1+
av==15.1.0 # Pinned for the whole CVAT
22
natsort>=8.0.0
33
opencv-python-headless>=4.4.0.42
44
Pillow>=10.3.0

utils/dataset_manifest/requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
# SHA1:3671835f743ca6c6c8d49b36eda2bb7e0763fa0b
1+
# SHA1:c994feeb8bd193f610d522cfea809387b62748ab
22
#
33
# This file is automatically generated.
44
# To update it, refer to cvat/requirements/README.txt.
55
#
6-
av==9.2.0
6+
av==15.1.0
77
# via -r utils/dataset_manifest/requirements.in
88
natsort==8.0.0
99
# via -r utils/dataset_manifest/requirements.in
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Description
2+
In version 2.47.0, CVAT upgraded the FFmpeg library it uses to split videos into frames from 4.3.1 to 8.0.
3+
4+
There is a small chance that some video files may be split into frames differently by different FFmpeg versions.
5+
6+
In the case of any difference in frame decoding,
7+
this script may be used to switch a task to static chunks and generate frames with the old FFmpeg version.
8+
9+
> NOTE: This option requires administrator
10+
> access to the server instance. If you do not have such access, please try
11+
> to contact the server administrator.
12+
13+
# Usage
14+
15+
If your CVAT is deployed through docker, run
16+
```shell
17+
docker compose \
18+
-f docker-compose.yml \
19+
`# optionally: -f docker-compose.dev.yml` \
20+
-f ./utils/ffmpeg_compatibility/docker-compose.yml \
21+
run --rm generate_chunks_for_task <task_id>
22+
```
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
services:
2+
generate_chunks_for_task:
3+
image: cvat/server:v2.46.0
4+
environment:
5+
CVAT_POSTGRES_HOST: cvat_db
6+
depends_on:
7+
- cvat_db
8+
volumes:
9+
- cvat_data:/home/django/data
10+
- ./utils/ffmpeg_compatibility/switch_task_to_static_cache.py:/home/django/switch_task_to_static_cache.py:ro
11+
networks:
12+
- cvat
13+
entrypoint: ["python", "switch_task_to_static_cache.py"]

0 commit comments

Comments
 (0)