diff --git a/changelog.d/20251125_154952_max.christoph_chapters.md b/changelog.d/20251125_154952_max.christoph_chapters.md new file mode 100644 index 000000000000..819214b2758d --- /dev/null +++ b/changelog.d/20251125_154952_max.christoph_chapters.md @@ -0,0 +1,4 @@ +### Added + +- Show video chapters in the player progress as marks. Seek to chapters with a menu and with player navigation buttons. + () diff --git a/cvat-core/src/frames.ts b/cvat-core/src/frames.ts index a3695bcb04cc..a1d8e1f9d1f8 100644 --- a/cvat-core/src/frames.ts +++ b/cvat-core/src/frames.ts @@ -9,7 +9,7 @@ import { } from 'cvat-data'; import PluginRegistry from './plugins'; import serverProxy from './server-proxy'; -import { SerializedFramesMetaData } from './server-response-types'; +import { SerializedChapterMetaData, SerializedFramesMetaData } from './server-response-types'; import { ArgumentError } from './exceptions'; import { FieldUpdateTrigger } from './common'; import config from './config'; @@ -83,8 +83,28 @@ interface FramesMetaDataUpdatedData { deletedFrames: Record; } +export class ChapterMetaData { + readonly #title: string; + + constructor(initialData: SerializedChapterMetaData) { + this.#title = initialData.title; + } + + get title(): string { + return this.#title; + } +} + +export class Chapter { + public id: number; + public start: number; + public stop: number; + public metadata: ChapterMetaData; +} + export class FramesMetaData { public chunkSize: number; + public chapters: Chapter[] | null; public deletedFrames: Record; public includedFrames: number[] | null; public frameFilter: string; @@ -109,6 +129,7 @@ export class FramesMetaData { constructor(initialData: Omit & { deleted_frames: Record }) { const data: typeof initialData = { chunk_size: undefined, + chapters: [], deleted_frames: {}, included_frames: null, frame_filter: undefined, @@ -174,6 +195,9 @@ export class FramesMetaData { chunkSize: { get: () => data.chunk_size, }, + chapters: { + get: () => data.chapters, + }, deletedFrames: { get: () => data.deleted_frames, }, @@ -975,9 +999,13 @@ export async function patchMeta(id: number, meta?: FramesMetaData, session: 'job } export async function findFrame( - jobID: number, frameFrom: number, frameTo: number, filters: { offset?: number, notDeleted: boolean }, + jobID: number, + frameFrom: number, + frameTo: number, + filters: { offset?: number, notDeleted: boolean, chapterMark?: boolean }, ): Promise { const offset = filters.offset || 1; + const chapterMark = filters.chapterMark || false; const meta = await getFramesMeta('job', jobID); const sign = Math.sign(frameTo - frameFrom); @@ -997,6 +1025,11 @@ export async function findFrame( if (filters.notDeleted) { return !(frame in meta.deletedFrames); } + + if (chapterMark) { + return meta.chapters.some((chapter) => chapter.start === frame); + } + return true; }; for (let frame = frameFrom; predicate(frame); frame = update(frame)) { diff --git a/cvat-core/src/server-response-types.ts b/cvat-core/src/server-response-types.ts index 105ab635af2b..811baee93b43 100644 --- a/cvat-core/src/server-response-types.ts +++ b/cvat-core/src/server-response-types.ts @@ -493,8 +493,20 @@ export interface SerializedCloudStorage { manifests?: string[]; } +export interface SerializedChapterMetaData { + title: string; +} + +export interface SerializedChapter { + id: number; + start: number; + end: number; + metadata: SerializedChapterMetaData; +} + export interface SerializedFramesMetaData { chunk_size: number; + chapters: SerializedChapter[] | null deleted_frames: number[]; included_frames: number[] | null; frame_filter: string; diff --git a/cvat-core/src/session.ts b/cvat-core/src/session.ts index aff144865694..5ec54a218930 100644 --- a/cvat-core/src/session.ts +++ b/cvat-core/src/session.ts @@ -435,6 +435,7 @@ export class Session { filters: { offset?: number, notDeleted: boolean, + chapterMark?: boolean, }, frameFrom: number, frameTo: number, diff --git a/cvat-ui/src/actions/annotation-actions.ts b/cvat-ui/src/actions/annotation-actions.ts index 3a8ca22bd870..bb8ae3287d6a 100644 --- a/cvat-ui/src/actions/annotation-actions.ts +++ b/cvat-ui/src/actions/annotation-actions.ts @@ -155,6 +155,7 @@ export enum AnnotationActionTypes { SWITCH_Z_LAYER = 'SWITCH_Z_LAYER', ADD_Z_LAYER = 'ADD_Z_LAYER', SEARCH_ANNOTATIONS_FAILED = 'SEARCH_ANNOTATIONS_FAILED', + SEARCH_CHAPTERS_FAILED = 'SEARCH_CHAPTERS_FAILED', CHANGE_WORKSPACE = 'CHANGE_WORKSPACE', SAVE_LOGS_SUCCESS = 'SAVE_LOGS_SUCCESS', SAVE_LOGS_FAILED = 'SAVE_LOGS_FAILED', @@ -172,6 +173,16 @@ export enum AnnotationActionTypes { RESTORE_FRAME_FAILED = 'RESTORE_FRAME_FAILED', UPDATE_BRUSH_TOOLS_CONFIG = 'UPDATE_BRUSH_TOOLS_CONFIG', HIGHLIGHT_CONFLICT = 'HIGHLIGHT_CONFCLICT', + HOVERED_CHAPTER = 'HOVERED_CHAPTER', +} + +export function setHoveredChapter(id: number | null): AnyAction { + return { + type: AnnotationActionTypes.HOVERED_CHAPTER, + payload: { + id, + }, + }; } export function saveLogsAsync(): ThunkAction { @@ -1343,6 +1354,40 @@ export function searchAnnotationsAsync( }; } +export function searchChaptersAsync( + sessionInstance: NonNullable, + frameFrom: number, + frameTo: number, +) { + return async (dispatch: ThunkDispatch, getState: () => CombinedState): Promise => { + try { + const { + settings: { + player: { showDeletedFrames }, + }, + } = getState(); + + const frame = await sessionInstance.frames + .search( + { + notDeleted: showDeletedFrames, + chapterMark: true, + }, + frameFrom, + frameTo, + ); + if (frame !== null) { + dispatch(changeFrameAsync(frame)); + } + } catch (error) { + dispatch({ + type: AnnotationActionTypes.SEARCH_CHAPTERS_FAILED, + payload: { error }, + }); + } + }; +} + export const ShapeTypeToControl: Record = { [ShapeType.RECTANGLE]: ActiveControl.DRAW_RECTANGLE, [ShapeType.POLYLINE]: ActiveControl.DRAW_POLYLINE, diff --git a/cvat-ui/src/assets/chapter-menu.svg b/cvat-ui/src/assets/chapter-menu.svg new file mode 100644 index 000000000000..4d58976fe0e8 --- /dev/null +++ b/cvat-ui/src/assets/chapter-menu.svg @@ -0,0 +1,8 @@ + diff --git a/cvat-ui/src/assets/next_chapter_icon.svg b/cvat-ui/src/assets/next_chapter_icon.svg new file mode 100644 index 000000000000..3834245826cf --- /dev/null +++ b/cvat-ui/src/assets/next_chapter_icon.svg @@ -0,0 +1,12 @@ + + diff --git a/cvat-ui/src/assets/previous_chapter_icon.svg b/cvat-ui/src/assets/previous_chapter_icon.svg new file mode 100644 index 000000000000..e89561b05dad --- /dev/null +++ b/cvat-ui/src/assets/previous_chapter_icon.svg @@ -0,0 +1,10 @@ + + diff --git a/cvat-ui/src/components/annotation-page/styles.scss b/cvat-ui/src/components/annotation-page/styles.scss index 0f9a14515f83..6a811829fa6e 100644 --- a/cvat-ui/src/components/annotation-page/styles.scss +++ b/cvat-ui/src/components/annotation-page/styles.scss @@ -156,6 +156,33 @@ height: $grid-unit-size; background-color: $player-slider-color; } + + > .ant-slider-mark { + z-index: 2; + top: -16px; + + > .ant-slider-mark-text { + > .ant-slider-mark-chapter { + display: inline-block; + width: 2px; + height: 10px; + background-color: #ccc; + border-radius: 2px; + transition: all 0.1s ease; + transform-origin: center center; + transform: scaleX(1); + } + + > .ant-slider-mark-chapter.active { + transform: scaleX(1.5); + background-color: #ff4136; + } + } + } + + > .ant-slider-step > .ant-slider-dot { + z-index: 2; + } } .cvat-player-slider-progress { @@ -457,6 +484,28 @@ } } +.cvat-player-chapter-menu-wrapper { + max-height: 37 * $grid-unit-size; + max-width: 22 * $grid-unit-size; + overflow: auto; +} + +.cvat-player-chapter-menu-list { + .cvat-player-chapter-menu-list-item { + margin-right: 2px; + + &:hover { + transform: translateX(2px); + box-shadow: 0 2px 5px rgba(0,0,0,10%); + background-color: #f5f5f5; + } + } +} + +.cvat-player-chapters-menu-button svg { + transform: scale(0.8); +} + .cvat-annotations-filters-input.ant-select { > .ant-select-selector { height: 32px; @@ -473,7 +522,9 @@ .cvat-player-previous-filtered-inlined-button, .cvat-player-next-filtered-inlined-button, .cvat-player-previous-empty-inlined-button, -.cvat-player-next-empty-inlined-button { +.cvat-player-next-empty-inlined-button, +.cvat-player-previous-chapter-inlined-button, +.cvat-player-next-chapter-inlined-button { color: $player-buttons-color; &:not(:first-child) { diff --git a/cvat-ui/src/components/annotation-page/top-bar/chapter-menu.tsx b/cvat-ui/src/components/annotation-page/top-bar/chapter-menu.tsx new file mode 100644 index 000000000000..e89d8d21bf60 --- /dev/null +++ b/cvat-ui/src/components/annotation-page/top-bar/chapter-menu.tsx @@ -0,0 +1,87 @@ +// Copyright (C) CVAT.ai Corporation +// +// SPDX-License-Identifier: MIT + +import React from 'react'; +import Icon from '@ant-design/icons'; +import Popover from 'antd/lib/popover'; +import List from 'antd/lib/list'; +import CvatTooltip from 'components/common/cvat-tooltip'; +import { Chapter } from 'cvat-core/src/frames'; +import { ChapterMenuIcon } from 'icons'; + +interface Props { + chapters: Chapter[]; + onSelectChapter: (id: number) => void; + onHoveredChapter?: (id: number | null) => void; +} + +function ChapterMenu(props: Readonly): JSX.Element { + const { + chapters, + onSelectChapter, + onHoveredChapter, + } = props; + + const content = ( +
+ { + const itemClass = 'cvat-player-chapter-menu-list-item'; + + return ( + onSelectChapter(chapter.id)} + onMouseEnter={() => onHoveredChapter?.(chapter.id)} + onMouseLeave={() => onHoveredChapter?.(null)} + > +
+ + + {chapter.id} + {': '} + + {chapter.metadata.title} + +
+ Frames + {' '} + {chapter.start} + - + {chapter.stop} +
+
+
+ + ); + }} + /> +
+ ); + + return ( + + + + + + + + ); +} + +export default React.memo(ChapterMenu); diff --git a/cvat-ui/src/components/annotation-page/top-bar/player-buttons.tsx b/cvat-ui/src/components/annotation-page/top-bar/player-buttons.tsx index 474903137455..d2eebe2327b3 100644 --- a/cvat-ui/src/components/annotation-page/top-bar/player-buttons.tsx +++ b/cvat-ui/src/components/annotation-page/top-bar/player-buttons.tsx @@ -11,22 +11,26 @@ import CVATTooltip from 'components/common/cvat-tooltip'; import GlobalHotKeys, { KeyMap } from 'utils/mousetrap-react'; import { NavigationType, Workspace } from 'reducers'; import { - FirstIcon, BackJumpIcon, - PreviousIcon, - PreviousFilteredIcon, - PreviousEmptyIcon, - PlayIcon, - PauseIcon, - NextIcon, - NextFilteredIcon, - NextEmptyIcon, + FirstIcon, ForwardJumpIcon, LastIcon, + NextChapterIcon, + NextEmptyIcon, + NextFilteredIcon, + NextIcon, + PauseIcon, + PlayIcon, + PreviousChapterIcon, + PreviousEmptyIcon, + PreviousFilteredIcon, + PreviousIcon, } from 'icons'; import { ShortcutScope } from 'utils/enums'; import { registerComponentShortcuts } from 'actions/shortcuts-actions'; import { subKeyMap } from 'utils/component-subkeymap'; +import { Chapter } from 'cvat-core/src/frames'; +import ChapterMenu from './chapter-menu'; interface Props { playing: boolean; @@ -35,6 +39,7 @@ interface Props { previousFrameShortcut: string; forwardShortcut: string; backwardShortcut: string; + chapters: Chapter[]; keyMap: KeyMap; workspace: Workspace; navigationType: NavigationType; @@ -46,6 +51,9 @@ interface Props { onFirstFrame(): void; onLastFrame(): void; onSearchAnnotations(direction: 'forward' | 'backward'): void; + onSearchChapters(direction: 'forward' | 'backward'): void; + onHoveredChapter(id: number | null): void; + onSelectChapter(id: number): void; setNavigationType(navigationType: NavigationType): void; } @@ -86,6 +94,18 @@ const componentShortcuts = { sequences: ['left'], scope: ShortcutScope.ANNOTATION_PAGE, }, + CHAPTER_BACKWARD: { + name: 'Chapter backward', + description: 'Go to the previous chapter', + sequences: ['x'], + scope: ShortcutScope.ANNOTATION_PAGE, + }, + CHAPTER_FORWARD: { + name: 'Chapter forward', + description: 'Go to the next chapter', + sequences: ['b'], + scope: ShortcutScope.ANNOTATION_PAGE, + }, PLAY_PAUSE: { name: 'Play/pause', description: 'Start/stop automatic changing frames', @@ -105,6 +125,7 @@ function PlayerButtons(props: Props): JSX.Element { forwardShortcut, backwardShortcut, keyMap, + chapters, navigationType, workspace, onSwitchPlay, @@ -116,6 +137,9 @@ function PlayerButtons(props: Props): JSX.Element { onLastFrame, setNavigationType, onSearchAnnotations, + onSearchChapters, + onHoveredChapter, + onSelectChapter, } = props; const handlers: Partial void)>> = { @@ -144,6 +168,14 @@ function PlayerButtons(props: Props): JSX.Element { event?.preventDefault(); onSearchAnnotations('backward'); }, + CHAPTER_BACKWARD: (event: KeyboardEvent | undefined) => { + event?.preventDefault(); + onSearchChapters('backward'); + }, + CHAPTER_FORWARD: (event: KeyboardEvent | undefined) => { + event?.preventDefault(); + onSearchChapters('forward'); + }, PLAY_PAUSE: (event: KeyboardEvent | undefined) => { event?.preventDefault(); onSwitchPlay(); @@ -154,9 +186,11 @@ function PlayerButtons(props: Props): JSX.Element { const prevRegularText = 'Go back'; const prevFilteredText = 'Go back with a filter'; const prevEmptyText = 'Go back to an empty frame'; + const prevChapterText = 'Go to the previous chapter'; const nextRegularText = 'Go next'; const nextFilteredText = 'Go next with a filter'; const nextEmptyText = 'Go next to an empty frame'; + const nextChapterText = 'Go to the next chapter'; let prevButton = ; let prevButtonTooltipMessage = prevRegularText; @@ -174,6 +208,11 @@ function PlayerButtons(props: Props): JSX.Element { ); prevButtonTooltipMessage = prevEmptyText; + } else if (navigationType === NavigationType.CHAPTER) { + prevButton = ( + + ); + prevButtonTooltipMessage = prevChapterText; } let nextButton = ; @@ -186,6 +225,11 @@ function PlayerButtons(props: Props): JSX.Element { } else if (navigationType === NavigationType.EMPTY) { nextButton = ; nextButtonTooltipMessage = nextEmptyText; + } else if (navigationType === NavigationType.CHAPTER) { + nextButton = ( + + ); + nextButtonTooltipMessage = nextChapterText; } const navIconStyle: CSSProperties = workspace === Workspace.SINGLE_SHAPE ? { @@ -196,6 +240,14 @@ function PlayerButtons(props: Props): JSX.Element { return ( + { (chapters.length > 0) && ( + + + )} setNavigationType(NavigationType.EMPTY)} /> + + setNavigationType(NavigationType.CHAPTER)} + /> + )} > @@ -292,6 +351,13 @@ function PlayerButtons(props: Props): JSX.Element { onClick={() => setNavigationType(NavigationType.EMPTY)} /> + + setNavigationType(NavigationType.CHAPTER)} + /> + )} > diff --git a/cvat-ui/src/components/annotation-page/top-bar/player-navigation.tsx b/cvat-ui/src/components/annotation-page/top-bar/player-navigation.tsx index bd8bc7d7ebf3..90d8fceca932 100644 --- a/cvat-ui/src/components/annotation-page/top-bar/player-navigation.tsx +++ b/cvat-ui/src/components/annotation-page/top-bar/player-navigation.tsx @@ -11,10 +11,11 @@ import { Row, Col } from 'antd/lib/grid'; import Icon, { LinkOutlined, DeleteOutlined, CopyOutlined, SearchOutlined, } from '@ant-design/icons'; -import Slider from 'antd/lib/slider'; +import Slider, { SliderMarks } from 'antd/lib/slider'; import InputNumber from 'antd/lib/input-number'; import Text from 'antd/lib/typography/Text'; import Modal from 'antd/lib/modal'; +import Tooltip from 'antd/lib/tooltip'; import { Workspace, CombinedState } from 'reducers'; import { RestoreIcon } from 'icons'; @@ -24,6 +25,7 @@ import { clamp } from 'utils/math'; import GlobalHotKeys, { KeyMap } from 'utils/mousetrap-react'; import { ShortcutScope } from 'utils/enums'; import { subKeyMap } from 'utils/component-subkeymap'; +import { Chapter } from 'cvat-core/src/frames'; import { usePlugins } from 'utils/hooks'; interface Props { @@ -32,6 +34,8 @@ interface Props { playing: boolean; ranges: string; frameNumber: number; + chapters: Chapter[] | null; + hoveredChapter: number | null; frameFilename: string; frameDeleted: boolean; deleteFrameShortcut: string; @@ -79,6 +83,8 @@ function PlayerNavigation(props: Props): JSX.Element { const { startFrame, stopFrame, + chapters, + hoveredChapter, playing, frameNumber, frameFilename, @@ -162,6 +168,18 @@ function PlayerNavigation(props: Props): JSX.Element { opacity: 0.5, } : {}; + const marks: SliderMarks = (chapters ?? []).reduce((acc, chapter) => { + const active = hoveredChapter === chapter.id; + const innerAcc = acc ?? {}; + innerAcc[chapter.start] = { + label: + + + , + }; + return innerAcc; + }, {}); + const deleteFrameIcon = !frameDeleted ? ( diff --git a/cvat-ui/src/components/annotation-page/top-bar/top-bar.tsx b/cvat-ui/src/components/annotation-page/top-bar/top-bar.tsx index c9f7a555780e..782c9992d100 100644 --- a/cvat-ui/src/components/annotation-page/top-bar/top-bar.tsx +++ b/cvat-ui/src/components/annotation-page/top-bar/top-bar.tsx @@ -11,6 +11,7 @@ import { } from 'reducers'; import { Job } from 'cvat-core-wrapper'; import { KeyMap } from 'utils/mousetrap-react'; +import { Chapter } from 'cvat-core/src/frames'; import LeftGroup from './left-group'; import PlayerButtons from './player-buttons'; import PlayerNavigation from './player-navigation'; @@ -19,6 +20,8 @@ import RightGroup from './right-group'; interface Props { playing: boolean; saving: boolean; + chapters: Chapter[]; + hoveredChapter: number | null; frameNumber: number; frameFilename: string; frameDeleted: boolean; @@ -60,6 +63,9 @@ interface Props { onFirstFrame(): void; onLastFrame(): void; onSearchAnnotations(direction: 'forward' | 'backward'): void; + onSearchChapters(direction: 'forward' | 'backward'): void; + onSelectChapter(id: number): void; + setHoveredChapter(id: number | null): void; onSliderChange(value: number): void; onInputChange(value: number): void; onURLIconClick(): void; @@ -81,6 +87,8 @@ export default function AnnotationTopBarComponent(props: Props): JSX.Element { undoAction, redoAction, playing, + chapters, + hoveredChapter, ranges, frameNumber, frameFilename, @@ -119,6 +127,9 @@ export default function AnnotationTopBarComponent(props: Props): JSX.Element { onFirstFrame, onLastFrame, onSearchAnnotations, + onSearchChapters, + onSelectChapter, + setHoveredChapter, onSliderChange, onInputChange, onURLIconClick, @@ -147,6 +158,7 @@ export default function AnnotationTopBarComponent(props: Props): JSX.Element { forwardShortcut={forwardShortcut} backwardShortcut={backwardShortcut} navigationType={navigationType} + chapters={chapters} keyMap={keyMap} workspace={workspace} onPrevFrame={onPrevFrame} @@ -157,6 +169,9 @@ export default function AnnotationTopBarComponent(props: Props): JSX.Element { onLastFrame={onLastFrame} onSwitchPlay={onSwitchPlay} onSearchAnnotations={onSearchAnnotations} + onSearchChapters={onSearchChapters} + onHoveredChapter={setHoveredChapter} + onSelectChapter={onSelectChapter} setNavigationType={setNavigationType} /> ), 0]); @@ -167,6 +182,8 @@ export default function AnnotationTopBarComponent(props: Props): JSX.Element { startFrame={startFrame} stopFrame={stopFrame} playing={playing} + chapters={chapters} + hoveredChapter={hoveredChapter} ranges={ranges} frameNumber={frameNumber} frameFilename={frameFilename} diff --git a/cvat-ui/src/containers/annotation-page/top-bar/top-bar.tsx b/cvat-ui/src/containers/annotation-page/top-bar/top-bar.tsx index ac689a07b328..9c92d00881b1 100644 --- a/cvat-ui/src/containers/annotation-page/top-bar/top-bar.tsx +++ b/cvat-ui/src/containers/annotation-page/top-bar/top-bar.tsx @@ -11,32 +11,29 @@ import { RouteComponentProps } from 'react-router-dom'; import { changeFrameAsync, changeWorkspace as changeWorkspaceAction, + setHoveredChapter as setHoveredChapterAction, collectStatisticsAsync, + deleteFrameAsync, redoActionAsync, + restoreFrameAsync, saveAnnotationsAsync, searchAnnotationsAsync, + searchChaptersAsync, setForceExitAnnotationFlag as setForceExitAnnotationFlagAction, + setNavigationType as setNavigationTypeAction, showFilters as showFiltersAction, showStatistics as showStatisticsAction, - switchPlay, - undoActionAsync, - deleteFrameAsync, - restoreFrameAsync, switchNavigationBlocked as switchNavigationBlockedAction, - setNavigationType as setNavigationTypeAction, + switchPlay, switchShowSearchFramesModal as switchShowSearchFramesModalAction, + undoActionAsync, } from 'actions/annotation-actions'; import AnnotationTopBarComponent from 'components/annotation-page/top-bar/top-bar'; import { Canvas } from 'cvat-canvas-wrapper'; import { Canvas3d } from 'cvat-canvas3d-wrapper'; import { FramesMetaData, Job } from 'cvat-core-wrapper'; import { - CombinedState, - FrameSpeed, - Workspace, - ActiveControl, - ToolsBlockerState, - NavigationType, + ActiveControl, CombinedState, FrameSpeed, NavigationType, ToolsBlockerState, Workspace, } from 'reducers'; import isAbleToChangeFrame from 'utils/is-able-to-change-frame'; import { KeyMap } from 'utils/mousetrap-react'; @@ -44,8 +41,11 @@ import { switchToolsBlockerState } from 'actions/settings-actions'; import { writeLatestFrame } from 'utils/remember-latest-frame'; import { finishDraw } from 'utils/drawing'; import { toClipboard } from 'utils/to-clipboard'; +import { Chapter } from 'cvat-core/src/frames'; interface StateToProps { + chapters: Chapter[]; + hoveredChapter: number | null; jobInstance: Job; frameIsDeleted: boolean; frameNumber: number; @@ -93,8 +93,14 @@ interface DispatchToProps { isEmptyFrame: boolean; }, ): void; + searchChapters( + sessionInstance: Job, + frameFrom: number, + frameTo: number, + ): void; setForceExitAnnotationFlag(forceExit: boolean): void; changeWorkspace(workspace: Workspace): void; + setHoveredChapter(id: number | null): void; onSwitchToolsBlockerState(toolsBlockerState: ToolsBlockerState): void; deleteFrame(frame: number): void; restoreFrame(frame: number): void; @@ -116,6 +122,7 @@ function mapStateToProps(state: CombinedState): StateToProps { fetching: frameFetching, }, navigationType, + hoveredChapter, }, annotations: { saving: { uploading: saving, forceExit }, @@ -145,7 +152,10 @@ function mapStateToProps(state: CombinedState): StateToProps { ); } + const chapters = meta?.chapters ?? []; + return { + chapters, frameIsDeleted, frameStep, frameSpeed, @@ -153,6 +163,7 @@ function mapStateToProps(state: CombinedState): StateToProps { frameFetching, playing, canvasIsReady, + hoveredChapter, saving, frameNumber, frameFilename, @@ -214,9 +225,19 @@ function mapDispatchToProps(dispatch: any): DispatchToProps { ): void { dispatch(searchAnnotationsAsync(sessionInstance, frameFrom, frameTo, generalFilters)); }, + searchChapters( + sessionInstance: Job, + frameFrom: number, + frameTo: number, + ) { + dispatch(searchChaptersAsync(sessionInstance, frameFrom, frameTo)); + }, changeWorkspace(workspace: Workspace): void { dispatch(changeWorkspaceAction(workspace)); }, + setHoveredChapter(id: number | null) { + dispatch(setHoveredChapterAction(id)); + }, setForceExitAnnotationFlag(forceExit: boolean): void { dispatch(setForceExitAnnotationFlagAction(forceExit)); }, @@ -419,7 +440,7 @@ class AnnotationTopBarContainer extends React.PureComponent { private onPrevFrame = async (): Promise => { const { frameNumber, jobInstance, playing, searchAnnotations, - onSwitchPlay, showDeletedFrames, navigationType, + onSwitchPlay, showDeletedFrames, navigationType, searchChapters, } = this.props; const { startFrame } = jobInstance; @@ -439,6 +460,8 @@ class AnnotationTopBarContainer extends React.PureComponent { this.changeFrame(newFrame); } else if (navigationType === NavigationType.FILTERED) { searchAnnotations(jobInstance, newFrame, startFrame); + } else if (navigationType === NavigationType.CHAPTER) { + searchChapters(jobInstance, newFrame, startFrame); } else { searchAnnotations(jobInstance, newFrame, startFrame, { isEmptyFrame: true }); } @@ -447,7 +470,7 @@ class AnnotationTopBarContainer extends React.PureComponent { private onNextFrame = async (): Promise => { const { - frameNumber, jobInstance, playing, searchAnnotations, + frameNumber, jobInstance, playing, searchAnnotations, searchChapters, onSwitchPlay, showDeletedFrames, navigationType, } = this.props; const { stopFrame } = jobInstance; @@ -467,6 +490,8 @@ class AnnotationTopBarContainer extends React.PureComponent { this.changeFrame(newFrame); } else if (navigationType === NavigationType.FILTERED) { searchAnnotations(jobInstance, newFrame, stopFrame); + } else if (navigationType === NavigationType.CHAPTER) { + searchChapters(jobInstance, newFrame, stopFrame); } else { searchAnnotations(jobInstance, newFrame, stopFrame, { isEmptyFrame: true }); } @@ -524,6 +549,36 @@ class AnnotationTopBarContainer extends React.PureComponent { } }; + private readonly searchChapters = (direction: 'forward' | 'backward'): void => { + const { + frameNumber, jobInstance, searchChapters, + } = this.props; + const { startFrame, stopFrame } = jobInstance; + + if (isAbleToChangeFrame()) { + if (direction === 'forward' && frameNumber + 1 <= stopFrame) { + searchChapters(jobInstance, frameNumber + 1, stopFrame); + } else if (direction === 'backward' && frameNumber - 1 >= startFrame) { + searchChapters(jobInstance, frameNumber - 1, startFrame); + } + } + }; + + private readonly selectChapter = async (id: number): Promise => { + const { + chapters, playing, onSwitchPlay, + } = this.props; + + const selectedChapter = chapters.find((chapter: Chapter) => chapter.id === id) ?? null; + + if (selectedChapter !== null) { + if (playing) { + onSwitchPlay(false); + } + this.changeFrame(selectedChapter.start); + } + }; + private onChangePlayerSliderValue = async (value: number): Promise => { const { playing, onSwitchPlay, jobInstance, showDeletedFrames, @@ -603,6 +658,11 @@ class AnnotationTopBarContainer extends React.PureComponent { } }; + private readonly setHoveredChapter = (id: number | null): void => { + const { setHoveredChapter } = this.props; + setHoveredChapter(id); + }; + private beforeUnloadCallback = (event: BeforeUnloadEvent): string | undefined => { const { jobInstance, forceExit, setForceExitAnnotationFlag } = this.props; const { frameNumber } = this.props; @@ -640,6 +700,8 @@ class AnnotationTopBarContainer extends React.PureComponent { const { playing, saving, + chapters, + hoveredChapter, jobInstance, jobInstance: { startFrame, stopFrame }, frameNumber, @@ -674,6 +736,9 @@ class AnnotationTopBarContainer extends React.PureComponent { onFirstFrame={this.onFirstFrame} onLastFrame={this.onLastFrame} onSearchAnnotations={this.searchAnnotations} + onSearchChapters={this.searchChapters} + onSelectChapter={this.selectChapter} + setHoveredChapter={this.setHoveredChapter} setNavigationType={setNavigationType} onSliderChange={this.onChangePlayerSliderValue} onInputChange={this.onChangePlayerInputValue} @@ -688,6 +753,8 @@ class AnnotationTopBarContainer extends React.PureComponent { keyMap={keyMap} workspace={workspace} playing={playing} + chapters={chapters} + hoveredChapter={hoveredChapter} saving={saving} ranges={ranges} startFrame={startFrame} diff --git a/cvat-ui/src/icons.tsx b/cvat-ui/src/icons.tsx index 9c5a7d97b4ca..1b0e99aa2ce2 100644 --- a/cvat-ui/src/icons.tsx +++ b/cvat-ui/src/icons.tsx @@ -25,15 +25,18 @@ import SVGSaveIcon from './assets/save-icon.svg'; import SVGUndoIcon from './assets/undo-icon.svg'; import SVGRedoIcon from './assets/redo-icon.svg'; import SVGFirstIcon from './assets/first-icon.svg'; +import SVGChapterMenuIcon from './assets/chapter-menu.svg'; import SVGBackJumpIcon from './assets/back-jump-icon.svg'; import SVGPreviousIcon from './assets/previous-icon.svg'; import SVGPreviousFilteredIcon from './assets/previous-filtered-icon.svg'; import SVGPreviousEmptyIcon from './assets/previous-empty-icon.svg'; +import SVGPreviousChapterIcon from './assets/previous_chapter_icon.svg'; import SVGPlayIcon from './assets/play-icon.svg'; import SVGPauseIcon from './assets/pause-icon.svg'; import SVGNextIcon from './assets/next-icon.svg'; import SVGNextFilteredIcon from './assets/next-filtered-icon.svg'; import SVGNextEmptyIcon from './assets/next-empty-icon.svg'; +import SVGNextChapterIcon from './assets/next_chapter_icon.svg'; import SVGForwardJumpIcon from './assets/forward-jump-icon.svg'; import SVGLastIcon from './assets/last-icon.svg'; import SVGFullscreenIcon from './assets/fullscreen-icon.svg'; @@ -91,15 +94,18 @@ export const SaveIcon = React.memo((): JSX.Element => ); export const UndoIcon = React.memo((): JSX.Element => ); export const RedoIcon = React.memo((): JSX.Element => ); export const FirstIcon = React.memo((): JSX.Element => ); +export const ChapterMenuIcon = React.memo((): JSX.Element => ); export const BackJumpIcon = React.memo((): JSX.Element => ); export const PreviousIcon = React.memo((): JSX.Element => ); export const PreviousFilteredIcon = React.memo((): JSX.Element => ); export const PreviousEmptyIcon = React.memo((): JSX.Element => ); +export const PreviousChapterIcon = React.memo((): JSX.Element => ); export const PauseIcon = React.memo((): JSX.Element => ); export const PlayIcon = React.memo((): JSX.Element => ); export const NextIcon = React.memo((): JSX.Element => ); export const NextFilteredIcon = React.memo((): JSX.Element => ); export const NextEmptyIcon = React.memo((): JSX.Element => ); +export const NextChapterIcon = React.memo((): JSX.Element => ); export const ForwardJumpIcon = React.memo((): JSX.Element => ); export const LastIcon = React.memo((): JSX.Element => ); export const FullscreenIcon = React.memo((): JSX.Element => ); diff --git a/cvat-ui/src/reducers/annotation-reducer.ts b/cvat-ui/src/reducers/annotation-reducer.ts index 38bd7b8c4808..01a1e856e525 100644 --- a/cvat-ui/src/reducers/annotation-reducer.ts +++ b/cvat-ui/src/reducers/annotation-reducer.ts @@ -105,6 +105,7 @@ const defaultState: AnnotationState = { playing: false, frameAngles: [], navigationBlocked: false, + hoveredChapter: null, }, drawing: { activeShapeType: ShapeType.RECTANGLE, @@ -293,6 +294,15 @@ export default (state = defaultState, action: AnyAction): AnnotationState => { }, }; } + case AnnotationActionTypes.HOVERED_CHAPTER: { + return { + ...state, + player: { + ...state.player, + hoveredChapter: action.payload.id, + }, + }; + } case JobsActionTypes.UPDATE_JOB_SUCCESS: { return { ...state, diff --git a/cvat-ui/src/reducers/index.ts b/cvat-ui/src/reducers/index.ts index f866b4e8aae7..4a5e7abfda4d 100644 --- a/cvat-ui/src/reducers/index.ts +++ b/cvat-ui/src/reducers/index.ts @@ -805,6 +805,7 @@ export enum NavigationType { REGULAR = 'regular', FILTERED = 'filtered', EMPTY = 'empty', + CHAPTER = 'chapter', } export interface EditingState { @@ -875,6 +876,7 @@ export interface AnnotationState { navigationBlocked: boolean; playing: boolean; frameAngles: number[]; + hoveredChapter: number | null; }; drawing: { activeInteractor?: MLModel | OpenCVTool; diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index 2d7e821a17f4..a0dca86c3d82 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -20,7 +20,7 @@ from enum import IntEnum from fractions import Fraction from random import shuffle -from typing import Any, ClassVar, Optional, Protocol, TypeVar, Union +from typing import Any, ClassVar, Optional, Protocol, TypedDict, TypeVar, Union import av import av.codec @@ -48,6 +48,13 @@ ORIENTATION_EXIF_TAG = 274 +class Chapter(TypedDict): + id: int + metadata: dict[str, str] + start: int + stop: int + + class ORIENTATION(IntEnum): NORMAL_HORIZONTAL = 1 MIRROR_HORIZONTAL = 2 @@ -117,6 +124,18 @@ def load_image(image: tuple[str, str, str]) -> tuple[Image.Image, str, str]: return pil_img, image[1], image[2] +def get_video_chapters(manifest_path: str, segment: tuple[int, int] = None) -> list[Chapter]: + manifest = VideoManifestManager(manifest_path) + + chapters = manifest.chapters + + if segment: + chapters = [ + chapter for chapter in manifest.chapters if segment[0] <= chapter["start"] <= segment[1] + ] + return chapters + + _T = TypeVar("_T") diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 4800a3648173..475cba10fbf4 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -2953,6 +2953,15 @@ class FrameMetaSerializer(serializers.Serializer): def get_has_related_context(self, obj: dict) -> bool: return obj['related_files'] != 0 +class ChapterMetadataSerializer(serializers.Serializer): + title = serializers.CharField(required=False) + +class ChapterSerializer(serializers.Serializer): + id = serializers.IntegerField() + start = serializers.IntegerField() + stop = serializers.IntegerField() + metadata = ChapterMetadataSerializer(many=False) + class PluginsSerializer(serializers.Serializer): GIT_INTEGRATION = serializers.BooleanField() ANALYTICS = serializers.BooleanField() @@ -2961,6 +2970,7 @@ class PluginsSerializer(serializers.Serializer): class DataMetaReadSerializer(serializers.ModelSerializer): frames = FrameMetaSerializer(many=True, allow_null=True) + chapters = ChapterSerializer(many=True, allow_null=True, required=False) image_quality = serializers.IntegerField(min_value=0, max_value=100) deleted_frames = serializers.ListField(child=serializers.IntegerField(min_value=0)) included_frames = serializers.ListField( @@ -2973,6 +2983,7 @@ class DataMetaReadSerializer(serializers.ModelSerializer): class Meta: model = models.Data fields = ( + 'chapters', 'chunks_updated_date', 'chunk_size', 'size', diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 00f2a760bfcd..3f41440432bd 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -66,7 +66,7 @@ JobFrameProvider, TaskFrameProvider, ) -from cvat.apps.engine.media_extractors import get_mime +from cvat.apps.engine.media_extractors import get_mime, get_video_chapters from cvat.apps.engine.mixins import BackupMixin, DatasetMixin, PartialUpdateModelMixin, UploadMixin from cvat.apps.engine.model_utils import bulk_create from cvat.apps.engine.models import ( @@ -1479,8 +1479,10 @@ def metadata(self, request: ExtendedRequest, pk: int): if hasattr(db_task.data, 'video'): media = [db_task.data.video] + chapters = get_video_chapters(db_task.data.get_manifest_path()) else: media = list(db_task.data.images.all()) + chapters = None frame_meta = [{ 'width': item.width, @@ -1492,6 +1494,7 @@ def metadata(self, request: ExtendedRequest, pk: int): db_data = db_task.data db_data.frames = frame_meta db_data.chunks_updated_date = db_task.get_chunks_updated_date() + db_data.chapters = chapters serializer = DataMetaReadSerializer(db_data) return Response(serializer.data) @@ -1984,6 +1987,10 @@ def metadata(self, request: ExtendedRequest, pk: int): if hasattr(db_data, 'video'): media = [db_data.video] + chapters = get_video_chapters( + db_task.data.get_manifest_path(), + segment=(data_start_frame, data_stop_frame) + ) else: media = [ # Insert placeholders if frames are skipped @@ -1995,6 +2002,7 @@ def metadata(self, request: ExtendedRequest, pk: int): for f in db_data.images.all() if f.frame in range(data_start_frame, data_stop_frame + frame_step, frame_step) ] + chapters = None deleted_frames = set(db_data.deleted_frames) if db_job.type == models.JobType.GROUND_TRUTH: @@ -2021,6 +2029,7 @@ def metadata(self, request: ExtendedRequest, pk: int): } for item in media] db_data.frames = frame_meta + db_data.chapters = chapters serializer = DataMetaReadSerializer(db_data) return Response(serializer.data) diff --git a/cvat/requirements/base.in b/cvat/requirements/base.in index cd8f5ffc34fd..38619afe308e 100644 --- a/cvat/requirements/base.in +++ b/cvat/requirements/base.in @@ -2,7 +2,7 @@ attrs==21.4.0 -av==15.1.0 +av==16.0.1 azure-storage-blob==12.13.0 boto3~=1.37 diff --git a/cvat/schema.yml b/cvat/schema.yml index e1324d69aacf..0c70db531d9a 100644 --- a/cvat/schema.yml +++ b/cvat/schema.yml @@ -7210,6 +7210,27 @@ components: maxLength: 150 required: - username + Chapter: + type: object + properties: + id: + type: integer + start: + type: integer + stop: + type: integer + metadata: + $ref: '#/components/schemas/ChapterMetadata' + required: + - id + - metadata + - start + - stop + ChapterMetadata: + type: object + properties: + title: + type: string ChunkType: enum: - video @@ -7456,6 +7477,11 @@ components: DataMetaRead: type: object properties: + chapters: + type: array + items: + $ref: '#/components/schemas/Chapter' + nullable: true chunks_updated_date: type: string format: date-time diff --git a/tests/cypress/e2e/features/shortcuts.js b/tests/cypress/e2e/features/shortcuts.js index 86591b53e01e..0fd6e76a16c1 100644 --- a/tests/cypress/e2e/features/shortcuts.js +++ b/tests/cypress/e2e/features/shortcuts.js @@ -4,6 +4,68 @@ /// +function tableContains(target) { + return cy + .get('.cvat-shortcuts-modal-window-table') + .then(($table) => $table.text().includes(target)); +} + +function assertTargetVisible(target) { + cy + .contains('.cvat-shortcuts-modal-window-table', target) + .should('be.visible'); +} + +function goToNextPageOrFail(target) { + return cy + .get('.cvat-shortcuts-modal-window-table .ant-pagination-next button') + .then(($btn) => { + const disabled = + $btn.is(':disabled') || + $btn.hasClass('ant-pagination-disabled'); + + if (disabled) { + throw new Error(`"${target}" not mounted.`); + } + + return cy.wrap($btn).click(); + }); +} + +function waitForPageChange(oldPage) { + cy.get('.ant-pagination-item-active') + .invoke('text') + .should('eq', String(oldPage + 1)); +} + +function getCurrentPage() { + return cy + .get('.ant-pagination-item-active') + .invoke('text') + .then(Number); +} + +function searchAcrossPages(target) { + cy.get('.cvat-shortcuts-modal-window-table').should('exist').and('be.visible'); + + const waitAndContinueSearching = (t, oldPage) => { + goToNextPageOrFail(t).then(() => { + waitForPageChange(oldPage); + searchAcrossPages(t); + }); + }; + + tableContains(target).then((found) => { + if (found) { + assertTargetVisible(target); + } else { + getCurrentPage().then((oldPage) => { + waitAndContinueSearching(target, oldPage); + }); + } + }); +} + context('Customizable Shortcuts', () => { const taskName = 'A task with markdown'; const serverFiles = ['images/image_1.jpg']; @@ -95,8 +157,9 @@ context('Customizable Shortcuts', () => { function checkShortcutsMounted(label) { cy.get('.cvat-shortcuts-modal-window-table').should('exist').and('be.visible'); + for (let i = 1; i < 3; i++) { - cy.get('.cvat-shortcuts-modal-window-table').contains(label(i)); + searchAcrossPages(label(i)); } } @@ -215,7 +278,7 @@ context('Customizable Shortcuts', () => { cy.get('.cvat-canvas-container').click(); cy.realPress(['F1']); cy.get('.cvat-shortcuts-modal-window').should('exist').and('be.visible'); - cy.get('.cvat-shortcuts-modal-window .ant-pagination-item-2').click(); + cy.get('.cvat-shortcuts-modal-window .ant-pagination-item-1').click(); checkShortcutsMounted((i) => `Create a new tag "label ${i}"`); cy.realPress(['F1']); }); @@ -224,7 +287,7 @@ context('Customizable Shortcuts', () => { cy.get('.cvat-canvas-container').click(); cy.realPress(['F1']); cy.get('.cvat-shortcuts-modal-window').should('exist').and('be.visible'); - cy.get('.cvat-shortcuts-modal-window .ant-pagination-item-3').click(); + cy.get('.cvat-shortcuts-modal-window .ant-pagination-item-2').click(); cy.get('.cvat-shortcuts-modal-window-table').should('exist').and('be.visible'); cy.get('.cvat-shortcuts-modal-window-table').contains('Assign attribute value false'); cy.get('.cvat-shortcuts-modal-window-table').contains('Assign attribute value true'); @@ -241,6 +304,7 @@ context('Customizable Shortcuts', () => { .within(() => { cy.contains('100 / page').click(); }); + cy.get('.cvat-shortcuts-modal-window .ant-pagination-item-1').click(); checkShortcutsMounted((i) => `Switch label to label ${i}`); cy.contains('.cvat-shortcuts-modal-window [type="button"]', 'OK').click(); }); diff --git a/tests/cypress/e2e/issues_prs2/pr_9924_video_chapters.js b/tests/cypress/e2e/issues_prs2/pr_9924_video_chapters.js new file mode 100644 index 000000000000..ef1d6730fed9 --- /dev/null +++ b/tests/cypress/e2e/issues_prs2/pr_9924_video_chapters.js @@ -0,0 +1,105 @@ +// Copyright (C) CVAT.ai Corporation +// +// SPDX-License-Identifier: MIT + +function checkFrameNum(frameNum) { + cy.get('.cvat-player-frame-selector').within(() => { + cy.get('input[role="spinbutton"]').should('have.value', frameNum); + }); +} + +function switchChapter(chapterNumber) { + cy.contains('.cvat-player-chapter-menu-list-item', `Kapitel ${chapterNumber}`) + .should('exist') + .and('be.visible') + .click(); +} + +function checkChapterNavigationButtons(direction, expectedSliderPos) { + cy.get(`.cvat-player-buttons > .cvat-player-${direction}-button`).rightclick(); + cy.get(`.cvat-player-${direction}-chapter-inlined-button`) + .should('exist') + .and('be.visible') + .click(); + cy.get(`.cvat-player-buttons > .cvat-player-${direction}-button-chapter`) + .should('exist') + .and('be.visible') + .click(); + checkFrameNum(expectedSliderPos); + cy.get(`.cvat-player-buttons > .cvat-player-${direction}-button-chapter`).rightclick(); + cy.get(`.cvat-player-${direction}-inlined-button`) + .should('exist') + .and('be.visible') + .click(); + cy.get(`.cvat-player-${direction}-button`) + .rightclick(); +} + +context('Video chapters', () => { + const taskName = 'Video task with chapters'; + const serverFiles = ['videos/video_with_chapters.mp4']; + + const task = { + name: taskName, + project_id: null, + source_storage: { location: 'local' }, + target_storage: { location: 'local' }, + }; + + const storage = { + server_files: serverFiles, + image_quality: 70, + use_cache: true, + }; + + let jobID = null; + let taskID = null; + + before(() => { + cy.visit('/auth/login'); + cy.login(); + cy.get('.cvat-tasks-page').should('exist').and('be.visible'); + cy.url().should('contain', '/tasks'); + cy.headlessCreateTask(task, storage).then((response) => { + taskID = response.taskID; + [jobID] = response.jobIDs; + }); + }); + + after(() => { + cy.logout(); + }); + + describe('Test chapter navigation buttons', () => { + it('Chapter forward', () => { + cy.visit(`/tasks/${taskID}/jobs/${jobID}`); + cy.get('.cvat-player-buttons').should('exist').and('be.visible'); + checkChapterNavigationButtons('next', '20'); + }); + it('Chapter backwards', () => { + checkChapterNavigationButtons('previous', '0'); + }); + }); + + describe('Test chapter navigation via shortcuts', () => { + it('Chapter forward (b)', () => { + cy.realPress('b'); + checkFrameNum('20'); + }); + it('Chapter backwards (x)', () => { + cy.realPress('x'); + checkFrameNum('0'); + }); + }); + + describe('Test chapter menu', () => { + it('Check menu overview', () => { + cy.get('.cvat-player-chapters-menu-button').click(); + cy.get('.cvat-player-chapter-menu-wrapper').should('exist').and('be.visible'); + switchChapter(2); + checkFrameNum('20'); + switchChapter(1); + checkFrameNum('0'); + }); + }); +}); diff --git a/tests/mounted_file_share/videos/video_with_chapters.mp4 b/tests/mounted_file_share/videos/video_with_chapters.mp4 new file mode 100644 index 000000000000..557bc1b56b84 Binary files /dev/null and b/tests/mounted_file_share/videos/video_with_chapters.mp4 differ diff --git a/tests/python/requirements.txt b/tests/python/requirements.txt index 5fca6639052f..e14d87d0198d 100644 --- a/tests/python/requirements.txt +++ b/tests/python/requirements.txt @@ -1,5 +1,5 @@ allure-pytest==2.14.2 -av==12.0.0 +av==16.0.1 pytest==6.2.5 pytest-cases==3.8.6 pytest-timeout==2.1.0 @@ -13,4 +13,4 @@ python-dateutil==2.8.2 pyyaml==6.0.2 numpy==2.0.0 -# TODO: update pytest to 7.0.0 and pytest-timeout to 2.3.1 (better debug in vscode) \ No newline at end of file +# TODO: update pytest to 7.0.0 and pytest-timeout to 2.3.1 (better debug in vscode) diff --git a/tests/python/rest_api/test_task_data.py b/tests/python/rest_api/test_task_data.py index f74e61a73f4e..6e1f3d3ea2ca 100644 --- a/tests/python/rest_api/test_task_data.py +++ b/tests/python/rest_api/test_task_data.py @@ -1056,7 +1056,7 @@ def test_create_task_with_cloud_storage_and_check_retrieve_data_meta( cloud_storage=cloud_storage, use_manifest=False, use_cache=False, - server_files=["test/video/video.avi"], + server_files=["test/video/video.mkv"], org=org, data_spec_kwargs=data_spec, data_type="video", @@ -1780,6 +1780,7 @@ def test_can_create_task_with_consensus(self, request: pytest.FixtureRequest): class TestTaskData(TestTasksBase): @parametrize("task_spec, task_id", TestTasksBase._all_task_cases) def test_can_get_task_meta(self, task_spec: ITaskSpec, task_id: int): + with make_api_client(self._USERNAME) as api_client: (task_meta, _) = api_client.tasks_api.retrieve_data_meta(task_id) @@ -1798,8 +1799,10 @@ def test_can_get_task_meta(self, task_spec: ITaskSpec, task_id: int): if task_spec.source_data_type == SourceDataType.video: assert len(task_meta.frames) == 1 + assert len(task_meta.chapters) == 1 else: assert len(task_meta.frames) == task_meta.size + assert task_meta.chapters is None @pytest.mark.timeout( # This test has to check all the task frames availability, it can make many requests diff --git a/tests/python/shared/utils/helpers.py b/tests/python/shared/utils/helpers.py index 2211cbe1448f..669e96499c46 100644 --- a/tests/python/shared/utils/helpers.py +++ b/tests/python/shared/utils/helpers.py @@ -4,7 +4,7 @@ import subprocess from collections.abc import Generator -from contextlib import closing +from fractions import Fraction from io import BytesIO from typing import Optional @@ -50,9 +50,19 @@ def generate_image_files( def generate_video_file(num_frames: int, size=(100, 50)) -> BytesIO: f = BytesIO() - f.name = "video.avi" + f.name = "video.mkv" + chapters = [ + { + "id": 0, + "start": 0, + "end": 100, + "time_base": Fraction(1, 1000), + "metadata": {"title": "Intro"}, + } + ] with av.open(f, "w") as container: + container.set_chapters(chapters) stream = container.add_stream("mjpeg", rate=60) stream.width = size[0] stream.height = size[1] @@ -74,11 +84,9 @@ def read_video_file(file: BytesIO) -> Generator[Image.Image, None, None]: with av.open(file) as container: video_stream = container.streams.video[0] - with closing(video_stream.codec_context): # pyav has a memory leak in stream.close() - with closing(container.demux(video_stream)) as demux_iter: - for packet in demux_iter: - for frame in packet.decode(): - yield frame.to_image() + for packet in container.demux(video_stream): + for frame in packet.decode(): + yield frame.to_image() def generate_manifest(path: str) -> None: diff --git a/utils/dataset_manifest/core.py b/utils/dataset_manifest/core.py index f6cc304cc4ee..28f96fddf86b 100644 --- a/utils/dataset_manifest/core.py +++ b/utils/dataset_manifest/core.py @@ -7,6 +7,7 @@ import json import os from abc import ABC, abstractmethod +from bisect import bisect_left, insort from collections.abc import Callable, Iterable, Iterator from contextlib import closing from enum import Enum @@ -31,6 +32,7 @@ class VideoStreamReader: def __init__(self, source_path, chunk_size, force): self._source_path = source_path self._frames_number = None + self._chapters = None self._force = force self._upper_bound = 3 * chunk_size + 1 @@ -62,6 +64,23 @@ def _get_video_stream(container): video_stream.thread_type = "AUTO" return video_stream + @staticmethod + def _get_chapters(container): + chapters = container.chapters() + stream = VideoStreamReader._get_video_stream(container) + stream_tb = stream.time_base + rescale_q = lambda q, src, dest: int(q * src / dest + 0.5) + output_chapters = [] + for chapter in chapters: + output_chapter = { + "start": rescale_q(chapter["start"], chapter["time_base"], stream_tb), + "end": rescale_q(chapter["end"], chapter["time_base"], stream_tb), + "metadata": chapter["metadata"], + "id": chapter["id"], + } + output_chapters.append(output_chapter) + return output_chapters + def __len__(self): assert ( self._frames_number is not None @@ -73,6 +92,10 @@ def __len__(self): def resolution(self): return (self.width, self.height) + @property + def chapters(self): + return self._chapters + def validate_key_frame( self, container: av.container.InputContainer, @@ -103,6 +126,23 @@ def validate_key_frame( return seek_pts return None + @staticmethod + def _find_closest_pts(pts_list, target_pts): + if not pts_list: + return None + + pos = bisect_left(pts_list, target_pts) + + if pos == 0: + return 0 + if pos == len(pts_list): + return len(pts_list) - 1 + + before = pts_list[pos - 1] + after = pts_list[pos] + + return pos if abs(after - target_pts) < abs(before - target_pts) else pos - 1 + def __iter__(self) -> Iterator[int | tuple[int, int, str]]: """ Iterate over video frames and yield key frames or indexes. @@ -117,6 +157,8 @@ def __iter__(self) -> Iterator[int | tuple[int, int, str]]: ): reading_v_stream = self._get_video_stream(reading_container) checking_v_stream = self._get_video_stream(checking_container) + chapters = self._get_chapters(reading_container) + index_pts: list[tuple[int, int]] = [] prev_pts: int | None = None prev_dts: int | None = None index, key_frame_count = 0, 0 @@ -131,6 +173,8 @@ def __iter__(self) -> Iterator[int | tuple[int, int, str]]: raise InvalidVideoError("Detected non-increasing DTS sequence in the video") prev_pts, prev_dts = frame.pts, frame.dts + insort(index_pts, (index, frame.pts), key=lambda item: item[1]) + if frame.key_frame: key_frame_data = { "pts": frame.pts, @@ -167,6 +211,25 @@ def __iter__(self) -> Iterator[int | tuple[int, int, str]]: if not self._frames_number: self._frames_number = index + if not self._chapters: + self._chapters = [] + pts_list = [item[1] for item in index_pts] + for chapter in chapters: + i = self._find_closest_pts(pts_list, chapter["start"]) + j = self._find_closest_pts(pts_list, chapter["end"]) + start = index_pts[i][0] + stop = index_pts[j][0] - 1 + if chapter["end"] > index_pts[-1][1]: + stop = index_pts[j][0] + self._chapters.append( + { + "start": start, + "stop": stop, + "metadata": chapter["metadata"], + "id": chapter["id"], + } + ) + class DatasetImagesReader: def __init__( @@ -571,6 +634,7 @@ def _write_base_information(self, file): "name": os.path.basename(self._reader.source_path), "resolution": self._reader.resolution, "length": len(self._reader), + "chapters": self._reader.chapters, }, } for key, value in base_info.items(): @@ -623,6 +687,10 @@ def video_length(self): def data(self): return self.video_name + @property + def chapters(self): + return self["properties"].get("chapters", []) + def get_subset(self, subset_names): raise NotImplementedError() diff --git a/utils/dataset_manifest/requirements.in b/utils/dataset_manifest/requirements.in index ad9a478fcef5..944c96f50437 100644 --- a/utils/dataset_manifest/requirements.in +++ b/utils/dataset_manifest/requirements.in @@ -1,4 +1,4 @@ -av==15.1.0 # Pinned for the whole CVAT +av==16.0.1 # Pinned for the whole CVAT natsort>=8.0.0 opencv-python-headless>=4.4.0.42 Pillow>=10.3.0 diff --git a/utils/dataset_manifest/requirements.txt b/utils/dataset_manifest/requirements.txt index 1864e6151c3f..ebac085e80f8 100644 --- a/utils/dataset_manifest/requirements.txt +++ b/utils/dataset_manifest/requirements.txt @@ -1,9 +1,9 @@ -# SHA1:c994feeb8bd193f610d522cfea809387b62748ab +# SHA1:cdd81a932db390b86aaaea3baef089b8d7230abb # # This file is automatically generated. # To update it, refer to cvat/requirements/README.txt. # -av==15.1.0 +av==16.0.1 # via -r utils/dataset_manifest/requirements.in natsort==8.0.0 # via -r utils/dataset_manifest/requirements.in