-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
## 🎯 What does this PR do? In Release 4: Model-level: Cyber supports a new tokenizer model, [Cosmos-Tokenizer](https://github.com/NVIDIA/Cosmos-Tokenizer), and a new dynamics model, [Deep Planning Network](https://github.com/google-research/planet). Doc-level: The tutorial for the world model is updated. ## 🔍 Related Issues <!-- Link to any related issues using #issue_number --> ## ✅ Quality Checklist - [✅] Ran `pre-commit` checks locally - [✅] Passed Ruff linting and formatting - [✅] Passed MyPy type checking - [✅] Added/updated tests - [✅] Updated documentation - [✅] Verified changes locally - [✅] No new warnings generated ## 🧪 Test Instructions <!-- Steps to test the changes --> 1. 2. 3. ## 📝 Additional Notes <!-- Any additional information that reviewers should know --> ## 💻 Local Verification Steps ```bash # Run these commands to verify your changes pre-commit run --all-files pytest tests/ # if you modified any tested code ``` --------- Co-authored-by: Weilv Chi <[email protected]> Co-authored-by: Zhao Tang <[email protected]> Co-authored-by: Haosen Yang <[email protected]> Co-authored-by: Zhao Tang <[email protected]> Co-authored-by: a752994118 <[email protected]>
- Loading branch information
1 parent
74f5c1e
commit 867571d
Showing
62 changed files
with
5,132 additions
and
516 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,10 @@ | ||
cff-version: 1.2.0 | ||
message: "If you use this software, please cite it using the following metadata." | ||
title: "Cyber" | ||
date-released: 2024-11-01 | ||
version: "0.1.2" | ||
date-released: 2024-11-18 | ||
version: "0.1.3" | ||
url: "https://github.com/CyberOrigin2077/Cyber" | ||
repository-code: "https://github.com/CyberOrigin2077/Cyber" | ||
license: "Apache License, Version 2.0, January 2004" | ||
license: "Apache License, Version 2.0, January 2004" | ||
authors: | ||
- name: CYBERORIGIN PTE. LTD. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,45 +1,48 @@ | ||
import logging | ||
|
||
import cv2 | ||
import glob | ||
import numpy as np | ||
import pandas as pd | ||
import torch | ||
|
||
from cyber.dataset.cyberdataset import BaseCyberDataset | ||
from torch.utils.data import Dataset | ||
|
||
|
||
# Create a package-level logger | ||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class RawVideoDataset(BaseCyberDataset): | ||
class RawVideoDataset(Dataset): | ||
""" | ||
Base class for all cyber datasets, provides some common functionality. | ||
a simple dataset used to retrieve raw frames from videos | ||
videos are stored in a directory with the following structure: | ||
dataset_path | ||
├── video_0.mp4 | ||
├── video_1.mp4 | ||
... | ||
""" | ||
|
||
def __init__(self, dataset_path, only_color=True):
    """
    Set up the dataset on top of the base-class initializer.

    Args:
        dataset_path: forwarded unchanged to the parent ``__init__``.
        only_color: stored on the instance; ``__getitem__`` passes it on
            to restrict loading to the "color" modality.
    """
    # The parent initializer runs first, exactly as before; what it sets up
    # is defined outside this file.
    super().__init__(dataset_path)
    self.only_color = only_color
|
||
# self.episodes_description = self.get_episodes_description(dataset_path) | ||
|
||
@classmethod
def _load_all_modalities_data(cls, episode_description: pd.Series, only_color=True) -> dict:
    """
    Load the modalities listed in one episode description.

    Args:
        episode_description: row providing the "episode_id", "modalities"
            and "path" entries read below.
        only_color: when truthy, every modality whose name is not "color"
            is skipped.

    Returns:
        dict mapping modality name to whatever ``cls._load_modality_data``
        returns for it.
    """
    ep_id = episode_description["episode_id"]
    descriptions = episode_description["modalities"]
    loaded = {}
    for name, description in descriptions.items():
        # Load when filtering is off, or when this is the color modality.
        if not only_color or name == "color":
            loaded[name] = cls._load_modality_data(episode_description["path"], ep_id, name, description)
    return loaded
|
||
def __getitem__(self, idx):
    """
    Return the modalities of episode ``idx`` as torch tensors.

    The episode row is looked up with ``.loc[idx]`` and handed to
    ``_load_all_modalities_data`` together with ``self.only_color``.
    Modalities whose array dtype is ``np.uint16`` are logged and left out
    of the result.

    Returns:
        dict mapping modality name to a ``torch.Tensor`` built from the
        modality's "data" entry.
    """
    episode_row = self.episode_discription.loc[idx]
    loaded = self._load_all_modalities_data(episode_row, self.only_color)

    tensors = {}
    for modality_name, payload in loaded.items():
        array = payload["data"]
        if array.dtype != np.uint16:
            tensors[modality_name] = torch.tensor(array)
        else:
            logger.warning(f"{modality_name} has dtype uint16, this is not supported by torch, skipping this modality")
    return tensors
def __init__(self, dataset_path): | ||
super().__init__() | ||
self.dataset_path = dataset_path | ||
self.video_files = glob.glob(f"{dataset_path}/*.mp4") | ||
|
||
def __len__(self) -> int: | ||
""" | ||
return the number of videos in the dataset | ||
""" | ||
return len(self.video_files) | ||
|
||
def __getitem__(self, idx) -> np.ndarray:
    """
    Decode every frame of the video at position ``idx``.

    Args:
        idx: index into ``self.video_files``.

    Returns:
        A single numpy array built from all frames in decode order
        (``np.array(frames)``). If no frame can be read, the result is an
        empty array.

    Note:
        The whole video is held in memory at once.
    """
    video_path = self.video_files[idx]
    cap = cv2.VideoCapture(video_path)

    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # A failed read marks the end of the stream
                # (presumably also an unreadable file; confirm against OpenCV docs).
                break
            frames.append(frame)
    finally:
        # Always free the capture handle, even if decoding or accumulation raises.
        cap.release()

    return np.array(frames)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.