[version0.1.3] Release 4 (#15)

## 🎯 What does this PR do? In Release 4: Model-level: Cyber supports new tokenizer model [Cosmos-Tokenizer](https://github.com/NVIDIA/Cosmos-Tokenizer) and new dynamic model [Deep Planning Network](https://github.com/-google-research/planet) Doc-level: Tutorial for world model is updated. ## 🔍 Related Issues  ## ✅ Quality Checklist - [✅] Ran `pre-commit` checks locally - [✅] Passed Ruff linting and formatting - [✅] Passed MyPy type checking - [✅] Added/updated tests - [✅] Updated documentation - [✅] Verified changes locally - [✅] No new warnings generated ## 🧪 Test Instructions  1. 2. 3. ## 📝 Additional Notes  ## 💻 Local Verification Steps ```bash # Run these commands to verify your changes pre-commit run --all-files pytest tests/ # if you modified any tested code ``` --------- Co-authored-by: Weilv Chi <[email protected]> Co-authored-by: Zhao Tang <[email protected]> Co-authored-by: Haosen Yang <[email protected]> Co-authored-by: Zhao Tang <[email protected]> Co-authored-by: a752994118 <[email protected]>
CyberOrigin2077 · Nov 18, 2024 · 867571d · 867571d
1 parent 74f5c1e
commit 867571d
Show file tree

Hide file tree

Showing 62 changed files with 5,132 additions and 516 deletions.
diff --git a/.github/workflows/documentation.yaml b/.github/workflows/documentation.yaml
@@ -6,12 +6,14 @@ on:
   push: # When the main branch is pushed to the GitHub repository
     branches:
       - main
+      - internal
     paths:
       - 'docs/**'
       - 'mkdocs.yaml'
   pull_request: # When a pull request is merged into the main branch
     branches:
       - main
+      - internal
     paths:
       - 'docs/**'
       - 'mkdocs.yaml'

diff --git a/CITATION.cff b/CITATION.cff
@@ -1,8 +1,10 @@
 cff-version: 1.2.0
 message: "If you use this software, please cite it using the following metadata."
 title: "Cyber"
-date-released: 2024-11-01
-version: "0.1.2"
+date-released: 2024-11-18
+version: "0.1.3"
 url: "https://github.com/CyberOrigin2077/Cyber"
 repository-code: "https://github.com/CyberOrigin2077/Cyber"
-license: "Apache License, Version 2.0, January 2004"
+license: "Apache License, Version 2.0, January 2004"
+authors:
+  - name: CYBERORIGIN PTE. LTD.
diff --git a/LICENSE b/LICENSE
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright 2024 CyberOrigin PTE.LTD.
+   Copyright 2024 CYBERORIGIN PTE. LTD.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

diff --git a/cyber/dataset/rawvideodataset.py b/cyber/dataset/rawvideodataset.py
@@ -1,45 +1,48 @@
 import logging
-
+import cv2
+import glob
 import numpy as np
-import pandas as pd
-import torch
-
-from cyber.dataset.cyberdataset import BaseCyberDataset
+from torch.utils.data import Dataset
 
 
 # Create a package-level logger
 logger = logging.getLogger(__name__)
 
 
-class RawVideoDataset(BaseCyberDataset):
+class RawVideoDataset(Dataset):
     """
-    Base class for all cyber datasets, provides some common functionality.
+    a simple dataset used to retrieve raw frames from videos
+    videos are stored in a directory with the following structure:
+    dataset_path
+    ├── video_0.mp4
+    ├── video_1.mp4
+    ...
     """
 
-    def __init__(self, dataset_path, only_color=True):
-        super().__init__(dataset_path)
-        self.only_color = only_color
-
-        # self.episodes_description = self.get_episodes_description(dataset_path)
-
-    @classmethod
-    def _load_all_modalities_data(cls, episode_description: pd.Series, only_color=True) -> dict:
-        modalities = {}
-        episode_id = episode_description["episode_id"]
-        modalities_description = episode_description["modalities"]
-        for modality_name, modality_description in modalities_description.items():
-            if only_color and modality_name != "color":
-                continue
-            modality = cls._load_modality_data(episode_description["path"], episode_id, modality_name, modality_description)
-            modalities[modality_name] = modality
-        return modalities
-
-    def __getitem__(self, idx):
-        data = self._load_all_modalities_data(self.episode_discription.loc[idx], self.only_color)
-        matched_data = {}
-        for modality_name, modality_data in data.items():
-            if modality_data["data"].dtype == np.uint16:
-                logger.warning(f"{modality_name} has dtype uint16, this is not supported by torch, skipping this modality")
-                continue
-            matched_data[modality_name] = torch.tensor(modality_data["data"])
-        return matched_data
+    def __init__(self, dataset_path):
+        super().__init__()
+        self.dataset_path = dataset_path
+        self.video_files = glob.glob(f"{dataset_path}/*.mp4")
+
+    def __len__(self) -> int:
+        """
+        return the number of videos in the dataset
+        """
+        return len(self.video_files)
+
+    def __getitem__(self, idx) -> np.ndarray:
+        """
+        get all frames from a single video, return them as a list of numpy arrays
+        """
+        video_path = self.video_files[idx]
+        cap = cv2.VideoCapture(video_path)
+
+        frames = []
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+            frames.append(frame)
+
+        cap.release()
+        return np.array(frames)
diff --git a/cyber/models/perception/mapping/topomap/gt_map.py b/cyber/models/perception/mapping/topomap/gt_map.py
@@ -2,14 +2,14 @@
 from skimage.io import imread
 
 
-class GTMap():
+class GTMap:
     def __init__(self, gt_map_file):
-        gt_map_filename = gt_map_file.split('/')[-1]
-        i1, i2, j1, j2 = [int(x) for x in gt_map_filename[12:-4].split('_')]
+        gt_map_filename = gt_map_file.split("/")[-1]
+        i1, i2, j1, j2 = [int(x) for x in gt_map_filename[12:-4].split("_")]
         self.start_i = i1
         self.start_j = j1
         gt_map = imread(gt_map_file)
         self.gt_map = gt_map
-        obstacle_map = (gt_map == 0)
-        explored_map = (gt_map != 127)
+        obstacle_map = gt_map == 0
+        explored_map = gt_map != 127
         grid_map = np.concatenate([explored_map[:, :, np.newaxis], obstacle_map[:, :, np.newaxis]], axis=2)
diff --git a/cyber/models/perception/mapping/topomap/localization.py b/cyber/models/perception/mapping/topomap/localization.py
@@ -1,6 +1,7 @@
 import rospy
 import os
 import numpy as np
+
 np.float = np.float64
 import ros_numpy
 from sensor_msgs.msg import PointCloud2, Image
@@ -11,11 +12,11 @@
 import time
 from utils import apply_pose_shift
 
-tests_dir = '/home/kirill/TopoSLAM/OpenPlaceRecognition/test_registration'
+tests_dir = "/home/kirill/TopoSLAM/OpenPlaceRecognition/test_registration"
 
 
-class Localizer():
-    def __init__(self, graph, gt_map, map_frame='map', publish=True, top_k=5):
+class Localizer:
+    def __init__(self, graph, gt_map, map_frame="map", publish=True, top_k=5):
         self.graph = graph
         self.gt_map = gt_map
         self.top_k = top_k
@@ -39,28 +40,28 @@ def __init__(self, graph, gt_map, map_frame='map', publish=True, top_k=5):
             os.mkdir(tests_dir)
         self.publish = publish
         self.map_frame = map_frame
-        self.result_publisher = rospy.Publisher('/localized_nodes', Float32MultiArray, latch=True, queue_size=100)
-        self.cand_cloud_publisher = rospy.Publisher('/candidate_cloud', PointCloud2, latch=True, queue_size=100)
-        self.matched_points_publisher = rospy.Publisher('/matched_points', Marker, latch=True, queue_size=100)
-        self.unmatched_points_publisher = rospy.Publisher('/unmatched_points', Marker, latch=True, queue_size=100)
-        self.transforms_publisher = rospy.Publisher('/localization_transforms', Marker, latch=True, queue_size=100)
-        self.first_pr_publisher = rospy.Publisher('/first_point', Marker, latch=True, queue_size=100)
-        self.first_pr_image_publisher = rospy.Publisher('/place_recognition/image', Image, latch=True, queue_size=100)
-        self.freeze_publisher = rospy.Publisher('/freeze', Bool, latch=True, queue_size=100)
+        self.result_publisher = rospy.Publisher("/localized_nodes", Float32MultiArray, latch=True, queue_size=100)
+        self.cand_cloud_publisher = rospy.Publisher("/candidate_cloud", PointCloud2, latch=True, queue_size=100)
+        self.matched_points_publisher = rospy.Publisher("/matched_points", Marker, latch=True, queue_size=100)
+        self.unmatched_points_publisher = rospy.Publisher("/unmatched_points", Marker, latch=True, queue_size=100)
+        self.transforms_publisher = rospy.Publisher("/localization_transforms", Marker, latch=True, queue_size=100)
+        self.first_pr_publisher = rospy.Publisher("/first_point", Marker, latch=True, queue_size=100)
+        self.first_pr_image_publisher = rospy.Publisher("/place_recognition/image", Image, latch=True, queue_size=100)
+        self.freeze_publisher = rospy.Publisher("/freeze", Bool, latch=True, queue_size=100)
         self.bridge = CvBridge()
 
     def save_reg_test_data(self, vertex_ids, transforms, pr_scores, reg_scores, save_dir):
         if not os.path.exists(save_dir):
             os.mkdir(save_dir)
-        np.savez(os.path.join(save_dir, 'ref_cloud.npz'), self.localized_cloud)
+        np.savez(os.path.join(save_dir, "ref_cloud.npz"), self.localized_cloud)
         # print('Mean of the ref cloud:', self.localized_cloud[:, :3].mean())
         tf_data = []
         gt_pose_data = [[self.localized_x, self.localized_y, self.localized_theta]]
         for idx, tf in zip(vertex_ids, transforms, strict=False):
             if idx >= 0:
                 vertex_dict = self.graph.vertices[idx]
-                x, y, theta = vertex_dict['pose_for_visualization']
-                cloud = vertex_dict['cloud']
+                x, y, theta = vertex_dict["pose_for_visualization"]
+                cloud = vertex_dict["cloud"]
                 # print('GT x, y, theta:', x, y, theta)
                 # np.savetxt(os.path.join(save_dir, 'cand_cloud_{}.txt'.format(idx)), cloud)
                 if tf is not None:
@@ -69,10 +70,10 @@ def save_reg_test_data(self, vertex_ids, transforms, pr_scores, reg_scores, save
                     tf_data.append([idx, 0, 0, 0, 0, 0, 0])
                 gt_pose_data.append([x, y, theta])
         # print('TF data:', tf_data)
-        np.savetxt(os.path.join(save_dir, 'gt_poses.txt'), np.array(gt_pose_data))
-        np.savetxt(os.path.join(save_dir, 'transforms.txt'), np.array(tf_data))
-        np.savetxt(os.path.join(save_dir, 'pr_scores.txt'), np.array(pr_scores))
-        np.savetxt(os.path.join(save_dir, 'reg_scores.txt'), np.array(reg_scores))
+        np.savetxt(os.path.join(save_dir, "gt_poses.txt"), np.array(gt_pose_data))
+        np.savetxt(os.path.join(save_dir, "transforms.txt"), np.array(tf_data))
+        np.savetxt(os.path.join(save_dir, "pr_scores.txt"), np.array(pr_scores))
+        np.savetxt(os.path.join(save_dir, "reg_scores.txt"), np.array(reg_scores))
 
     def publish_localization_results(self, vertex_id_first, vertex_ids_matched, vertex_ids_unmatched, rel_poses):
         # Publish top-1 PlaceRecognition vertex
@@ -92,14 +93,14 @@ def publish_localization_results(self, vertex_id_first, vertex_ids_matched, vert
         vertices_marker.color.g = 1
         vertices_marker.color.b = 0
         vertices_marker.color.a = 1
-        x, y, _ = self.graph.vertices[vertex_id_first]['pose_for_visualization']
-        img_front = self.graph.vertices[vertex_id_first]['img_front']
+        x, y, _ = self.graph.vertices[vertex_id_first]["pose_for_visualization"]
+        img_front = self.graph.vertices[vertex_id_first]["img_front"]
         vertices_marker.points.append(Point(x, y, 0.1))
         self.first_pr_publisher.publish(vertices_marker)
 
         # Publish top-1 PlaceRecognition image
         img_msg = self.bridge.cv2_to_imgmsg(img_front)
-        img_msg.encoding = 'rgb8'
+        img_msg.encoding = "rgb8"
         img_msg.header.stamp = rospy.Time.now()
         self.first_pr_image_publisher.publish(img_msg)
 
@@ -119,7 +120,7 @@ def publish_localization_results(self, vertex_id_first, vertex_ids_matched, vert
         vertices_marker.color.a = 1
         localized_vertices = [self.graph.vertices[i] for i in vertex_ids_matched]
         for vertex_dict in localized_vertices:
-            x, y, _ = vertex_dict['pose_for_visualization']
+            x, y, _ = vertex_dict["pose_for_visualization"]
             vertices_marker.points.append(Point(x, y, 0.1))
         self.matched_points_publisher.publish(vertices_marker)
 
@@ -134,7 +135,7 @@ def publish_localization_results(self, vertex_id_first, vertex_ids_matched, vert
         transforms_marker.color.a = 0.5
         transforms_marker.pose.orientation.w = 1
         for vertex_dict, rel_pose in zip(localized_vertices, rel_poses, strict=False):
-            x, y, theta = vertex_dict['pose_for_visualization']
+            x, y, theta = vertex_dict["pose_for_visualization"]
             transforms_marker.points.append(Point(x, y, 0.1))
             x, y, theta = apply_pose_shift([x, y, theta], *rel_pose)
             transforms_marker.points.append(Point(x, y, 0.1))
@@ -153,7 +154,7 @@ def publish_localization_results(self, vertex_id_first, vertex_ids_matched, vert
         vertices_marker.color.a = 1
         localized_vertices = [self.graph.vertices[i] for i in vertex_ids_unmatched]
         for vertex_dict in localized_vertices:
-            x, y, _ = vertex_dict['pose_for_visualization']
+            x, y, _ = vertex_dict["pose_for_visualization"]
             vertices_marker.points.append(Point(x, y, 0.1))
         self.unmatched_points_publisher.publish(vertices_marker)
 
@@ -182,17 +183,21 @@ def publish_result(self, vertex_ids, rel_poses):
         self.result_publisher.publish(result_msg)
         if len(vertex_ids) > 0:
             i = vertex_ids[0]
-            cloud = self.graph.vertices[i]['cloud']
-            cloud_with_fields = np.zeros((cloud.shape[0]), dtype=[
-                ('x', np.float32),
-                ('y', np.float32),
-                ('z', np.float32),  # ]),
-                ('r', np.uint8),
-                ('g', np.uint8),
-                ('b', np.uint8)])
-            cloud_with_fields['x'] = cloud[:, 0]
-            cloud_with_fields['y'] = cloud[:, 1]
-            cloud_with_fields['z'] = cloud[:, 2]
+            cloud = self.graph.vertices[i]["cloud"]
+            cloud_with_fields = np.zeros(
+                (cloud.shape[0]),
+                dtype=[
+                    ("x", np.float32),
+                    ("y", np.float32),
+                    ("z", np.float32),  # ]),
+                    ("r", np.uint8),
+                    ("g", np.uint8),
+                    ("b", np.uint8),
+                ],
+            )
+            cloud_with_fields["x"] = cloud[:, 0]
+            cloud_with_fields["y"] = cloud[:, 1]
+            cloud_with_fields["z"] = cloud[:, 2]
             # cloud_with_fields['r'] = cloud[:, 3]
             # cloud_with_fields['g'] = cloud[:, 4]
             # cloud_with_fields['b'] = cloud[:, 5]
@@ -202,12 +207,12 @@ def publish_result(self, vertex_ids, rel_poses):
                 cloud_msg.header.stamp = rospy.Time.now()
             else:
                 cloud_msg.header.stamp = self.stamp
-            cloud_msg.header.frame_id = 'base_link'
+            cloud_msg.header.frame_id = "base_link"
             self.cand_cloud_publisher.publish(cloud_msg)
 
     def localize(self, event=None):
         if self.global_pose_for_visualization is None:
-            print('No global pose provided!')
+            print("No global pose provided!")
             return
         dt = (rospy.Time.now() - self.stamp).to_sec()
         # print('Localization lag:', dt)
@@ -225,11 +230,9 @@ def localize(self, event=None):
         # print('Position at start:', self.global_pose_for_visualization)
         self.graph.global_pose_for_visualization = self.global_pose_for_visualization
         if self.cloud is not None:
-            vertex_ids_pr_raw, vertex_ids_pr, transforms, pr_scores, reg_scores = self.graph.get_k_most_similar(self.img_front,
-                                                                                                                self.img_back,
-                                                                                                                self.cloud,
-                                                                                                                self.stamp,
-                                                                                                                k=self.top_k)
+            vertex_ids_pr_raw, vertex_ids_pr, transforms, pr_scores, reg_scores = self.graph.get_k_most_similar(
+                self.img_front, self.img_back, self.cloud, self.stamp, k=self.top_k
+            )
             t2 = time.time()
             # print('Get k most similar time:', t2 - t1)
             # save_dir = os.path.join(tests_dir, 'test_{}'.format(self.cnt))