update valscene_inference.py running scene0249_00

HKUST-ECE-IC-Design-Center-OWL · Jan 19, 2024 · 0284dbc · 0284dbc
1 parent 5ba1b73
commit 0284dbc
Show file tree

Hide file tree

Showing 28 changed files with 5,167 additions and 140 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,139 @@
+.idea
+.wandb
+imported_data
+checkpoints
+ckpt
+data/
+results/
+incremental_results/
+outputs
+misc/vis.py
+.vscode
+.DS_Store
+*.log
+*.ckpt
+*.bin
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
diff --git a/README.md b/README.md
@@ -1,10 +1,20 @@
 # CDRNet (ICCV 2023 Workshops)
 This is a repository for "Cross-Dimensional Refined Learning for Real-Time 3D Visual Perception from Monocular Video".
 ### [Project Page](https://hafred.github.io/cdrnet/) | [Paper](https://openaccess.thecvf.com/content/ICCV2023W/JRDB/papers/Hong_Cross-Dimensional_Refined_Learning_for_Real-Time_3D_Visual_Perception_from_Monocular_ICCVW_2023_paper.pdf) | [Poster](https://github.com/stanfordironman/cdrnet.torch.2023ICCV/blob/main/iccv23_poster_cdrnet_final.pdf)
+![CDRNet Real-Time Demo](assets/cdrnet_github.gif)
+
+## Run Inference
+```bash
+python valscene_inference.py
+```
+
+## Configuration
+The key modes can be configured under `configs/inference.yaml`, where disabling `MODEL.DEPTH_PREDICTION` switches the model into the geometric-semantic inference mode. The geometric-semantic information has been learned by MAP optimization with the help of 2D priors.  
 
 ## Citation
+Please consider citing our paper and giving this repository a ⭐ if you find it useful.
 ```
-@inproceedings{Hong_2023_ICCV,
+@inproceedings{hong2023cross,
     author    = {Hong, Ziyang and Yue, C. Patrick},
     title     = {Cross-Dimensional Refined Learning for Real-Time 3D Visual Perception from Monocular Video},
     booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},

diff --git a/assets/cdrnet_github.gif b/assets/cdrnet_github.gif
diff --git a/configs/__init__.py b/configs/__init__.py
@@ -0,0 +1,2 @@
+from .default import _C as cfg
+from .default import update_config
diff --git a/configs/default.py b/configs/default.py
@@ -0,0 +1,99 @@
+from yacs.config import CfgNode as CN
+
+# Root config node holding the default value of every option; update_config()
+# at the bottom of this module overlays a YAML file on top of these defaults.
+_C = CN()
+
+# general run options
+_C.MODE = 'train'
+_C.DATASET = 'scannet'
+_C.PKL_PATH = 'data/meta_data'
+_C.BATCH_SIZE = 1
+_C.LOADCKPT = None
+_C.LOGDIR = ''
+_C.DEBUG = True
+_C.WANDB = False
+_C.WANDB_PROJ = ''
+_C.WANDB_RUN_NAME = ''
+_C.RESUME = True  # take the latest ckpt
+_C.SUMMARY_FREQ = 20
+_C.SAVE_FREQ = 1
+_C.SEED = 42
+_C.SAVE_SCENE_MESH = False
+_C.SAVE_INCREMENTAL = False
+_C.VIS_INCREMENTAL = False
+_C.VIS_MESH_SEMSEG = True
+_C.REDUCE_GPU_MEM = False  # when enabled: drastically reduces GPU memory and slows down inference a bit
+_C.RTMP_SERVER = None
+_C.POSE_SERVER = None
+_C.LOCAL_RANK = 0
+_C.DISTRIBUTED = False
+
+# test: options grouped under the TEST sub-node
+_C.TEST = CN()
+_C.TEST.PATH = ''
+_C.TEST.N_VIEWS = 5
+_C.TEST.N_WORKERS = 4
+_C.TEST.DATASET_SPLIT = None
+
+# model: options grouped under the MODEL sub-node
+_C.MODEL = CN()
+_C.MODEL.N_VOX = [128, 224, 192]
+_C.MODEL.VOXEL_SIZE = 0.04
+_C.MODEL.THRESHOLDS = [0, 0, 0]  # threshold for occupancy, that defines the sparsity for each stage
+_C.MODEL.N_STAGE = 3
+_C.MODEL.STAGE_LIST = ['coarse', 'medium', 'fine']
+_C.MODEL.TRAIN_NUM_SAMPLE = [4096, 16384, 65536]
+# NOTE(review): only two entries here although N_STAGE is 3 and
+# TRAIN_NUM_SAMPLE has three -- confirm this default is intended.
+_C.MODEL.TEST_NUM_SAMPLE = [32768, 131072]
+_C.MODEL.LW = [1.0, 0.8, 0.64]
+_C.MODEL.PIXEL_MEAN = [103.53, 116.28, 123.675]
+_C.MODEL.PIXEL_STD = [1., 1., 1.]
+_C.MODEL.POS_WEIGHT = 1.0
+_C.MODEL.VIS_DEPTH = False
+_C.MODEL.VIS_DEBUG_REFMNT = False
+_C.MODEL.DEPTH_PREDICTION = False
+
+# 2D backbone options
+_C.MODEL.BACKBONE2D = CN()
+_C.MODEL.BACKBONE2D.ARC = 'fpn-mnas'
+_C.MODEL.BACKBONE2D.CHANNELS = [96, 48, 24]  # c/m/f: 16th/8th/4th
+# NOTE: the next two options live directly on MODEL, not on BACKBONE2D.
+_C.MODEL.SEMSEG_MULTISCALE = True
+_C.MODEL.LW_SEMSEG = [1.0, 0.8, 0.64]  # semseg also needs to be multi-scale
+
+# SPARSEREG sub-node
+_C.MODEL.SPARSEREG = CN()
+_C.MODEL.SPARSEREG.DROPOUT = False
+
+# FUSION sub-node
+_C.MODEL.FUSION = CN()
+_C.MODEL.FUSION.FUSION_ON = False  # control whether gru_fusion() is utilized in the coarse to fine network
+_C.MODEL.FUSION.HIDDEN_DIM = 64
+_C.MODEL.FUSION.AVERAGE = False
+_C.MODEL.FUSION.FULL = False  # control whether to merge the local TSDF volume with the global one, if True, needs to update the grid_mask to calculate correct loss for the local, then valid_volume, and updated_coords
+# CDR sub-node: switches for depth prediction, feature refinement and semseg
+_C.MODEL.CDR = CN()
+_C.MODEL.CDR.SEMSEG_LOSS_INCLUDE = True  # optimize on semseg loss
+_C.MODEL.CDR.DEPTH_PRED = False  # mvsnet init depth prediction and pointflow depth refinement
+_C.MODEL.CDR.FEAT_REFMNT = False  # feature refinement
+_C.MODEL.CDR.SEMSEG_REFMNT = False  # semseg 2d link to 3d
+_C.MODEL.CDR.SEMSEG_2D = False
+_C.MODEL.CDR.SEMSEG_CLASS_3D = 41  # semseg 2d link to 3d
+_C.MODEL.CDR.SEMSEG_CLASS_2D = 20  # to solve label unmatched error? RuntimeError: CUDA error: device-side assert triggered
+_C.MODEL.CDR.N_ITERS = 1
+_C.MODEL.CDR.OFFSETS = [0.05, 0.05, 0.025]
+_C.MODEL.CDR.FEAT_DIM = 32
+
+# 2D depth prediction options
+_C.MODEL.CDR.IMG_SIZE = (480, 640)  # this is 256x320 from 3dvnet, not used for now. The dbatch seems ok with 480x640
+# model dimension settings
+_C.MODEL.CDR.CHANNEL_FEAT_DIM = 24  # this is the channel dim of feats_quarter, needed to be matched with the channel setup in FPN of backbone2d
+_C.MODEL.CDR.GRID_EDGE_LEN = 0.08  # voxel resolution for scene-modeling step
+
+# mod for dbatch in mvs setups
+_C.MODEL.DEPTH_MVS = CN()  # some config for init depth pred with mvsnet
+_C.MODEL.DEPTH_MVS.IMAGE_GT_SIZE = (480, 640)
+_C.MODEL.DEPTH_MVS.DEPTH_GT_SIZE = (480, 640)
+_C.MODEL.DEPTH_MVS.DEPTH_START = 0.5
+_C.MODEL.DEPTH_MVS.DEPTH_INTERVAL = 0.05  # for the init depth pred from mvsnet, each interval is .05m
+_C.MODEL.DEPTH_MVS.N_INTERVALS = 96  # number of depth intervals (presumably counted from DEPTH_START in DEPTH_INTERVAL steps -- confirm)
+# Leftover author note, apparently an alternative for DEPTH_PRED_SIZE:
+# 'depth_pred_size': (60, 80), the resolution of feat_8, chosen so as not to run out of memory.
+_C.MODEL.DEPTH_MVS.DEPTH_PRED_SIZE = (56, 56)  # resolution tested by 3dvnet, which reports it as better than feat_8
+
+
+def update_config(cfg, args):
+    """Overlay the options from the file ``args.cfg`` onto ``cfg`` in place.
+
+    Args:
+        cfg: config node to update; it is defrosted for the merge and frozen
+            again afterwards.
+        args: object whose ``cfg`` attribute is passed to
+            ``cfg.merge_from_file`` (the path of the config file to merge).
+
+    Raises:
+        Whatever ``cfg.merge_from_file`` raises. The node is frozen again
+        before the exception propagates.
+    """
+    cfg.defrost()
+    try:
+        cfg.merge_from_file(args.cfg)
+    finally:
+        # Re-freeze even when the merge fails so the config is never left mutable.
+        cfg.freeze()
diff --git a/configs/inference_scene0249.yaml b/configs/inference_scene0249.yaml
@@ -0,0 +1,38 @@
+DATASET: 'scannet'
+BATCH_SIZE: 1
+SAVE_SCENE_MESH: True
+SAVE_INCREMENTAL: True
+VIS_INCREMENTAL: False
+REDUCE_GPU_MEM: True  # drastically reduces GPU memory and will slow down inference a bit.
+LOGDIR: 'logs_viz_one_scene_valset'
+SAVE_FREQ: 1
+MODE: 'val'
+#DEBUG: False
+
+TEST:
+  PATH: '/media/zhongad/2TB/dataset/scannet'
+  N_VIEWS: 9
+  N_WORKERS: 0
+
+MODEL:
+  DEPTH_PREDICTION: False
+  VIS_DEPTH: False
+  N_STAGE: 3
+  THRESHOLDS: [ 0, 0, 0 ]  # threshold for occupancy, that defines the sparsity for each stage
+  POS_WEIGHT: 1.5
+  N_VOX: [96, 96, 96]
+  VOXEL_SIZE: 0.04
+  TRAIN_NUM_SAMPLE: [4096, 16384, 65536]
+  TEST_NUM_SAMPLE: [4096, 16384, 65536]
+  BACKBONE2D:
+    ARC: 'fpn-mnas-1'
+  LW: [1.0, 0.8, 0.64]
+  LW_SEMSEG: [1.0, 0.8, 0.64]
+  FUSION:
+    FUSION_ON: True
+    FULL: True
+  CDR:
+    DEPTH_PRED: True  # mvsnet init depth prediction and point flow depth refinement
+    FEAT_REFMNT: True  # anchor occupancy refinement
+    SEMSEG_REFMNT: True  # semseg refmnt
+    SEMSEG_2D: True
diff --git a/datasets/batch.py b/datasets/batch.py
@@ -0,0 +1,56 @@
+# The batch class here is to cater the graph NN for point cloud edge convolutions
+from torch_geometric import data
+import numpy as np
+import torch
+
+
+class Batch(data.Data):
+    """
+    Batch class used in both the suncg dataloader and the scannet dataloader.
+
+    Stores the constructor arguments as attributes of a torch_geometric
+    ``Data`` object and overrides the ``__inc__`` / ``__cat_dim__`` hooks for
+    the edge attribute. ``save`` / ``load`` round-trip all seven attributes
+    through a NumPy ``.npz`` archive.
+    """
+    def __init__(self, images, rotmats, tvecs, K, depth_images, ref_src_edges, raw_imgs):
+        super(Batch, self).__init__()
+        self.images = images
+        self.rotmats = rotmats
+        self.tvecs = tvecs
+        self.K = K
+        self.depth_images = depth_images
+        self.ref_src_edges = ref_src_edges
+        self.raw_imgs = raw_imgs
+
+    def __inc__(self, key, value, *args, **kwargs):
+        # For 'ref_src_edges' the increment is the size of the first dimension
+        # of `images`; every other key uses the parent class's increment.
+        if key == 'ref_src_edges':
+            return self.images.shape[0]
+        else:
+            return super(Batch, self).__inc__(key, value)
+
+    def __cat_dim__(self, key, value, *args, **kwargs):
+        # Keys containing 'edges' use dimension 1, all other keys dimension 0.
+        if 'edges' in key:
+            return 1
+        else:
+            return 0
+
+    def save(self, filepath):
+        """Write every attribute of the batch to ``filepath`` with ``np.savez``.
+
+        ``raw_imgs`` is written too, because ``load`` reads it back; without
+        it a saved batch cannot be loaded again. ``np.savez`` appends
+        ``.npz`` to a name that lacks it, so pass that full name to ``load``.
+        """
+        np.savez(
+            filepath,
+            images=self.images.detach().cpu().numpy(),
+            raw_imgs=self.raw_imgs.detach().cpu().numpy(),
+            rotmats=self.rotmats.detach().cpu().numpy(),
+            tvecs=self.tvecs.detach().cpu().numpy(),
+            K=self.K.detach().cpu().numpy(),
+            depth_images=self.depth_images.detach().cpu().numpy(),
+            ref_src_edges=self.ref_src_edges.detach().cpu().numpy(),
+        )
+
+    @staticmethod
+    def load(filepath):
+        """Rebuild a ``Batch`` from an archive written by ``save``.
+
+        All arrays are converted to float tensors except ``ref_src_edges``,
+        which becomes a long tensor.
+
+        Raises:
+            KeyError: if the archive lacks one of the expected arrays, e.g.
+                an archive written before ``save`` stored ``raw_imgs``.
+        """
+        # The context manager closes the archive's file handle once every
+        # array has been read; the local name avoids shadowing the imported
+        # ``data`` module.
+        with np.load(filepath) as archive:
+            return Batch(
+                images=torch.from_numpy(archive['images']).float(),
+                raw_imgs=torch.from_numpy(archive['raw_imgs']).float(),
+                rotmats=torch.from_numpy(archive['rotmats']).float(),
+                tvecs=torch.from_numpy(archive['tvecs']).float(),
+                K=torch.from_numpy(archive['K']).float(),
+                depth_images=torch.from_numpy(archive['depth_images']).float(),
+                ref_src_edges=torch.from_numpy(archive['ref_src_edges']).long(),
+            )
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from .default import _C as cfg
		from .default import update_config