Skip to content

Commit

Permalink
update valscene_inference.py running scene0249_00
Browse files Browse the repository at this point in the history
  • Loading branch information
HaFred committed Jan 19, 2024
1 parent 5ba1b73 commit 0284dbc
Show file tree
Hide file tree
Showing 28 changed files with 5,167 additions and 140 deletions.
139 changes: 139 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
.idea
.wandb
imported_data
checkpoints
ckpt
data/
results/
incremental_results/
outputs
misc/vis.py
.vscode
.DS_Store
*.log
*.ckpt
*.bin

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
# CDRNet (ICCV 2023 Workshops)
This is a repository for "Cross-Dimensional Refined Learning for Real-Time 3D Visual Perception from Monocular Video".
### [Project Page](https://hafred.github.io/cdrnet/) | [Paper](https://openaccess.thecvf.com/content/ICCV2023W/JRDB/papers/Hong_Cross-Dimensional_Refined_Learning_for_Real-Time_3D_Visual_Perception_from_Monocular_ICCVW_2023_paper.pdf) | [Poster](https://github.com/stanfordironman/cdrnet.torch.2023ICCV/blob/main/iccv23_poster_cdrnet_final.pdf)
![CDRNet Real-Time Demo](assets/cdrnet_github.gif)

## Run Inference
```bash
python valscene_inference.py
```

## Configuration
The key modes can be configured in `configs/inference.yaml`; disabling `MODEL.DEPTH_PREDICTION` releases the model into the geometric-semantic inference mode. The geometric-semantic information is learned by MAP optimization with the help of 2D priors.

## Citation
Please consider citing our paper and giving the repository a ⭐ if you find it useful.
```
@inproceedings{Hong_2023_ICCV,
@inproceedings{hong2023cross,
author = {Hong, Ziyang and Yue, C. Patrick},
title = {Cross-Dimensional Refined Learning for Real-Time 3D Visual Perception from Monocular Video},
booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
Expand Down
Binary file added assets/cdrnet_github.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions configs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .default import _C as cfg
from .default import update_config
99 changes: 99 additions & 0 deletions configs/default.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from yacs.config import CfgNode as CN

# Default configuration tree. Every key that may appear in a YAML config file
# is declared here with its default value; update_config() below merges a
# YAML file on top of these defaults.
_C = CN()

# ---------------------------------------------------------------------------
# general / runtime options
# ---------------------------------------------------------------------------
_C.MODE = 'train'
_C.DATASET = 'scannet'
_C.PKL_PATH = 'data/meta_data'
_C.BATCH_SIZE = 1
_C.LOADCKPT = None
_C.LOGDIR = ''
_C.DEBUG = True
_C.WANDB = False
_C.WANDB_PROJ = ''
_C.WANDB_RUN_NAME = ''
_C.RESUME = True  # take the latest ckpt
_C.SUMMARY_FREQ = 20
_C.SAVE_FREQ = 1
_C.SEED = 42
_C.SAVE_SCENE_MESH = False
_C.SAVE_INCREMENTAL = False
_C.VIS_INCREMENTAL = False
_C.VIS_MESH_SEMSEG = True
_C.REDUCE_GPU_MEM = False
_C.RTMP_SERVER = None
_C.POSE_SERVER = None
_C.LOCAL_RANK = 0
_C.DISTRIBUTED = False

# ---------------------------------------------------------------------------
# test
# ---------------------------------------------------------------------------
_C.TEST = CN()
_C.TEST.PATH = ''
_C.TEST.N_VIEWS = 5
_C.TEST.N_WORKERS = 4
_C.TEST.DATASET_SPLIT = None

# ---------------------------------------------------------------------------
# model
# ---------------------------------------------------------------------------
_C.MODEL = CN()
_C.MODEL.N_VOX = [128, 224, 192]
_C.MODEL.VOXEL_SIZE = 0.04
# NOTE: THRESHOLDS used to be assigned twice with the same value; it is now
# declared once, here.
_C.MODEL.THRESHOLDS = [0, 0, 0]
_C.MODEL.N_STAGE = 3
_C.MODEL.STAGE_LIST = ['coarse', 'medium', 'fine']
_C.MODEL.TRAIN_NUM_SAMPLE = [4096, 16384, 65536]
_C.MODEL.TEST_NUM_SAMPLE = [32768, 131072]
_C.MODEL.LW = [1.0, 0.8, 0.64]
_C.MODEL.PIXEL_MEAN = [103.53, 116.28, 123.675]
_C.MODEL.PIXEL_STD = [1., 1., 1.]
_C.MODEL.POS_WEIGHT = 1.0
_C.MODEL.VIS_DEPTH = False
_C.MODEL.VIS_DEBUG_REFMNT = False
_C.MODEL.DEPTH_PREDICTION = False

_C.MODEL.BACKBONE2D = CN()
_C.MODEL.BACKBONE2D.ARC = 'fpn-mnas'
_C.MODEL.BACKBONE2D.CHANNELS = [96, 48, 24]  # c/m/f: 16th/8th/4th
_C.MODEL.SEMSEG_MULTISCALE = True
_C.MODEL.LW_SEMSEG = [1.0, 0.8, 0.64]  # semseg also needs to be multi-scale

_C.MODEL.SPARSEREG = CN()
_C.MODEL.SPARSEREG.DROPOUT = False

_C.MODEL.FUSION = CN()
# controls whether gru_fusion() is utilized in the coarse-to-fine network
_C.MODEL.FUSION.FUSION_ON = False
_C.MODEL.FUSION.HIDDEN_DIM = 64
_C.MODEL.FUSION.AVERAGE = False
# controls whether to merge the local TSDF volume with the global one; if
# True, the grid_mask needs to be updated to calculate the correct loss for
# the local volume, then valid_volume, and updated_coords
_C.MODEL.FUSION.FULL = False

_C.MODEL.CDR = CN()
_C.MODEL.CDR.SEMSEG_LOSS_INCLUDE = True  # optimize on semseg loss
_C.MODEL.CDR.DEPTH_PRED = False  # mvsnet init depth prediction and pointflow depth refinement
_C.MODEL.CDR.FEAT_REFMNT = False  # feature refinement
_C.MODEL.CDR.SEMSEG_REFMNT = False  # semseg 2d link to 3d
_C.MODEL.CDR.SEMSEG_2D = False
_C.MODEL.CDR.SEMSEG_CLASS_3D = 41  # semseg 2d link to 3d
_C.MODEL.CDR.SEMSEG_CLASS_2D = 20  # to solve label unmatched error? RuntimeError: CUDA error: device-side assert triggered
_C.MODEL.CDR.N_ITERS = 1
_C.MODEL.CDR.OFFSETS = [0.05, 0.05, 0.025]
_C.MODEL.CDR.FEAT_DIM = 32

# 2D depth prediction options
_C.MODEL.CDR.IMG_SIZE = (480, 640)  # this is 256x320 from 3dvnet, not used for now. The dbatch seems ok with 480x640
# model dimension settings
_C.MODEL.CDR.CHANNEL_FEAT_DIM = 24  # channel dim of feats_quarter; needs to match the channel setup in the FPN of backbone2d
_C.MODEL.CDR.GRID_EDGE_LEN = 0.08  # voxel resolution for scene-modeling step

# mod for dbatch in mvs setups
_C.MODEL.DEPTH_MVS = CN()  # some config for init depth pred with mvsnet
_C.MODEL.DEPTH_MVS.IMAGE_GT_SIZE = (480, 640)
_C.MODEL.DEPTH_MVS.DEPTH_GT_SIZE = (480, 640)
_C.MODEL.DEPTH_MVS.DEPTH_START = 0.5
_C.MODEL.DEPTH_MVS.DEPTH_INTERVAL = 0.05  # for the init depth pred from mvsnet, each interval is .05m
_C.MODEL.DEPTH_MVS.N_INTERVALS = 96
# previously considered: 'depth_pred_size': (60, 80), the resolution of feat_8, so that not oom
_C.MODEL.DEPTH_MVS.DEPTH_PRED_SIZE = (56, 56)  # resolution tested by 3dvnet, which shows it is more optimal than feat_8


def update_config(cfg, args):
    """Merge the config file named by ``args.cfg`` into ``cfg`` in place.

    Args:
        cfg: Config node exposing ``defrost()``, ``merge_from_file()`` and
            ``freeze()`` (e.g. the ``_C`` node defined in this module).
        args: Object whose ``cfg`` attribute is handed to
            ``cfg.merge_from_file``.

    Raises:
        Whatever ``cfg.merge_from_file`` (or reading ``args.cfg``) raises;
        the exception propagates unchanged.
    """
    cfg.defrost()
    try:
        cfg.merge_from_file(args.cfg)
    finally:
        # Re-freeze even when the merge fails, so a bad config file does not
        # leave the shared config object mutable.
        cfg.freeze()
38 changes: 38 additions & 0 deletions configs/inference_scene0249.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
DATASET: 'scannet'
BATCH_SIZE: 1
SAVE_SCENE_MESH: True
SAVE_INCREMENTAL: True
VIS_INCREMENTAL: False
REDUCE_GPU_MEM: True # drastically reduces GPU memory and will slow down inference a bit.
LOGDIR: 'logs_viz_one_scene_valset'
SAVE_FREQ: 1
MODE: 'val'
#DEBUG: False

TEST:
PATH: '/media/zhongad/2TB/dataset/scannet'
N_VIEWS: 9
N_WORKERS: 0

MODEL:
DEPTH_PREDICTION: False
VIS_DEPTH: False
N_STAGE: 3
THRESHOLDS: [ 0, 0, 0 ] # threshold for occupancy, that defines the sparsity for each stage
POS_WEIGHT: 1.5
N_VOX: [96, 96, 96]
VOXEL_SIZE: 0.04
TRAIN_NUM_SAMPLE: [4096, 16384, 65536]
TEST_NUM_SAMPLE: [4096, 16384, 65536]
BACKBONE2D:
ARC: 'fpn-mnas-1'
LW: [1.0, 0.8, 0.64]
LW_SEMSEG: [1.0, 0.8, 0.64]
FUSION:
FUSION_ON: True
FULL: True
CDR:
DEPTH_PRED: True # mvsnet init depth prediction and point flow depth refinement
FEAT_REFMNT: True # anchor occupancy refinement
SEMSEG_REFMNT: True # semseg refmnt
SEMSEG_2D: True
56 changes: 56 additions & 0 deletions datasets/batch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# The Batch class here caters to the graph NN used for point cloud edge convolutions
from torch_geometric import data
import numpy as np
import torch


class Batch(data.Data):
    """
    Batch class used in both the suncg dataloader and the scannet dataloader.

    The constructor arguments are stored unchanged as attributes on the
    underlying ``torch_geometric`` ``Data`` object. ``__inc__`` and
    ``__cat_dim__`` customise how those attributes are combined when several
    ``Batch`` objects are collated.
    """

    def __init__(self, images, rotmats, tvecs, K, depth_images, ref_src_edges, raw_imgs):
        super().__init__()
        self.images = images
        self.rotmats = rotmats
        self.tvecs = tvecs
        self.K = K
        self.depth_images = depth_images
        self.ref_src_edges = ref_src_edges
        self.raw_imgs = raw_imgs

    def __inc__(self, key, value, *args, **kwargs):
        """Return the increment applied to ``value`` of ``key`` during collation.

        ``ref_src_edges`` is offset by the number of images in this sample
        (``self.images.shape[0]``); every other key defers to the parent class.
        """
        if key == 'ref_src_edges':
            return self.images.shape[0]
        # Forward any extra arguments so the parent sees the same call it
        # would have received without this override.
        return super().__inc__(key, value, *args, **kwargs)

    def __cat_dim__(self, key, value, *args, **kwargs):
        """Return the concatenation dimension for ``key``.

        Keys containing ``'edges'`` are concatenated along dimension 1, all
        other keys along dimension 0.
        """
        return 1 if 'edges' in key else 0

    def save(self, filepath):
        """Write all tensors of this batch to ``filepath`` via ``np.savez``.

        Every field read back by :meth:`load` is written, including
        ``raw_imgs``. Note that ``np.savez`` appends ``.npz`` to ``filepath``
        when it does not already end with that extension.
        """
        np.savez(
            filepath,
            images=self.images.detach().cpu().numpy(),
            # raw_imgs was previously omitted, which made load() fail with a
            # KeyError on every archive produced by save().
            raw_imgs=self.raw_imgs.detach().cpu().numpy(),
            rotmats=self.rotmats.detach().cpu().numpy(),
            tvecs=self.tvecs.detach().cpu().numpy(),
            K=self.K.detach().cpu().numpy(),
            depth_images=self.depth_images.detach().cpu().numpy(),
            ref_src_edges=self.ref_src_edges.detach().cpu().numpy(),
        )

    @staticmethod
    def load(filepath):
        """Rebuild a ``Batch`` from an archive written by :meth:`save`.

        All fields are converted to float tensors except ``ref_src_edges``,
        which is converted to a long tensor.

        Raises:
            KeyError: if the archive lacks one of the expected arrays (for
                example an archive written before ``raw_imgs`` was saved).
        """
        # The context manager closes the underlying archive file once all
        # arrays have been read into memory.
        with np.load(filepath) as archive:
            return Batch(
                images=torch.from_numpy(archive['images']).float(),
                raw_imgs=torch.from_numpy(archive['raw_imgs']).float(),
                rotmats=torch.from_numpy(archive['rotmats']).float(),
                tvecs=torch.from_numpy(archive['tvecs']).float(),
                K=torch.from_numpy(archive['K']).float(),
                depth_images=torch.from_numpy(archive['depth_images']).float(),
                ref_src_edges=torch.from_numpy(archive['ref_src_edges']).long(),
            )

Loading

0 comments on commit 0284dbc

Please sign in to comment.