From 3a51a199482f596c3b98b921c79d337010f48bb1 Mon Sep 17 00:00:00 2001 From: Zeyu Ma Date: Sun, 29 Sep 2024 21:36:14 -0400 Subject: [PATCH] random center; extend to indoors; avoid bad cases; postprocess script --- infinigen/core/placement/camera.py | 97 +++++++-- .../configs/data_schema/multiview_stereo.gin | 15 ++ infinigen/datagen/job_funcs.py | 10 +- infinigen/tools/process_mvs_data.py | 198 ++++++++++++++++++ .../configs_indoor/base_indoors.gin | 4 +- .../configs_nature/multiview_stereo.gin | 9 + infinigen_examples/configs_nature/mvs.gin | 5 - infinigen_examples/generate_indoors.py | 1 + infinigen_examples/generate_nature.py | 5 +- 9 files changed, 320 insertions(+), 24 deletions(-) create mode 100644 infinigen/datagen/configs/data_schema/multiview_stereo.gin create mode 100644 infinigen/tools/process_mvs_data.py create mode 100644 infinigen_examples/configs_nature/multiview_stereo.gin delete mode 100644 infinigen_examples/configs_nature/mvs.gin diff --git a/infinigen/core/placement/camera.py b/infinigen/core/placement/camera.py index 7774ebe9a..7f796a370 100644 --- a/infinigen/core/placement/camera.py +++ b/infinigen/core/placement/camera.py @@ -266,6 +266,7 @@ def camera_pose_proposal( location_sample: typing.Callable | tuple, center_coordinate=None, radius=None, + bbox=None, altitude=("uniform", 1.5, 2.5), roll=0, yaw=("uniform", -180, 180), @@ -282,15 +283,31 @@ def location_sample(): if override_loc is not None: loc = Vector(random_general(override_loc)) elif center_coordinate: - # Define the radius of the circle - random_angle = np.random.uniform(2 * np.math.pi) - xoff = np.random.uniform(-radius/10, radius/10) - yoff = np.random.uniform(-radius/10, radius/10) - zoff = np.random.uniform(center_coordinate[2]+5, center_coordinate[2]+8) - loc = Vector([0, 0, 0]) - loc[0] = center_coordinate[0] + radius * np.math.cos(random_angle) + xoff - loc[1] = center_coordinate[1] + radius * np.math.sin(random_angle) + yoff - loc[2] = center_coordinate[2] + zoff + while True: + # Define the radius of the circle + random_angle = np.random.uniform(2 * np.math.pi) + xoff = np.random.uniform(-radius / 10, radius / 10) + yoff = np.random.uniform(-radius / 10, radius / 10) + zoff = random_general(altitude) + loc = Vector([0, 0, 0]) + loc[0] = center_coordinate[0] + radius * np.math.cos(random_angle) + xoff + loc[1] = center_coordinate[1] + radius * np.math.sin(random_angle) + yoff + loc[2] = center_coordinate[2] + zoff + if bbox is not None: + out_of_bbox = False + for i in range(3): + if loc[i] < bbox[0][i] or loc[i] > bbox[1][i]: + out_of_bbox = True + break + if out_of_bbox: + continue + hit, *_ = scene_bvh.ray_cast( + loc, + Vector(center_coordinate) - loc, + (Vector(center_coordinate) - loc).length, + ) + if hit is None: + break elif altitude is None: loc = location_sample() else: @@ -306,8 +323,8 @@ def location_sample(): if center_coordinate: direction = loc - Vector(center_coordinate) direction = Vector(direction) - rotation_matrix = direction.to_track_quat('Z', 'Y').to_matrix() - rotation_euler = rotation_matrix.to_euler('XYZ') + rotation_matrix = direction.to_track_quat("Z", "Y").to_matrix() + rotation_euler = rotation_matrix.to_euler("XYZ") roll, pitch, yaw = rotation_euler noise_range = np.deg2rad(5.0) # 5 degrees of noise in radians # Add random noise to roll, pitch, and yaw @@ -316,7 +333,9 @@ def location_sample(): yaw += np.random.uniform(-noise_range, noise_range) rot = np.array([roll, pitch, yaw]) else: - rot = np.deg2rad([random_general(pitch), random_general(roll), 
random_general(yaw)]) + rot = np.deg2rad( + [random_general(pitch), random_general(roll), random_general(yaw)] + ) focal_length = random_general(focal_length) return CameraProposal(loc, rot, focal_length) @@ -410,7 +429,7 @@ def __call__(self, camera_rig, frame_curr, retry_pct, bvh): bbox = (camera_rig.location - margin, camera_rig.location + margin) for _ in range(self.retries): - res = camera_pose_proposal(bvh, bbox) # ! + res = camera_pose_proposal(bvh, bbox) # ! if res is None: continue dist = np.linalg.norm(np.array(res.loc) - np.array(camera_rig.location)) @@ -434,6 +453,8 @@ def compute_base_views( scene_bvh, location_sample: typing.Callable, center_coordinate=None, + radius=None, + bbox=None, placeholders_kd=None, camera_selection_answers={}, vertexwise_min_dist=None, @@ -444,14 +465,21 @@ def compute_base_views( ): potential_views = [] n_min_candidates = int(min_candidates_ratio * n_views) - random_radius = np.random.uniform(12, 18) logger.debug("Center Coordinate", center_coordinate) with tqdm(total=n_min_candidates, desc="Searching for camera viewpoints") as pbar: for it in range(1, max_tries): if center_coordinate: - props = camera_pose_proposal(scene_bvh=scene_bvh, location_sample=location_sample, center_coordinate=center_coordinate, radius=random_radius) + props = camera_pose_proposal( + scene_bvh=scene_bvh, + location_sample=location_sample, + center_coordinate=center_coordinate, + radius=random_general(radius), + bbox=bbox, + ) else: - props = camera_pose_proposal(scene_bvh=scene_bvh, location_sample=location_sample, radius=random_radius) + props = camera_pose_proposal( + scene_bvh=scene_bvh, location_sample=location_sample + ) if props is None: logger.debug( @@ -640,6 +668,10 @@ def configure_cameras( scene_preprocessed: dict, init_bounding_box: tuple[np.array, np.array] = None, init_surfaces: list[bpy.types.Object] = None, + terrain_mesh=None, + nonroom_objs=None, + mvs_setting=False, + mvs_radius=("uniform", 12, 18), ): bpy.context.view_layer.update() dummy_camera = spawn_camera() @@ -658,10 +690,43 @@ def location_sample(): else: raise ValueError("Either init_bounding_box or init_surfaces must be provided") + if mvs_setting: + if terrain_mesh: + vertices = np.array([np.array(v.co) for v in terrain_mesh.data.vertices]) + sdfs = scene_preprocessed["terrain"].compute_camera_space_sdf(vertices) + vertices = vertices[sdfs >= -1e-5] + center_coordinate = list( + vertices[np.random.choice(list(range(len(vertices))))] + ) + elif nonroom_objs: + + def contain_keywords(name, keywords): + for keyword in keywords: + if name == keyword or name.startswith(f"{keyword}."): + return True + return False + + inside_objs = [ + x + for x in nonroom_objs + if not contain_keywords(x.name, ["window", "door", "entrance"]) + ] + assert inside_objs != [] + obj = np.random.choice(inside_objs) + vertices = [v.co for v in obj.data.vertices] + center_coordinate = vertices[np.random.choice(list(range(len(vertices))))] + center_coordinate = obj.matrix_world @ center_coordinate + center_coordinate = list(np.array(center_coordinate)) + else: + center_coordinate = None + base_views = compute_base_views( dummy_camera, n_views=len(cam_rigs), location_sample=location_sample, + center_coordinate=center_coordinate, + radius=mvs_radius, + bbox=init_bounding_box, **scene_preprocessed, ) diff --git a/infinigen/datagen/configs/data_schema/multiview_stereo.gin b/infinigen/datagen/configs/data_schema/multiview_stereo.gin new file mode 100644 index 000000000..b30df2b32 --- /dev/null +++ 
b/infinigen/datagen/configs/data_schema/multiview_stereo.gin @@ -0,0 +1,15 @@ +iterate_scene_tasks.frame_range=[1,1] +iterate_scene_tasks.render_frame_range=[1,1] +iterate_scene_tasks.cam_id_ranges = [30,1] + +iterate_scene_tasks.global_tasks = [ + {'name': 'coarse', 'func': @queue_coarse}, + {'name': "fineterrain", 'func': @queue_fine_terrain}, + {'name': "populate", 'func': @queue_populate}, + {'name': 'backuppopulate', 'func': @renderbackup/queue_populate, 'condition': 'prev_failed'} +] +iterate_scene_tasks.view_dependent_tasks = [] +iterate_scene_tasks.camera_dependent_tasks = [ + {'name': 'shortrender', 'func': @rendershort/queue_render}, + {'name': 'backuprender', 'func': @renderbackup/queue_render, 'condition': 'prev_failed'}, +] diff --git a/infinigen/datagen/job_funcs.py b/infinigen/datagen/job_funcs.py index 096ea5ce8..0603171ad 100644 --- a/infinigen/datagen/job_funcs.py +++ b/infinigen/datagen/job_funcs.py @@ -217,6 +217,7 @@ def queue_populate( configs, taskname=None, input_prefix="fine", + exclude_gpus=[], overrides=[], input_indices=None, output_indices=None, @@ -250,7 +251,14 @@ def queue_populate( with (folder / "run_pipeline.sh").open("a") as f: f.write(f"{' '.join(' '.join(cmd).split())}\n\n") - res = submit_cmd(cmd, folder=folder, name=name, gpus=0, **kwargs) + res = submit_cmd( + cmd, + folder=folder, + name=name, + gpus=0, + slurm_exclude=nodes_with_gpus(*exclude_gpus), + **kwargs, + ) return res, output_folder diff --git a/infinigen/tools/process_mvs_data.py b/infinigen/tools/process_mvs_data.py new file mode 100644 index 000000000..4d4e2b18e --- /dev/null +++ b/infinigen/tools/process_mvs_data.py @@ -0,0 +1,198 @@ +# Copyright (C) 2024, Princeton University. +# This source code is licensed under the BSD 3-Clause license found in the LICENSE file in the root directory of this source tree. 
+ +# Authors: Zeyu Ma + +import argparse +import os +import shutil +from pathlib import Path + +import cv2 +import numpy as np +import submitit +import torch +import torch.nn.functional as F +from tqdm import tqdm + +from infinigen.tools.suffixes import parse_suffix + + +# these functions till check_cycle_consistency are from https://github.com/princeton-vl/SEA-RAFT +def transform(T, p): + assert T.shape == (4, 4) + return np.einsum("H W j, i j -> H W i", p, T[:3, :3]) + T[:3, 3] + + +def from_homog(x): + return x[..., :-1] / x[..., [-1]] + + +def coords_grid(batch, ht, wd, device): + coords = torch.meshgrid( + torch.arange(ht, device=device), torch.arange(wd, device=device) + ) + coords = torch.stack(coords[::-1], dim=0).float() + return coords[None].repeat(batch, 1, 1, 1) + + +def reproject(depth1, pose1, pose2, K1, K2): + H, W = depth1.shape + x, y = np.meshgrid(np.arange(W), np.arange(H), indexing="xy") + img_1_coords = np.stack((x, y, np.ones_like(x)), axis=-1).astype(np.float64) + cam1_coords = np.einsum( + "H W, H W j, i j -> H W i", depth1, img_1_coords, np.linalg.inv(K1) + ) + rel_pose = np.linalg.inv(pose2) @ pose1 + cam2_coords = transform(rel_pose, cam1_coords) + return from_homog(np.einsum("H W j, i j -> H W i", cam2_coords, K2)) + + +def induced_flow(depth0, depth1, data): + H, W = depth0.shape + coords1 = reproject(depth0, data["T0"], data["T1"], data["K0"], data["K1"]) + + x, y = np.meshgrid(np.arange(W), np.arange(H), indexing="xy") + coords0 = np.stack([x, y], axis=-1) + flow_01 = coords1 - coords0 + + H, W = depth1.shape + coords1 = reproject(depth1, data["T1"], data["T0"], data["K1"], data["K0"]) + x, y = np.meshgrid(np.arange(W), np.arange(H), indexing="xy") + coords0 = np.stack([x, y], axis=-1) + flow_10 = coords1 - coords0 + + return flow_01, flow_10 + + +def bilinear_sampler(img, coords, mode="bilinear", mask=False): + """Wrapper for grid_sample, uses pixel coordinates""" + H, W = img.shape[-2:] + xgrid, ygrid = coords.split([1, 1], dim=-1) + xgrid = 2 * xgrid / (W - 1) - 1 + ygrid = 2 * ygrid / (H - 1) - 1 + + grid = torch.cat([xgrid, ygrid], dim=-1) + img = F.grid_sample(img, grid, align_corners=True) + + if mask: + mask = (xgrid > -1) & (ygrid > -1) & (xgrid < 1) & (ygrid < 1) + return img, mask.float() + + return img + + +def check_cycle_consistency(flow_01, flow_10, threshold=1): + flow_01 = torch.from_numpy(flow_01).permute(2, 0, 1)[None] + flow_10 = torch.from_numpy(flow_10).permute(2, 0, 1)[None] + H, W = flow_01.shape[-2:] + coords = coords_grid(1, H, W, flow_01.device) + coords1 = coords + flow_01 + flow_reprojected = bilinear_sampler(flow_10, coords1.permute(0, 2, 3, 1)) + cycle = flow_reprojected + flow_01 + cycle = torch.norm(cycle, dim=1) + mask = (cycle < threshold).float() + return mask[0].numpy() + + +def compute_covisibility(depth0, depth1, camview0, camview1): + data = {} + data["K0"] = camview0["K"] + data["K1"] = camview1["K"] + data["T0"] = camview0["T"] + data["T1"] = camview1["T"] + flow_01, flow_10 = induced_flow(depth0, depth1, data) + mask = check_cycle_consistency(flow_01, flow_10) + return mask.mean() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--source_folder", type=Path, default=None) + parser.add_argument("--target_folder", type=Path) + parser.add_argument("--postprocess_only", type=int, default=False) + args = parser.parse_args() + + source_folder = args.source_folder + target_folder = args.target_folder + + if not args.postprocess_only: + scenes = [ + x for x in 
os.listdir(source_folder) if os.path.isdir(source_folder / x) + ] + for scene in tqdm(scenes): + image_dir = source_folder / scene / "frames/Image/camera_0" + if not os.path.exists(image_dir): + continue + images = [x for x in os.listdir(image_dir) if x.endswith(".png")] + for image in images: + im = cv2.imread(image_dir / image) + if im.mean() < 20: + continue + camera_path = ( + source_folder + / scene + / f"frames/camview/camera_0/camview{image[5:-4]}.npz" + ) + depth_path = ( + source_folder + / scene + / f"frames/Depth/camera_0/Depth{image[5:-4]}.npy" + ) + if not os.path.exists(camera_path): + continue + if not os.path.exists(depth_path): + continue + (target_folder / scene / "images").mkdir(parents=True, exist_ok=True) + (target_folder / scene / "cameras").mkdir(parents=True, exist_ok=True) + (target_folder / scene / "depths").mkdir(parents=True, exist_ok=True) + cam_id = parse_suffix(image)["cam_rig"] + shutil.copy( + image_dir / image, + target_folder / scene / "images" / f"{cam_id:04d}.png", + ) + shutil.copy( + camera_path, target_folder / scene / "cameras" / f"{cam_id:04d}.npz" + ) + shutil.copy( + depth_path, target_folder / scene / "depths" / f"{cam_id:04d}.npy" + ) + + scenes = os.listdir(target_folder) + + def worker(scene): + cam_ids = [ + x[:-4] + for x in os.listdir(target_folder / scene / "images") + if x.endswith(".png") + ] + with open(target_folder / scene / "pairs.txt", "w") as f: + for cam_id0 in cam_ids: + f.write(f"{cam_id0} ") + depth_path = target_folder / scene / f"depths/{cam_id0}.npy" + camera_path = target_folder / scene / f"cameras/{cam_id0}.npz" + depth0 = np.load(depth_path) + camview0 = np.load(camera_path) + for cam_id1 in cam_ids: + if cam_id1 == cam_id0: + continue + depth_path = target_folder / scene / f"depths/{cam_id1}.npy" + camera_path = target_folder / scene / f"cameras/{cam_id1}.npz" + depth1 = np.load(depth_path) + camview1 = np.load(camera_path) + cov = compute_covisibility(depth0, depth1, camview0, camview1) + f.write(f" {cam_id1} {cov}") + f.write("\n") + thumbnails = [] + for image in os.listdir(target_folder / scene / "images"): + im = cv2.imread(target_folder / scene / "images" / image) + H, W = im.shape[:2] + thumbnails.append(cv2.resize(im, (W // 10, H // 10))) + thumbnails = np.concatenate(thumbnails, 1) + cv2.imwrite(target_folder / scene / "thumbnails.png", thumbnails) + + log_folder = "~/sc/logs/%j" + executor = submitit.AutoExecutor(folder=log_folder) + executor.update_parameters(timeout_min=10, slurm_partition="allcs") + for scene in scenes: + job = executor.submit(worker, scene) diff --git a/infinigen_examples/configs_indoor/base_indoors.gin b/infinigen_examples/configs_indoor/base_indoors.gin index 90fd9ac0c..d22722d06 100644 --- a/infinigen_examples/configs_indoor/base_indoors.gin +++ b/infinigen_examples/configs_indoor/base_indoors.gin @@ -70,4 +70,6 @@ compose_indoors.floating_objs_enabled = False compose_indoors.num_floating = ('discrete_uniform', 15, 25) compose_indoors.norm_floating_size = True compose_indoors.enable_collision_floating = False -compose_indoors.enable_collision_solved = False \ No newline at end of file +compose_indoors.enable_collision_solved = False + +configure_cameras.mvs_radius = ("uniform", 1, 2) diff --git a/infinigen_examples/configs_nature/multiview_stereo.gin b/infinigen_examples/configs_nature/multiview_stereo.gin new file mode 100644 index 000000000..a56b1fab4 --- /dev/null +++ b/infinigen_examples/configs_nature/multiview_stereo.gin @@ -0,0 +1,9 @@ +camera.spawn_camera_rigs.n_camera_rigs = 30 
+camera.spawn_camera_rigs.camera_rig_config = [ + {'loc': (0, 0, 0), 'rot_euler': (0, 0, 0)}, +] +configure_cameras.mvs_setting = True +compose_nature.camera_selection_ranges_ratio = {} +compose_nature.camera_selection_tags_ratio = {} +compute_base_views.min_candidates_ratio = 1 +fine_terrain.mesher_backend = "OcMesher" \ No newline at end of file diff --git a/infinigen_examples/configs_nature/mvs.gin b/infinigen_examples/configs_nature/mvs.gin deleted file mode 100644 index ea4fc2818..000000000 --- a/infinigen_examples/configs_nature/mvs.gin +++ /dev/null @@ -1,5 +0,0 @@ -camera.spawn_camera_rigs.n_camera_rigs = 10 -camera.spawn_camera_rigs.camera_rig_config = [ - {'loc': (0, 0, 0), 'rot_euler': (0, 0, 0)}, -] -camera.compute_base_views.center_coordinate=(30, 10, 5) \ No newline at end of file diff --git a/infinigen_examples/generate_indoors.py b/infinigen_examples/generate_indoors.py index 129d9a8c3..7f73f80d3 100644 --- a/infinigen_examples/generate_indoors.py +++ b/infinigen_examples/generate_indoors.py @@ -252,6 +252,7 @@ def pose_cameras(): camera_rigs, scene_preprocessed=scene_preprocessed, init_surfaces=solved_floor_surface, + nonroom_objs=nonroom_objs, ) butil.delete(solved_floor_surface) return scene_preprocessed diff --git a/infinigen_examples/generate_nature.py b/infinigen_examples/generate_nature.py index 8f689dc61..f982c49de 100644 --- a/infinigen_examples/generate_nature.py +++ b/infinigen_examples/generate_nature.py @@ -286,7 +286,10 @@ def camera_preprocess(): p.run_stage( "pose_cameras", lambda: cam_util.configure_cameras( - camera_rigs, scene_preprocessed, init_bounding_box=bbox + camera_rigs, + scene_preprocessed, + init_bounding_box=bbox, + terrain_mesh=terrain_mesh, ), use_chance=False, )
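
Usage note (not part of the patch): the new postprocessing script is driven by the argparse flags it adds, e.g. python -m infinigen.tools.process_mvs_data --source_folder <rendered_scenes_dir> --target_folder <mvs_output_dir>. It copies per-camera images, camviews and depths into the target folder, then submits one submitit worker per scene to write pairs.txt covisibility scores and a thumbnail strip. The exact invocation of the datagen pipeline with the new data_schema/multiview_stereo.gin and configs_nature/multiview_stereo.gin configs is not shown here and depends on how you drive manage_jobs.

The sketch below is a minimal, self-contained illustration of the covisibility helper those workers call; it is not part of the patch. The intrinsics, poses and constant-depth maps are made up for illustration, and importing the module assumes its dependencies (torch, cv2, submitit, tqdm) are installed. Real inputs come from the exported camview*.npz files (keys "K" and "T") and Depth*.npy maps.

# Hypothetical example, not part of the patch: exercise compute_covisibility
# from the new infinigen/tools/process_mvs_data.py on synthetic data.
import numpy as np

from infinigen.tools.process_mvs_data import compute_covisibility

H, W = 120, 160

# Simple pinhole intrinsics (fx = fy = 100, principal point at the image center).
K = np.array(
    [[100.0, 0.0, W / 2],
     [0.0, 100.0, H / 2],
     [0.0, 0.0, 1.0]]
)

# Camera-to-world poses: camera 0 at the origin, camera 1 shifted 0.5 along +x.
T0 = np.eye(4)
T1 = np.eye(4)
T1[0, 3] = 0.5

# Constant-depth planes stand in for the rendered Depth maps.
depth0 = np.full((H, W), 5.0)
depth1 = np.full((H, W), 5.0)

# compute_covisibility only indexes "K" and "T", so plain dicts work in place
# of the np.load(...) camview archives used by the script.
camview0 = {"K": K, "T": T0}
camview1 = {"K": K, "T": T1}

# Fraction of pixels that survive the forward/backward flow cycle check.
cov = compute_covisibility(depth0, depth1, camview0, camview1)
print(f"covisibility: {cov:.3f}")

With identical cameras the flow cycle is consistent everywhere and the score is 1.0; the small baseline above pushes a strip of pixels out of view, so the score drops slightly below 1.0. This is the quantity the worker writes next to each camera pair in pairs.txt.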