From 3a51a199482f596c3b98b921c79d337010f48bb1 Mon Sep 17 00:00:00 2001 From: Zeyu Ma Date: Sun, 29 Sep 2024 21:36:14 -0400 Subject: [PATCH] random center; extend to indoors; avoid bad cases; postprocess script --- infinigen/core/placement/camera.py | 97 +++++++-- .../configs/data_schema/multiview_stereo.gin | 15 ++ infinigen/datagen/job_funcs.py | 10 +- infinigen/tools/process_mvs_data.py | 198 ++++++++++++++++++ .../configs_indoor/base_indoors.gin | 4 +- .../configs_nature/multiview_stereo.gin | 9 + infinigen_examples/configs_nature/mvs.gin | 5 - infinigen_examples/generate_indoors.py | 1 + infinigen_examples/generate_nature.py | 5 +- 9 files changed, 320 insertions(+), 24 deletions(-) create mode 100644 infinigen/datagen/configs/data_schema/multiview_stereo.gin create mode 100644 infinigen/tools/process_mvs_data.py create mode 100644 infinigen_examples/configs_nature/multiview_stereo.gin delete mode 100644 infinigen_examples/configs_nature/mvs.gin diff --git a/infinigen/core/placement/camera.py b/infinigen/core/placement/camera.py index 7774ebe9a..7f796a370 100644 --- a/infinigen/core/placement/camera.py +++ b/infinigen/core/placement/camera.py @@ -266,6 +266,7 @@ def camera_pose_proposal( location_sample: typing.Callable | tuple, center_coordinate=None, radius=None, + bbox=None, altitude=("uniform", 1.5, 2.5), roll=0, yaw=("uniform", -180, 180), @@ -282,15 +283,31 @@ def location_sample(): if override_loc is not None: loc = Vector(random_general(override_loc)) elif center_coordinate: - # Define the radius of the circle - random_angle = np.random.uniform(2 * np.math.pi) - xoff = np.random.uniform(-radius/10, radius/10) - yoff = np.random.uniform(-radius/10, radius/10) - zoff = np.random.uniform(center_coordinate[2]+5, center_coordinate[2]+8) - loc = Vector([0, 0, 0]) - loc[0] = center_coordinate[0] + radius * np.math.cos(random_angle) + xoff - loc[1] = center_coordinate[1] + radius * np.math.sin(random_angle) + yoff - loc[2] = center_coordinate[2] + zoff + while True: + # Define the radius of the circle + random_angle = np.random.uniform(2 * np.math.pi) + xoff = np.random.uniform(-radius / 10, radius / 10) + yoff = np.random.uniform(-radius / 10, radius / 10) + zoff = random_general(altitude) + loc = Vector([0, 0, 0]) + loc[0] = center_coordinate[0] + radius * np.math.cos(random_angle) + xoff + loc[1] = center_coordinate[1] + radius * np.math.sin(random_angle) + yoff + loc[2] = center_coordinate[2] + zoff + if bbox is not None: + out_of_bbox = False + for i in range(3): + if loc[i] < bbox[0][i] or loc[i] > bbox[1][i]: + out_of_bbox = True + break + if out_of_bbox: + continue + hit, *_ = scene_bvh.ray_cast( + loc, + Vector(center_coordinate) - loc, + (Vector(center_coordinate) - loc).length, + ) + if hit is None: + break elif altitude is None: loc = location_sample() else: @@ -306,8 +323,8 @@ def location_sample(): if center_coordinate: direction = loc - Vector(center_coordinate) direction = Vector(direction) - rotation_matrix = direction.to_track_quat('Z', 'Y').to_matrix() - rotation_euler = rotation_matrix.to_euler('XYZ') + rotation_matrix = direction.to_track_quat("Z", "Y").to_matrix() + rotation_euler = rotation_matrix.to_euler("XYZ") roll, pitch, yaw = rotation_euler noise_range = np.deg2rad(5.0) # 5 degrees of noise in radians # Add random noise to roll, pitch, and yaw @@ -316,7 +333,9 @@ def location_sample(): yaw += np.random.uniform(-noise_range, noise_range) rot = np.array([roll, pitch, yaw]) else: - rot = np.deg2rad([random_general(pitch), random_general(roll), 
random_general(yaw)]) + rot = np.deg2rad( + [random_general(pitch), random_general(roll), random_general(yaw)] + ) focal_length = random_general(focal_length) return CameraProposal(loc, rot, focal_length) @@ -410,7 +429,7 @@ def __call__(self, camera_rig, frame_curr, retry_pct, bvh): bbox = (camera_rig.location - margin, camera_rig.location + margin) for _ in range(self.retries): - res = camera_pose_proposal(bvh, bbox) # ! + res = camera_pose_proposal(bvh, bbox) # ! if res is None: continue dist = np.linalg.norm(np.array(res.loc) - np.array(camera_rig.location)) @@ -434,6 +453,8 @@ def compute_base_views( scene_bvh, location_sample: typing.Callable, center_coordinate=None, + radius=None, + bbox=None, placeholders_kd=None, camera_selection_answers={}, vertexwise_min_dist=None, @@ -444,14 +465,21 @@ def compute_base_views( ): potential_views = [] n_min_candidates = int(min_candidates_ratio * n_views) - random_radius = np.random.uniform(12, 18) logger.debug("Center Coordinate", center_coordinate) with tqdm(total=n_min_candidates, desc="Searching for camera viewpoints") as pbar: for it in range(1, max_tries): if center_coordinate: - props = camera_pose_proposal(scene_bvh=scene_bvh, location_sample=location_sample, center_coordinate=center_coordinate, radius=random_radius) + props = camera_pose_proposal( + scene_bvh=scene_bvh, + location_sample=location_sample, + center_coordinate=center_coordinate, + radius=random_general(radius), + bbox=bbox, + ) else: - props = camera_pose_proposal(scene_bvh=scene_bvh, location_sample=location_sample, radius=random_radius) + props = camera_pose_proposal( + scene_bvh=scene_bvh, location_sample=location_sample + ) if props is None: logger.debug( @@ -640,6 +668,10 @@ def configure_cameras( scene_preprocessed: dict, init_bounding_box: tuple[np.array, np.array] = None, init_surfaces: list[bpy.types.Object] = None, + terrain_mesh=None, + nonroom_objs=None, + mvs_setting=False, + mvs_radius=("uniform", 12, 18), ): bpy.context.view_layer.update() dummy_camera = spawn_camera() @@ -658,10 +690,43 @@ def location_sample(): else: raise ValueError("Either init_bounding_box or init_surfaces must be provided") + if mvs_setting: + if terrain_mesh: + vertices = np.array([np.array(v.co) for v in terrain_mesh.data.vertices]) + sdfs = scene_preprocessed["terrain"].compute_camera_space_sdf(vertices) + vertices = vertices[sdfs >= -1e-5] + center_coordinate = list( + vertices[np.random.choice(list(range(len(vertices))))] + ) + elif nonroom_objs: + + def contain_keywords(name, keywords): + for keyword in keywords: + if name == keyword or name.startswith(f"{keyword}."): + return True + return False + + inside_objs = [ + x + for x in nonroom_objs + if not contain_keywords(x.name, ["window", "door", "entrance"]) + ] + assert inside_objs != [] + obj = np.random.choice(inside_objs) + vertices = [v.co for v in obj.data.vertices] + center_coordinate = vertices[np.random.choice(list(range(len(vertices))))] + center_coordinate = obj.matrix_world @ center_coordinate + center_coordinate = list(np.array(center_coordinate)) + else: + center_coordinate = None + base_views = compute_base_views( dummy_camera, n_views=len(cam_rigs), location_sample=location_sample, + center_coordinate=center_coordinate, + radius=mvs_radius, + bbox=init_bounding_box, **scene_preprocessed, ) diff --git a/infinigen/datagen/configs/data_schema/multiview_stereo.gin b/infinigen/datagen/configs/data_schema/multiview_stereo.gin new file mode 100644 index 000000000..b30df2b32 --- /dev/null +++ 
b/infinigen/datagen/configs/data_schema/multiview_stereo.gin @@ -0,0 +1,15 @@ +iterate_scene_tasks.frame_range=[1,1] +iterate_scene_tasks.render_frame_range=[1,1] +iterate_scene_tasks.cam_id_ranges = [30,1] + +iterate_scene_tasks.global_tasks = [ + {'name': 'coarse', 'func': @queue_coarse}, + {'name': "fineterrain", 'func': @queue_fine_terrain}, + {'name': "populate", 'func': @queue_populate}, + {'name': 'backuppopulate', 'func': @renderbackup/queue_populate, 'condition': 'prev_failed'} +] +iterate_scene_tasks.view_dependent_tasks = [] +iterate_scene_tasks.camera_dependent_tasks = [ + {'name': 'shortrender', 'func': @rendershort/queue_render}, + {'name': 'backuprender', 'func': @renderbackup/queue_render, 'condition': 'prev_failed'}, +] diff --git a/infinigen/datagen/job_funcs.py b/infinigen/datagen/job_funcs.py index 096ea5ce8..0603171ad 100644 --- a/infinigen/datagen/job_funcs.py +++ b/infinigen/datagen/job_funcs.py @@ -217,6 +217,7 @@ def queue_populate( configs, taskname=None, input_prefix="fine", + exclude_gpus=[], overrides=[], input_indices=None, output_indices=None, @@ -250,7 +251,14 @@ def queue_populate( with (folder / "run_pipeline.sh").open("a") as f: f.write(f"{' '.join(' '.join(cmd).split())}\n\n") - res = submit_cmd(cmd, folder=folder, name=name, gpus=0, **kwargs) + res = submit_cmd( + cmd, + folder=folder, + name=name, + gpus=0, + slurm_exclude=nodes_with_gpus(*exclude_gpus), + **kwargs, + ) return res, output_folder diff --git a/infinigen/tools/process_mvs_data.py b/infinigen/tools/process_mvs_data.py new file mode 100644 index 000000000..4d4e2b18e --- /dev/null +++ b/infinigen/tools/process_mvs_data.py @@ -0,0 +1,198 @@ +# Copyright (C) 2024, Princeton University. +# This source code is licensed under the BSD 3-Clause license found in the LICENSE file in the root directory of this source tree. 
+ +# Authors: Zeyu Ma + +import argparse +import os +import shutil +from pathlib import Path + +import cv2 +import numpy as np +import submitit +import torch +import torch.nn.functional as F +from tqdm import tqdm + +from infinigen.tools.suffixes import parse_suffix + + +# these functions till check_cycle_consistency are from https://github.com/princeton-vl/SEA-RAFT +def transform(T, p): + assert T.shape == (4, 4) + return np.einsum("H W j, i j -> H W i", p, T[:3, :3]) + T[:3, 3] + + +def from_homog(x): + return x[..., :-1] / x[..., [-1]] + + +def coords_grid(batch, ht, wd, device): + coords = torch.meshgrid( + torch.arange(ht, device=device), torch.arange(wd, device=device) + ) + coords = torch.stack(coords[::-1], dim=0).float() + return coords[None].repeat(batch, 1, 1, 1) + + +def reproject(depth1, pose1, pose2, K1, K2): + H, W = depth1.shape + x, y = np.meshgrid(np.arange(W), np.arange(H), indexing="xy") + img_1_coords = np.stack((x, y, np.ones_like(x)), axis=-1).astype(np.float64) + cam1_coords = np.einsum( + "H W, H W j, i j -> H W i", depth1, img_1_coords, np.linalg.inv(K1) + ) + rel_pose = np.linalg.inv(pose2) @ pose1 + cam2_coords = transform(rel_pose, cam1_coords) + return from_homog(np.einsum("H W j, i j -> H W i", cam2_coords, K2)) + + +def induced_flow(depth0, depth1, data): + H, W = depth0.shape + coords1 = reproject(depth0, data["T0"], data["T1"], data["K0"], data["K1"]) + + x, y = np.meshgrid(np.arange(W), np.arange(H), indexing="xy") + coords0 = np.stack([x, y], axis=-1) + flow_01 = coords1 - coords0 + + H, W = depth1.shape + coords1 = reproject(depth1, data["T1"], data["T0"], data["K1"], data["K0"]) + x, y = np.meshgrid(np.arange(W), np.arange(H), indexing="xy") + coords0 = np.stack([x, y], axis=-1) + flow_10 = coords1 - coords0 + + return flow_01, flow_10 + + +def bilinear_sampler(img, coords, mode="bilinear", mask=False): + """Wrapper for grid_sample, uses pixel coordinates""" + H, W = img.shape[-2:] + xgrid, ygrid = coords.split([1, 1], dim=-1) + xgrid = 2 * xgrid / (W - 1) - 1 + ygrid = 2 * ygrid / (H - 1) - 1 + + grid = torch.cat([xgrid, ygrid], dim=-1) + img = F.grid_sample(img, grid, align_corners=True) + + if mask: + mask = (xgrid > -1) & (ygrid > -1) & (xgrid < 1) & (ygrid < 1) + return img, mask.float() + + return img + + +def check_cycle_consistency(flow_01, flow_10, threshold=1): + flow_01 = torch.from_numpy(flow_01).permute(2, 0, 1)[None] + flow_10 = torch.from_numpy(flow_10).permute(2, 0, 1)[None] + H, W = flow_01.shape[-2:] + coords = coords_grid(1, H, W, flow_01.device) + coords1 = coords + flow_01 + flow_reprojected = bilinear_sampler(flow_10, coords1.permute(0, 2, 3, 1)) + cycle = flow_reprojected + flow_01 + cycle = torch.norm(cycle, dim=1) + mask = (cycle < threshold).float() + return mask[0].numpy() + + +def compute_covisibility(depth0, depth1, camview0, camview1): + data = {} + data["K0"] = camview0["K"] + data["K1"] = camview1["K"] + data["T0"] = camview0["T"] + data["T1"] = camview1["T"] + flow_01, flow_10 = induced_flow(depth0, depth1, data) + mask = check_cycle_consistency(flow_01, flow_10) + return mask.mean() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--source_folder", type=Path, default=None) + parser.add_argument("--target_folder", type=Path) + parser.add_argument("--postprocess_only", type=int, default=False) + args = parser.parse_args() + + source_folder = args.source_folder + target_folder = args.target_folder + + if not args.postprocess_only: + scenes = [ + x for x in 
os.listdir(source_folder) if os.path.isdir(source_folder / x) + ] + for scene in tqdm(scenes): + image_dir = source_folder / scene / "frames/Image/camera_0" + if not os.path.exists(image_dir): + continue + images = [x for x in os.listdir(image_dir) if x.endswith(".png")] + for image in images: + im = cv2.imread(image_dir / image) + if im.mean() < 20: + continue + camera_path = ( + source_folder + / scene + / f"frames/camview/camera_0/camview{image[5:-4]}.npz" + ) + depth_path = ( + source_folder + / scene + / f"frames/Depth/camera_0/Depth{image[5:-4]}.npy" + ) + if not os.path.exists(camera_path): + continue + if not os.path.exists(depth_path): + continue + (target_folder / scene / "images").mkdir(parents=True, exist_ok=True) + (target_folder / scene / "cameras").mkdir(parents=True, exist_ok=True) + (target_folder / scene / "depths").mkdir(parents=True, exist_ok=True) + cam_id = parse_suffix(image)["cam_rig"] + shutil.copy( + image_dir / image, + target_folder / scene / "images" / f"{cam_id:04d}.png", + ) + shutil.copy( + camera_path, target_folder / scene / "cameras" / f"{cam_id:04d}.npz" + ) + shutil.copy( + depth_path, target_folder / scene / "depths" / f"{cam_id:04d}.npy" + ) + + scenes = os.listdir(target_folder) + + def worker(scene): + cam_ids = [ + x[:-4] + for x in os.listdir(target_folder / scene / "images") + if x.endswith(".png") + ] + with open(target_folder / scene / "pairs.txt", "w") as f: + for cam_id0 in cam_ids: + f.write(f"{cam_id0} ") + depth_path = target_folder / scene / f"depths/{cam_id0}.npy" + camera_path = target_folder / scene / f"cameras/{cam_id0}.npz" + depth0 = np.load(depth_path) + camview0 = np.load(camera_path) + for cam_id1 in cam_ids: + if cam_id1 == cam_id0: + continue + depth_path = target_folder / scene / f"depths/{cam_id1}.npy" + camera_path = target_folder / scene / f"cameras/{cam_id1}.npz" + depth1 = np.load(depth_path) + camview1 = np.load(camera_path) + cov = compute_covisibility(depth0, depth1, camview0, camview1) + f.write(f" {cam_id1} {cov}") + f.write("\n") + thumbnails = [] + for image in os.listdir(target_folder / scene / "images"): + im = cv2.imread(target_folder / scene / "images" / image) + H, W = im.shape[:2] + thumbnails.append(cv2.resize(im, (W // 10, H // 10))) + thumbnails = np.concatenate(thumbnails, 1) + cv2.imwrite(target_folder / scene / "thumbnails.png", thumbnails) + + log_folder = "~/sc/logs/%j" + executor = submitit.AutoExecutor(folder=log_folder) + executor.update_parameters(timeout_min=10, slurm_partition="allcs") + for scene in scenes: + job = executor.submit(worker, scene) diff --git a/infinigen_examples/configs_indoor/base_indoors.gin b/infinigen_examples/configs_indoor/base_indoors.gin index 90fd9ac0c..d22722d06 100644 --- a/infinigen_examples/configs_indoor/base_indoors.gin +++ b/infinigen_examples/configs_indoor/base_indoors.gin @@ -70,4 +70,6 @@ compose_indoors.floating_objs_enabled = False compose_indoors.num_floating = ('discrete_uniform', 15, 25) compose_indoors.norm_floating_size = True compose_indoors.enable_collision_floating = False -compose_indoors.enable_collision_solved = False \ No newline at end of file +compose_indoors.enable_collision_solved = False + +configure_cameras.mvs_radius = ("uniform", 1, 2) diff --git a/infinigen_examples/configs_nature/multiview_stereo.gin b/infinigen_examples/configs_nature/multiview_stereo.gin new file mode 100644 index 000000000..a56b1fab4 --- /dev/null +++ b/infinigen_examples/configs_nature/multiview_stereo.gin @@ -0,0 +1,9 @@ +camera.spawn_camera_rigs.n_camera_rigs = 30 
+camera.spawn_camera_rigs.camera_rig_config = [ + {'loc': (0, 0, 0), 'rot_euler': (0, 0, 0)}, +] +configure_cameras.mvs_setting = True +compose_nature.camera_selection_ranges_ratio = {} +compose_nature.camera_selection_tags_ratio = {} +compute_base_views.min_candidates_ratio = 1 +fine_terrain.mesher_backend = "OcMesher" \ No newline at end of file diff --git a/infinigen_examples/configs_nature/mvs.gin b/infinigen_examples/configs_nature/mvs.gin deleted file mode 100644 index ea4fc2818..000000000 --- a/infinigen_examples/configs_nature/mvs.gin +++ /dev/null @@ -1,5 +0,0 @@ -camera.spawn_camera_rigs.n_camera_rigs = 10 -camera.spawn_camera_rigs.camera_rig_config = [ - {'loc': (0, 0, 0), 'rot_euler': (0, 0, 0)}, -] -camera.compute_base_views.center_coordinate=(30, 10, 5) \ No newline at end of file diff --git a/infinigen_examples/generate_indoors.py b/infinigen_examples/generate_indoors.py index 129d9a8c3..7f73f80d3 100644 --- a/infinigen_examples/generate_indoors.py +++ b/infinigen_examples/generate_indoors.py @@ -252,6 +252,7 @@ def pose_cameras(): camera_rigs, scene_preprocessed=scene_preprocessed, init_surfaces=solved_floor_surface, + nonroom_objs=nonroom_objs, ) butil.delete(solved_floor_surface) return scene_preprocessed diff --git a/infinigen_examples/generate_nature.py b/infinigen_examples/generate_nature.py index 8f689dc61..f982c49de 100644 --- a/infinigen_examples/generate_nature.py +++ b/infinigen_examples/generate_nature.py @@ -286,7 +286,10 @@ def camera_preprocess(): p.run_stage( "pose_cameras", lambda: cam_util.configure_cameras( - camera_rigs, scene_preprocessed, init_bounding_box=bbox + camera_rigs, + scene_preprocessed, + init_bounding_box=bbox, + terrain_mesh=terrain_mesh, ), use_chance=False, )
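
Usage note (not part of the patch): the new postprocessing script is driven by the argparse flags it adds, e.g. python -m infinigen.tools.process_mvs_data --source_folder <rendered_scenes_dir> --target_folder <mvs_output_dir>. It copies per-camera images, camviews and depths into the target folder, then submits one submitit worker per scene to write pairs.txt covisibility scores and a thumbnail strip. The exact invocation of the datagen pipeline with the new data_schema/multiview_stereo.gin and configs_nature/multiview_stereo.gin configs is not shown here and depends on how you drive manage_jobs.

The sketch below is a minimal, self-contained illustration of the covisibility helper those workers call; it is not part of the patch. The intrinsics, poses and constant-depth maps are made up for illustration, and importing the module assumes its dependencies (torch, cv2, submitit, tqdm) are installed. Real inputs come from the exported camview*.npz files (keys "K" and "T") and Depth*.npy maps.

# Hypothetical example, not part of the patch: exercise compute_covisibility
# from the new infinigen/tools/process_mvs_data.py on synthetic data.
import numpy as np

from infinigen.tools.process_mvs_data import compute_covisibility

H, W = 120, 160

# Simple pinhole intrinsics (fx = fy = 100, principal point at the image center).
K = np.array(
    [[100.0, 0.0, W / 2],
     [0.0, 100.0, H / 2],
     [0.0, 0.0, 1.0]]
)

# Camera-to-world poses: camera 0 at the origin, camera 1 shifted 0.5 along +x.
T0 = np.eye(4)
T1 = np.eye(4)
T1[0, 3] = 0.5

# Constant-depth planes stand in for the rendered Depth maps.
depth0 = np.full((H, W), 5.0)
depth1 = np.full((H, W), 5.0)

# compute_covisibility only indexes "K" and "T", so plain dicts work in place
# of the np.load(...) camview archives used by the script.
camview0 = {"K": K, "T": T0}
camview1 = {"K": K, "T": T1}

# Fraction of pixels that survive the forward/backward flow cycle check.
cov = compute_covisibility(depth0, depth1, camview0, camview1)
print(f"covisibility: {cov:.3f}")

With identical cameras the flow cycle is consistent everywhere and the score is 1.0; the small baseline above pushes a strip of pixels out of view, so the score drops slightly below 1.0. This is the quantity the worker writes next to each camera pair in pairs.txt.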