
Commit

update
Signed-off-by: ligo <[email protected]>
Ligo04 committed Mar 15, 2024
1 parent 89d9f83 commit f84c78a
Showing 1 changed file with 28 additions and 65 deletions.
93 changes: 28 additions & 65 deletions kaolin/render/camera/intrinsics_ortho.py
@@ -16,27 +16,20 @@
 from __future__ import annotations
 from typing import Type, Union
 import torch
-from .intrinsics import (
-    CameraIntrinsics,
-    IntrinsicsParamsDefEnum,
-    up_to_homogeneous,
-    down_from_homogeneous,
-    default_dtype,
-)
-
-__all__ = ["OrthographicIntrinsics"]
+from .intrinsics import CameraIntrinsics, IntrinsicsParamsDefEnum,\
+    up_to_homogeneous, down_from_homogeneous, default_dtype
 
+__all__ = [
+    "OrthographicIntrinsics"
+]
 
 class OrthoParamsDefEnum(IntrinsicsParamsDefEnum):
     """Orthographic projections do not use real intrinsics.
     However, since for this type of projection all objects appear at the same
     distance to the camera, a scale factor is included with the intrinsics to allow
     for "zoom" adjustments.
     """
 
-    fov_distance = (
-        0  # Zoom factor, to adjust the scale of the view. Measured in distance units.
-    )
+    fov_distance = 0    # Zoom factor, to adjust the scale of the view. Measured in distance units.
 
 
 class OrthographicIntrinsics(CameraIntrinsics):
@@ -54,18 +47,11 @@ class OrthographicIntrinsics(CameraIntrinsics):
     The matrix returned by this class supports differentiable torch operations.
     """
 
     DEFAULT_NEAR = 1e-2
     DEFAULT_FAR = 1e2
 
-    def __init__(
-        self,
-        width: int,
-        height: int,
-        params: torch.Tensor,
-        near: float = DEFAULT_NEAR,
-        far: float = DEFAULT_FAR,
-    ):
+    def __init__(self, width: int, height: int, params: torch.Tensor,
+                 near: float = DEFAULT_NEAR, far: float = DEFAULT_FAR):
         super().__init__(width, height, params, near, far)
 
     @classmethod
Expand All @@ -81,20 +67,14 @@ def param_types(cls) -> Type[IntrinsicsParamsDefEnum]:

@property
def lens_type(self) -> str:
return "ortho"
return 'ortho'

@classmethod
def from_frustum(
cls,
width: int,
height: int,
fov_distance: float = 1.0,
near: float = DEFAULT_NEAR,
far: float = DEFAULT_FAR,
num_cameras: int = 1,
device: Union[torch.device, str] = None,
dtype: torch.dtype = default_dtype,
) -> OrthographicIntrinsics:
def from_frustum(cls, width: int, height: int, fov_distance: float = 1.0,
near: float = DEFAULT_NEAR, far: float = DEFAULT_FAR,
num_cameras: int = 1,
device: Union[torch.device, str] = None,
dtype: torch.dtype = default_dtype) -> OrthographicIntrinsics:
"""Constructs a new instance of OrthographicIntrinsics from view frustum dimensions
fov_distance artificially defines the "zoom scale" of the view.
@@ -116,9 +96,7 @@ def from_frustum(
         Returns:
             (OrthographicIntrinsics): the constructed orthographic camera intrinsics
         """
-        params = cls._allocate_params(
-            fov_distance, num_cameras=num_cameras, device=device, dtype=dtype
-        )
+        params = cls._allocate_params(fov_distance, num_cameras=num_cameras, device=device, dtype=dtype)
         return OrthographicIntrinsics(width, height, params, near, far)
 
     def orthographic_matrix(self, left, right, bottom, top, near, far) -> torch.Tensor:
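For orientation, a minimal usage sketch of the constructor above (illustrative, not part of this commit; it assumes OrthographicIntrinsics is re-exported from kaolin.render.camera):

import torch
from kaolin.render.camera import OrthographicIntrinsics

# One 512x512 orthographic camera on CPU, using the defaults shown above.
cam = OrthographicIntrinsics.from_frustum(width=512, height=512, fov_distance=1.0,
                                          near=1e-2, far=1e2, num_cameras=1,
                                          device='cpu', dtype=torch.float32)
print(cam.projection_matrix().shape)   # expected: torch.Size([1, 4, 4])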
@@ -155,12 +133,8 @@ def orthographic_matrix(self, left, right, bottom, top, near, far) -> torch.Tensor:
         """
         zero = torch.zeros_like(self.fov_distance)
         one = torch.ones_like(self.fov_distance)
-        tx = torch.full_like(
-            self.fov_distance, fill_value=-(right + left) / (right - left)
-        )
-        ty = torch.full_like(
-            self.fov_distance, fill_value=-(top + bottom) / (top - bottom)
-        )
+        tx = torch.full_like(self.fov_distance, fill_value=-(right + left) / (right - left))
+        ty = torch.full_like(self.fov_distance, fill_value=-(top + bottom) / (top - bottom))
         tz = torch.full_like(self.fov_distance, fill_value=-(far + near) / (far - near))
         W = right - left
         H = top - bottom
@@ -173,10 +147,10 @@ def orthographic_matrix(self, left, right, bottom, top, near, far) -> torch.Tensor:
         #     [0.0, 0.0, -2.0 / D, tz],
         #     [0.0, 0.0, 0, 1.0]
         rows = [
-            torch.stack([2.0 / (fov * W), zero, zero, tx], dim=-1),
-            torch.stack([zero, 2.0 / (fov * H), zero, ty], dim=-1),
-            torch.stack([zero, zero, -2.0 / D, tz], dim=-1),
-            torch.stack([zero, zero, zero, one], dim=-1),
+            torch.stack([2.0 / (fov * W), zero, zero, tx], dim=-1),
+            torch.stack([zero, 2.0 / (fov * H), zero, ty], dim=-1),
+            torch.stack([zero, zero, -2.0 / D, tz], dim=-1),
+            torch.stack([zero, zero, zero, one], dim=-1)
         ]
         ortho_mat = torch.stack(rows, dim=1)
         return ortho_mat
@@ -195,7 +169,6 @@ def projection_matrix(self) -> torch.Tensor:
         bottom = -top
         right = self.width / 2
         left = -right
-
         ortho = self.orthographic_matrix(left, right, bottom, top, self.near, self.far)
         return ortho

@@ -224,15 +197,11 @@ def transform(self, vectors: torch.Tensor) -> torch.Tensor:
         # Expand input vectors to 4D homogeneous coordinates if needed
         homogeneous_vecs = up_to_homogeneous(vectors)
 
-        num_cameras = len(self)  # C - number of cameras
+        num_cameras = len(self)         # C - number of cameras
         batch_size = vectors.shape[-2]  # B - number of vectors
 
-        v = homogeneous_vecs.expand(num_cameras, batch_size, 4)[
-            ..., None
-        ]  # Expand as (C, B, 4, 1)
-        proj = proj[:, None].expand(
-            num_cameras, batch_size, 4, 4
-        )  # Expand as (C, B, 4, 4)
+        v = homogeneous_vecs.expand(num_cameras, batch_size, 4)[..., None]   # Expand as (C, B, 4, 1)
+        proj = proj[:, None].expand(num_cameras, batch_size, 4, 4)           # Expand as (C, B, 4, 4)
 
         transformed_v = proj @ v
         transformed_v = transformed_v.squeeze(-1)  # Reshape: (C, B, 4)
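The shape contract here, as an illustrative sketch (not part of the diff): C cameras broadcast over B input points, so a single camera yields a leading dimension of 1:

import torch
from kaolin.render.camera import OrthographicIntrinsics

cam = OrthographicIntrinsics.from_frustum(width=800, height=600)
points = torch.rand(10, 3)    # B = 10 three-dimensional points
ndc = cam.transform(points)   # lifted to homogeneous, projected, squeezed back
print(ndc.shape)              # expected: torch.Size([1, 10, 3])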
@@ -258,17 +227,13 @@ def normalize_depth(self, depth: torch.Tensor) -> torch.Tensor:
         proj = self.projection_matrix()
         a = -proj[:, 2, 2]
         b = -proj[:, 2, 3]
-        depth = torch.clamp(
-            depth, min=min(self.near, self.far), max=max(self.near, self.far)
-        )
+        depth = torch.clamp(depth, min=min(self.near, self.far), max=max(self.near, self.far))
         # Here we allow depth to be 0, as it will result in 'inf' values which torch will soon clamp.
         # If b is 0 as well, it most likely means the choice of near / far planes and ndc coordinates is invalid.
-        ndc_depth = a - b / depth  # from near: ndc_min to far: ndc_max
+        ndc_depth = a - b / depth   # from near: ndc_min to far: ndc_max
         ndc_min = min(self.ndc_min, self.ndc_max)
         ndc_max = max(self.ndc_min, self.ndc_max)
-        normalized_depth = (ndc_depth - ndc_min) / (
-            ndc_max - ndc_min
-        )  # from near: 0 to far: 1
+        normalized_depth = (ndc_depth - ndc_min) / (ndc_max - ndc_min)   # from near: 0 to far: 1
         normalized_depth = torch.clamp(normalized_depth, min=0.0, max=1.0)
         return normalized_depth

@@ -282,6 +247,4 @@ def fov_distance(self, val: Union[float, torch.Tensor]) -> None:
 
     def zoom(self, amount):
         self.fov_distance += amount
-        self.fov_distance = torch.max(
-            self.fov_distance, self.fov_distance.new_tensor(1e-5)
-        )  # Don't go below eps
+        self.fov_distance = torch.max(self.fov_distance, self.fov_distance.new_tensor(1e-5))   # Don't go below eps
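A final sketch of the zoom behavior clamped above (illustrative, not part of the diff): negative amounts shrink fov_distance, which zooms in, and the factor never drops below 1e-5:

from kaolin.render.camera import OrthographicIntrinsics

cam = OrthographicIntrinsics.from_frustum(width=256, height=256, fov_distance=1.0)
cam.zoom(-0.5)
print(cam.fov_distance)   # expected: tensor([0.5000])
cam.zoom(-10.0)           # would go negative, so the clamp kicks in
print(cam.fov_distance)   # expected: tensor([1.0000e-05])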
