Publish belief images

This is based on @TontonTremblay's comment: NVlabs#141 (comment)
andrewyguo · Jul 8, 2021 · 4f18349 · 4f18349
1 parent 621e984
commit 4f18349
Show file tree

Hide file tree

Showing 5 changed files with 89 additions and 7 deletions.
diff --git a/config/config_pose.yaml b/config/config_pose.yaml
@@ -167,6 +167,8 @@ mesh_scales: {
     "bleach":  0.01,
 }
 
+overlay_belief_images: True   # Whether to overlay the input image on the belief images published on /dope/belief_[obj_name]
+
 # Config params for DOPE
 thresh_angle: 0.5
 thresh_map: 0.01

diff --git a/nodes/dope b/nodes/dope
@@ -98,6 +98,7 @@ class DopeNode(object):
         self.models = {}
         self.pnp_solvers = {}
         self.pub_dimension = {}
+        self.pub_belief = {}
         self.draw_colors = {}
         self.dimensions = {}
         self.class_ids = {}
@@ -109,6 +110,7 @@ class DopeNode(object):
 
         self.input_is_rectified = rospy.get_param('~input_is_rectified', True)
         self.downscale_height = rospy.get_param('~downscale_height', 500)
+        self.overlay_belief_images = rospy.get_param('~overlay_belief_images', True)
 
         self.config_detect = lambda: None
         self.config_detect.mask_edges = 1
@@ -170,6 +172,12 @@ class DopeNode(object):
                     String,
                     queue_size=10
                 )
+            self.pub_belief[model] = \
+                rospy.Publisher(
+                    '{}/belief_{}'.format(rospy.get_param('~topic_publishing'), model),
+                    ImageSensor_msg,
+                    queue_size=10
+                )
 
         # Start ROS publishers
         self.pub_rgb_dope_points = \
@@ -250,12 +258,16 @@ class DopeNode(object):
         detection_array.header = image_msg.header
 
         for m in self.models:
+            publish_belief_img = (self.pub_belief[m].get_num_connections() > 0)
+
             # Detect object
-            results = ObjectDetector.detect_object_in_image(
+            results, im_belief = ObjectDetector.detect_object_in_image(
                 self.models[m].net,
                 self.pnp_solvers[m],
                 img,
-                self.config_detect
+                self.config_detect,
+                make_belief_debug_img=publish_belief_img,
+                overlay_image=self.overlay_belief_images
             )
 
             # Publish pose and overlay cube on image
@@ -309,6 +321,12 @@ class DopeNode(object):
                         points2d.append(tuple(pair))
                     draw.draw_cube(points2d, self.draw_colors[m])
 
+            # Publish the belief image
+            if publish_belief_img:
+                belief_img = self.cv_bridge.cv2_to_imgmsg(np.array(im_belief)[..., ::-1], "bgr8")
+                belief_img.header = camera_info.header
+                self.pub_belief[m].publish(belief_img)
+
         # Publish the image with results overlaid
         rgb_points_img = CvBridge().cv2_to_imgmsg(np.array(im)[..., ::-1], "bgr8")
         rgb_points_img.header = camera_info.header

diff --git a/readme.md b/readme.md
@@ -97,6 +97,7 @@ The following steps describe the native installation. Alternatively, use the pro
     * `model_transforms`: dictionary of transforms that are applied to the pose before publishing (key values must match the `weights` names)
     * `meshes`: dictionary of mesh filenames for visualization (key values must match the `weights` names)
     * `mesh_scales`: dictionary of scaling factors for the visualization meshes (key values must match the `weights` names)
+    * `overlay_belief_images`: whether to overlay the input image on the belief images published on /dope/belief_[obj_name]
     * `thresh_angle`: undocumented
     * `thresh_map`: undocumented
     * `sigma`: undocumented
@@ -112,7 +113,7 @@ The following steps describe the native installation. Alternatively, use the pro
 
 * The following ROS topics are published (assuming `topic_publishing == 'dope'`):
     ```
-    /dope/webcam_rgb_raw       # RGB images from camera
+    /dope/belief_[obj_name]    # belief maps of object
     /dope/dimension_[obj_name] # dimensions of object
     /dope/pose_[obj_name]      # timestamped pose of object
     /dope/rgb_points           # RGB images with detected cuboids overlaid

diff --git a/src/dope/inference/detector.py b/src/dope/inference/detector.py
@@ -19,6 +19,8 @@
 from scipy.ndimage.filters import gaussian_filter
 from torch.autograd import Variable
 
+from dope.utils import get_image_grid
+
 # Import the definition of the neural network model and cuboids
 
 #global transform for image input
@@ -239,9 +241,12 @@ class ObjectDetector(object):
     '''This class contains methods for object detection'''
 
     @staticmethod
-    def detect_object_in_image(net_model, pnp_solver, in_img, config):
-        '''Detect objects in a image using a specific trained network model'''
-
+    def detect_object_in_image(net_model, pnp_solver, in_img, config,
+                               make_belief_debug_img=False, norm_belief=True, overlay_image=True):
+        """
+        Detect objects in an image using a specific trained network model
+        Returns the poses of the objects and the belief maps
+        """
         if in_img is None:
             return []
 
@@ -255,7 +260,45 @@ def detect_object_in_image(net_model, pnp_solver, in_img, config):
         # Find objects from network output
         detected_objects = ObjectDetector.find_object_poses(vertex2, aff, pnp_solver, config)
 
-        return detected_objects
+        if not make_belief_debug_img:
+            return detected_objects, None
+        else:
+            # Run the belief maps debug display on the belief maps
+            tensor = vertex2
+            belief_imgs = []
+            if overlay_image:
+                upsampling = nn.UpsamplingNearest2d(size=in_img.shape[:2])
+                in_img = (torch.tensor(in_img).float() / 255.0)
+                in_img *= 0.5
+
+            for j in range(tensor.size()[0]):
+                belief = tensor[j].clone()
+                if norm_belief:
+                    belief -= float(torch.min(belief).item())
+                    belief /= float(torch.max(belief).item())
+
+                belief = torch.clamp(belief, 0, 1).cpu()
+                if overlay_image:
+                    belief = upsampling(belief.unsqueeze(0).unsqueeze(0)).squeeze().squeeze().data
+                    belief = torch.cat([
+                        belief.unsqueeze(0) + in_img[:, :, 0],
+                        belief.unsqueeze(0) + in_img[:, :, 1],
+                        belief.unsqueeze(0) + in_img[:, :, 2]
+                    ]).unsqueeze(0)
+                    belief = torch.clamp(belief, 0, 1)
+                else:
+                    belief = torch.cat([
+                        belief.unsqueeze(0),
+                        belief.unsqueeze(0),
+                        belief.unsqueeze(0)
+                    ]).unsqueeze(0)
+                belief_imgs.append(belief.data.squeeze().numpy())
+
+            # Create the image grid
+            belief_imgs = torch.tensor(np.array(belief_imgs))
+            im_belief = get_image_grid(belief_imgs, mean=0, std=1)
+
+            return detected_objects, im_belief
 
     @staticmethod
     def find_object_poses(vertex2, aff, pnp_solver, config):

diff --git a/src/dope/utils.py b/src/dope/utils.py
@@ -82,3 +82,21 @@ def norm_range(t, range_):
                 .copy_(tensor[k])
             k = k + 1
     return grid
+
+
+def get_image_grid(tensor, nrow=3, padding=2, mean=None, std=None):
+    """
+    Tile a mini-batch tensor into a single grid and return it as a PIL image.
+
+    The grid is built with make_grid (pad_value=1), de-normalized as
+    grid * std + mean when `mean` is given (grid * 0.5 + 0.5 otherwise, in
+    which case `std` is ignored) and scaled to 8-bit. Nothing is written to disk.
+    """
+    from PIL import Image
+
+    grid = make_grid(tensor, nrow=nrow, padding=padding, pad_value=1)
+    if mean is None:
+        mean, std = 0.5, 0.5
+    # Move axis 0 last for PIL (assumes make_grid returns a channels-first grid).
+    ndarr = grid.mul(std).add(mean).mul(255).byte().permute(1, 2, 0).numpy()
+    return Image.fromarray(ndarr)