SSD Color augmentation

Alexander Kirillov · facebook-github-bot · commit 47c34bca6976 · 2020-05-01T16:50:27.000-07:00
Reviewed By: rbgirshick

Differential Revision: D19529633

fbshipit-source-id: c9e7028c840007eb5435a0a4d9ff4ecfd1286a6f
diff --git a/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_50_FPN_1x_cityscapes.yaml b/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_50_FPN_1x_cityscapes.yaml
@@ -28,3 +28,6 @@ INPUT:
     TYPE: "absolute"
     SIZE: (512, 1024)
     SINGLE_CATEGORY_MAX_AREA: 0.75
+  COLOR_AUG_SSD: True
+DATALOADER:
+  NUM_WORKERS: 16
diff --git a/projects/PointRend/point_rend/color_augmentation.py b/projects/PointRend/point_rend/color_augmentation.py
@@ -0,0 +1,100 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import numpy as np
+import random
+import cv2
+from fvcore.transforms.transform import Transform
+
+from detectron2.data.transforms import TransformGen
+
+
+class ColorAugSSD(TransformGen):
+    def __init__(self):
+        super().__init__()
+
+    def get_transform(self, img):
+        return ColorAugSSDTransform()
+
+
+class ColorAugSSDTransform(Transform):
+    """
+    A color related data augmentation used in Single Shot Multibox Detector (SSD).
+
+    Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy,
+       Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
+       SSD: Single Shot MultiBox Detector. ECCV 2016.
+
+    Implementation based on:
+
+     https://github.com/weiliu89/caffe/blob
+       /4817bf8b4200b35ada8ed0dc378dceaf38c539e4
+       /src/caffe/util/im_transforms.cpp
+
+     https://github.com/chainer/chainercv/blob
+       /7159616642e0be7c5b3ef380b848e16b7e99355b/chainercv
+       /links/model/ssd/transforms.py
+    """
+
+    def __init__(
+        self,
+        brightness_delta=32,
+        contrast_low=0.5,
+        contrast_high=1.5,
+        saturation_low=0.5,
+        saturation_high=1.5,
+        hue_delta=18,
+    ):
+        super().__init__()
+        self._set_attributes(locals())
+
+    def apply_coords(self, coords):
+        return coords
+
+    def apply_segmentation(self, segmentation):
+        return segmentation
+
+    def apply_image(self, img, interp=None):
+        img = self.brightness(img)
+        if random.randrange(2):
+            img = self.contrast(img)
+            img = self.saturation(img)
+            img = self.hue(img)
+        else:
+            img = self.saturation(img)
+            img = self.hue(img)
+            img = self.contrast(img)
+        return img
+
+    def convert(self, img, alpha=1, beta=0):
+        img = img.astype(np.float32) * alpha + beta
+        img = np.clip(img, 0, 255)
+        return img.astype(np.uint8)
+
+    def brightness(self, img):
+        if random.randrange(2):
+            return self.convert(
+                img, beta=random.uniform(-self.brightness_delta, self.brightness_delta)
+            )
+        return img
+
+    def contrast(self, img):
+        if random.randrange(2):
+            return self.convert(img, alpha=random.uniform(self.contrast_low, self.contrast_high))
+        return img
+
+    def saturation(self, img):
+        if random.randrange(2):
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
+            img[:, :, 1] = self.convert(
+                img[:, :, 1], alpha=random.uniform(self.saturation_low, self.saturation_high)
+            )
+            return cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
+        return img
+
+    def hue(self, img):
+        if random.randrange(2):
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
+            img[:, :, 0] = (
+                img[:, :, 0].astype(int) + random.randint(-self.hue_delta, self.hue_delta)
+            ) % 180
+            return cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
+        return img
diff --git a/projects/PointRend/point_rend/config.py b/projects/PointRend/point_rend/config.py
@@ -11,6 +11,8 @@ def add_pointrend_config(cfg):
     # We retry random cropping until no single category in semantic segmentation GT occupies more
     # than `SINGLE_CATEGORY_MAX_AREA` part of the crop.
     cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0
+    # Color augmentation from SSD paper for semantic segmentation model during training.
+    cfg.INPUT.COLOR_AUG_SSD = False
 
     # Names of the input feature maps to be used by a coarse mask head.
     cfg.MODEL.ROI_MASK_HEAD.IN_FEATURES = ("p2",)
diff --git a/projects/PointRend/point_rend/dataset_mapper.py b/projects/PointRend/point_rend/dataset_mapper.py
@@ -10,6 +10,8 @@
 from detectron2.data import detection_utils as utils
 from detectron2.data import transforms as T
 
+from .color_augmentation import ColorAugSSD
+
 """
 This file contains the mapping that's applied to "dataset dicts" for semantic segmentation models.
 Unlike the default DatasetMapper this mapper uses cropping as the last transformation.
@@ -40,6 +42,12 @@ def __init__(self, cfg, is_train=True):
 
         self.tfm_gens = utils.build_transform_gen(cfg, is_train)
 
+        if cfg.INPUT.COLOR_AUG_SSD:
+            self.tfm_gens.append(ColorAugSSD())
+            logging.getLogger(__name__).info(
+                "Color augmnetation used in training: " + str(self.tfm_gens[-1])
+            )
+
         # fmt: off
         self.img_format               = cfg.INPUT.FORMAT
         self.single_category_max_area = cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA