-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8d71453
commit 3f430c6
Showing
88 changed files
with
12,138 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
from . import data # register all new datasets | ||
from . import modeling | ||
|
||
# config | ||
from .config import add_maskformer2_config | ||
|
||
# dataset loading | ||
from .data.dataset_mappers.coco_instance_new_baseline_dataset_mapper import COCOInstanceNewBaselineDatasetMapper | ||
from .data.dataset_mappers.coco_panoptic_new_baseline_dataset_mapper import COCOPanopticNewBaselineDatasetMapper | ||
from .data.dataset_mappers.mask_former_instance_dataset_mapper import ( | ||
MaskFormerInstanceDatasetMapper, | ||
) | ||
from .data.dataset_mappers.mask_former_panoptic_dataset_mapper import ( | ||
MaskFormerPanopticDatasetMapper, | ||
) | ||
from .data.dataset_mappers.mask_former_semantic_dataset_mapper import ( | ||
MaskFormerSemanticDatasetMapper, | ||
) | ||
|
||
# models | ||
from .maskformer_model import MaskFormer | ||
from .guide_maskformer_model import GuideMaskFormer | ||
from .test_time_augmentation import SemanticSegmentorWithTTA | ||
|
||
# evaluation | ||
from .evaluation.instance_evaluation import InstanceSegEvaluator |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
# -*- coding: utf-8 -*- | ||
from detectron2.config import CfgNode as CN | ||
|
||
|
||
def add_maskformer2_config(cfg):
    """
    Populate ``cfg`` in place with the MaskFormer / Mask2Former default options.

    New keys are added under ``cfg.INPUT``, ``cfg.SOLVER``, ``cfg.MODEL.MASK_FORMER``,
    ``cfg.MODEL.SEM_SEG_HEAD`` and ``cfg.MODEL.SWIN``. Nothing is returned.

    Args:
        cfg: config node to extend; it must already contain the ``INPUT`` (with
            ``CROP``), ``SOLVER`` and ``MODEL`` (with ``SEM_SEG_HEAD``) sub-nodes.
    """
    # ------------------------------------------------------------------
    # Options carried over from the original MaskFormer
    # ------------------------------------------------------------------
    input_cfg = cfg.INPUT
    # Name of the dataset mapper to use.
    input_cfg.DATASET_MAPPER_NAME = "mask_former_semantic"
    # SSD-style color augmentation switch.
    input_cfg.COLOR_AUG_SSD = False
    # Random cropping is retried until no single semantic category covers more than
    # this fraction of the crop.
    input_cfg.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0
    # Padding of image and segmentation GT inside the dataset mapper.
    input_cfg.SIZE_DIVISIBILITY = -1

    solver_cfg = cfg.SOLVER
    # Weight decay applied to embeddings.
    solver_cfg.WEIGHT_DECAY_EMBED = 0.0
    # Optimizer selection and backbone learning-rate multiplier.
    solver_cfg.OPTIMIZER = "ADAMW"
    solver_cfg.BACKBONE_MULTIPLIER = 0.1
    # Number of gradient-accumulation steps.
    solver_cfg.GRAD_ACCU_STEPS = 1

    model_cfg = cfg.MODEL
    model_cfg.MASK_FORMER = CN()
    mask_former = model_cfg.MASK_FORMER

    # Loss terms.
    mask_former.DEEP_SUPERVISION = True
    mask_former.NO_OBJECT_WEIGHT = 0.1
    mask_former.CLASS_WEIGHT = 1.0
    mask_former.DICE_WEIGHT = 1.0
    mask_former.MASK_WEIGHT = 20.0

    # Transformer hyper-parameters.
    mask_former.NHEADS = 8
    mask_former.DROPOUT = 0.1
    mask_former.DIM_FEEDFORWARD = 2048
    mask_former.ENC_LAYERS = 0
    mask_former.DEC_LAYERS = 6
    mask_former.PRE_NORM = False

    mask_former.HIDDEN_DIM = 256
    mask_former.NUM_OBJECT_QUERIES = 100

    mask_former.TRANSFORMER_IN_FEATURE = "res5"
    mask_former.ENFORCE_INPUT_PROJ = False

    # Inference-time options.
    mask_former.TEST = CN()
    inference = mask_former.TEST
    inference.SEMANTIC_ON = True
    inference.INSTANCE_ON = False
    inference.PANOPTIC_ON = False
    inference.OBJECT_MASK_THRESHOLD = 0.0
    inference.OVERLAP_THRESHOLD = 0.0
    inference.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE = False

    # Some backbones (e.g. ResNet) report `backbone.size_divisibility` as 0;
    # this value can be used to override it.
    mask_former.SIZE_DIVISIBILITY = 32

    # Options for the harmonic-embedding extension.
    mask_former.LOGGER = "wandb"
    mask_former.USE_EDGE = False
    mask_former.DILATE_KERNEL_SIZE = 5
    mask_former.GUIDE_FUNCTION_PATH = None
    mask_former.EMB_WEIGHT = 5.0
    mask_former.USE_HPE = True  # fixed harmonic embedding as positional encoding
    mask_former.USE_DPE = True  # dynamic positional-encoding generation
    mask_former.USE_AUX_SUP = True
    # Further switches (USE_EMB, USE_QUERY_INIT, PE_TYPE, OUT_MOD_TYPE) are
    # currently disabled and therefore not registered here.

    # Pixel decoder.
    sem_seg_head = model_cfg.SEM_SEG_HEAD
    sem_seg_head.MASK_DIM = 256
    # Number of transformer encoder layers added inside the pixel decoder.
    sem_seg_head.TRANSFORMER_ENC_LAYERS = 0
    # Which pixel decoder implementation to build.
    sem_seg_head.PIXEL_DECODER_NAME = "BasePixelDecoder"

    # Swin transformer backbone.
    model_cfg.SWIN = CN()
    swin = model_cfg.SWIN
    swin.PRETRAIN_IMG_SIZE = 224
    swin.PATCH_SIZE = 4
    swin.EMBED_DIM = 96
    swin.DEPTHS = [2, 2, 6, 2]
    swin.NUM_HEADS = [3, 6, 12, 24]
    swin.WINDOW_SIZE = 7
    swin.MLP_RATIO = 4.0
    swin.QKV_BIAS = True
    swin.QK_SCALE = None
    swin.DROP_RATE = 0.0
    swin.ATTN_DROP_RATE = 0.0
    swin.DROP_PATH_RATE = 0.3
    swin.APE = False
    swin.PATCH_NORM = True
    swin.OUT_FEATURES = ["res2", "res3", "res4", "res5"]
    swin.USE_CHECKPOINT = False

    # ------------------------------------------------------------------
    # Extra options introduced by Mask2Former
    # ------------------------------------------------------------------
    # Transformer decoder module.
    mask_former.TRANSFORMER_DECODER_NAME = "MultiScaleMaskedTransformerDecoder"

    # LSJ augmentation.
    input_cfg.IMAGE_SIZE = 1024
    input_cfg.MIN_SCALE = 0.1
    input_cfg.MAX_SCALE = 2.0

    # MSDeformAttn encoder.
    sem_seg_head.DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES = ["res3", "res4", "res5"]
    sem_seg_head.DEFORMABLE_TRANSFORMER_ENCODER_N_POINTS = 4
    sem_seg_head.DEFORMABLE_TRANSFORMER_ENCODER_N_HEADS = 8

    # Point-based loss.
    # Number of points sampled during training for a mask point head.
    mask_former.TRAIN_NUM_POINTS = 112 * 112
    # Oversampling parameter for PointRend point sampling during training
    # (parameter `k` in the original paper).
    mask_former.OVERSAMPLE_RATIO = 3.0
    # Importance-sampling parameter for PointRend point sampling during training
    # (parameter `beta` in the original paper).
    mask_former.IMPORTANCE_SAMPLE_RATIO = 0.75
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
from . import datasets |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. |
Binary file added
BIN
+175 Bytes
mask2former/data/dataset_mappers/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
Binary file added
BIN
+4.85 KB
...data/dataset_mappers/__pycache__/coco_instance_new_baseline_dataset_mapper.cpython-39.pyc
Binary file not shown.
Binary file added
BIN
+4.57 KB
...data/dataset_mappers/__pycache__/coco_panoptic_new_baseline_dataset_mapper.cpython-39.pyc
Binary file not shown.
Binary file added
BIN
+5.35 KB
...ormer/data/dataset_mappers/__pycache__/mask_former_instance_dataset_mapper.cpython-39.pyc
Binary file not shown.
Binary file added
BIN
+4.29 KB
...ormer/data/dataset_mappers/__pycache__/mask_former_panoptic_dataset_mapper.cpython-39.pyc
Binary file not shown.
Binary file added
BIN
+4.9 KB
...ormer/data/dataset_mappers/__pycache__/mask_former_semantic_dataset_mapper.cpython-39.pyc
Binary file not shown.
189 changes: 189 additions & 0 deletions
189
mask2former/data/dataset_mappers/coco_instance_new_baseline_dataset_mapper.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/d2/detr/dataset_mapper.py | ||
import copy | ||
import logging | ||
|
||
import numpy as np | ||
import torch | ||
|
||
from detectron2.config import configurable | ||
from detectron2.data import detection_utils as utils | ||
from detectron2.data import transforms as T | ||
from detectron2.data.transforms import TransformGen | ||
from detectron2.structures import BitMasks, Instances | ||
|
||
from pycocotools import mask as coco_mask | ||
|
||
__all__ = ["COCOInstanceNewBaselineDatasetMapper"] | ||
|
||
|
||
def convert_coco_poly_to_mask(segmentations, height, width):
    """
    Rasterize per-instance COCO polygon segmentations into a stacked mask tensor.

    Args:
        segmentations: iterable with one entry per instance; each entry is passed
            to ``coco_mask.frPyObjects`` together with ``height`` and ``width``.
        height: mask height in pixels.
        width: mask width in pixels.

    Returns:
        torch.Tensor: the per-instance masks stacked along dim 0. When
        ``segmentations`` is empty, an all-zero ``uint8`` tensor of shape
        ``(0, height, width)`` is returned instead.
    """

    def _rasterize(polygons):
        # Encode the polygons as RLE, then decode them into a dense array.
        encoded = coco_mask.frPyObjects(polygons, height, width)
        dense = coco_mask.decode(encoded)
        # Guarantee a trailing axis so the reduction over dim 2 below is valid.
        if dense.ndim < 3:
            dense = dense[..., None]
        # Collapse the trailing axis: a pixel is set if any slice marks it.
        return torch.as_tensor(dense, dtype=torch.uint8).any(dim=2)

    per_instance = [_rasterize(polygons) for polygons in segmentations]
    if not per_instance:
        return torch.zeros((0, height, width), dtype=torch.uint8)
    return torch.stack(per_instance, dim=0)
|
||
|
||
def build_transform_gen(cfg, is_train):
    """
    Build the training augmentation list from ``cfg.INPUT``.

    The list contains an optional random flip (skipped when
    ``cfg.INPUT.RANDOM_FLIP`` is ``"none"``), followed by a ``ResizeScale`` and a
    ``FixedSizeCrop``, both targeting a square of ``cfg.INPUT.IMAGE_SIZE`` pixels.

    Args:
        cfg: config providing ``INPUT.IMAGE_SIZE``, ``INPUT.MIN_SCALE``,
            ``INPUT.MAX_SCALE`` and ``INPUT.RANDOM_FLIP``.
        is_train: must be truthy; only training augmentation is supported.

    Returns:
        list[Augmentation]
    """
    assert is_train, "Only support training augmentation"

    input_cfg = cfg.INPUT
    side = input_cfg.IMAGE_SIZE
    scale_lo = input_cfg.MIN_SCALE
    scale_hi = input_cfg.MAX_SCALE

    pipeline = []

    flip_mode = input_cfg.RANDOM_FLIP
    if flip_mode != "none":
        flip = T.RandomFlip(
            horizontal=flip_mode == "horizontal",
            vertical=flip_mode == "vertical",
        )
        pipeline.append(flip)

    # Random rescale relative to the target size, then crop to a fixed square.
    pipeline.append(
        T.ResizeScale(
            min_scale=scale_lo, max_scale=scale_hi, target_height=side, target_width=side
        )
    )
    pipeline.append(T.FixedSizeCrop(crop_size=(side, side)))

    return pipeline
|
||
|
||
# This is specifically designed for the COCO dataset. | ||
class COCOInstanceNewBaselineDatasetMapper:
    """
    A callable which takes a dataset dict in Detectron2 Dataset format,
    and maps it into a format used by MaskFormer for instance segmentation.

    The callable currently does the following:

    1. Read the image from "file_name"
    2. Apply the configured transform generators to the image
    3. Build a padding mask by running the same transforms over an all-ones map
    4. Transform the non-crowd annotations and convert image and annotations to Tensors
    """

    @configurable
    def __init__(
        self,
        is_train=True,
        *,
        tfm_gens,
        image_format,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            is_train: for training or inference
            tfm_gens: data augmentation, applied in order to every image
            image_format: an image format supported by :func:`detection_utils.read_image`.
        """
        self.tfm_gens = tfm_gens
        logging.getLogger(__name__).info(
            "[COCOInstanceNewBaselineDatasetMapper] Full TransformGens used in training: {}".format(str(self.tfm_gens))
        )

        self.img_format = image_format
        self.is_train = is_train

    @classmethod
    def from_config(cls, cfg, is_train=True):
        """
        Translate ``cfg`` into the keyword arguments of ``__init__``.

        Args:
            cfg: config providing the augmentation options read by
                ``build_transform_gen`` and ``INPUT.FORMAT``.
            is_train: forwarded to ``build_transform_gen`` and to the mapper.

        Returns:
            dict: keyword arguments ``is_train``, ``tfm_gens`` and ``image_format``.
        """
        # Build augmentation
        tfm_gens = build_transform_gen(cfg, is_train)

        ret = {
            "is_train": is_train,
            "tfm_gens": tfm_gens,
            "image_format": cfg.INPUT.FORMAT,
        }
        return ret

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept. It always
            carries "image" and "padding_mask"; in training mode, when the input
            has "annotations", they are replaced by an "instances" entry.
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        # The padding mask is obtained by feeding an all-ones "segmentation mask"
        # through the same transforms as the image.
        padding_mask = np.ones(image.shape[:2])

        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        # the crop transformation has default padding value 0 for segmentation,
        # so after inversion True marks padded pixels
        padding_mask = transforms.apply_segmentation(padding_mask)
        padding_mask = ~padding_mask.astype(bool)

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
        dataset_dict["padding_mask"] = torch.as_tensor(np.ascontiguousarray(padding_mask))

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                # Masks are always kept; only keypoints are dropped.
                anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(obj, transforms, image_shape)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            # NOTE: does not support BitMask due to augmentation
            # Current BitMask cannot handle empty objects
            instances = utils.annotations_to_instances(annos, image_shape)
            # `gt_masks` is absent when no non-crowd annotation with a segmentation
            # is left for this image; reading it unconditionally would raise
            # AttributeError, so every use of the field is guarded.
            has_masks = hasattr(instances, "gt_masks")
            if has_masks:
                # After transforms such as cropping are applied, the bounding box may no
                # longer tightly bound the object. As an example, imagine a triangle object
                # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The
                # tight bounding box of the cropped triangle should be [(1,0),(2,1)], which
                # is not equal to the intersection of original bounding box and the
                # cropping box.
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            # Need to filter empty instances first (due to augmentation)
            instances = utils.filter_empty_instances(instances)
            # Generate masks from polygon
            h, w = instances.image_size
            if hasattr(instances, "gt_masks"):
                gt_masks = instances.gt_masks
                gt_masks = convert_coco_poly_to_mask(gt_masks.polygons, h, w)
                instances.gt_masks = gt_masks
            dataset_dict["instances"] = instances

        return dataset_dict
Oops, something went wrong.