diff --git a/adet/config/defaults.py b/adet/config/defaults.py index 6e85f6e82..cd35f6246 100644 --- a/adet/config/defaults.py +++ b/adet/config/defaults.py @@ -351,7 +351,7 @@ _C.MODEL.POLO.INSTANCE_IN_CHANNELS = 256 _C.MODEL.POLO.INSTANCE_CHANNELS = 512 # Convolutions to use in the instance head. -_C.MODEL.POLO.NUM_INSTANCE_CONVS = 4 +_C.MODEL.POLO.NUM_INSTANCE_CONVS = 6 _C.MODEL.POLO.USE_DCN_IN_INSTANCE = False _C.MODEL.POLO.TYPE_DCN = 'DCN' _C.MODEL.POLO.NUM_GRIDS = [40, 36, 24, 16, 12] @@ -366,7 +366,7 @@ # Channel size for the mask tower. _C.MODEL.POLO.MASK_IN_FEATURES = ["p2", "p3", "p4", "p5"] _C.MODEL.POLO.MASK_IN_CHANNELS = 256 -_C.MODEL.POLO.MASK_CHANNELS = 128 +_C.MODEL.POLO.MASK_CHANNELS = 512 #256 #128 _C.MODEL.POLO.NUM_MASKS = 32 # Test cfg. @@ -388,10 +388,10 @@ # _C.MODEL.POLO.LOSS.FOCAL_GAMMA = 2.0 # _C.MODEL.POLO.LOSS.FOCAL_WEIGHT = 1.0 # _C.MODEL.POLO.LOSS.DICE_WEIGHT = 3.0 -_C.MODEL.POLO.LOSS.OBJ_WEIGHT = 1.0 -_C.MODEL.POLO.LOSS.SEGCROSS_WEIGHT = 2.0 +_C.MODEL.POLO.LOSS.OBJ_WEIGHT = 0.1 +_C.MODEL.POLO.LOSS.SEGCROSS_WEIGHT = 200.0 _C.MODEL.POLO.LOSS.SEGCROSS_CLASSES_WEIGHT = None -_C.MODEL.POLO.LOSS.SEGLOVASZ_WEIGHT = 0.1 +_C.MODEL.POLO.LOSS.SEGLOVASZ_WEIGHT = 10.0 _C.MODEL.POLO.LOSS.SEGLOVASZ_PERIMG = True diff --git a/adet/data/builtin.py b/adet/data/builtin.py index 2014b2079..e87449b3c 100644 --- a/adet/data/builtin.py +++ b/adet/data/builtin.py @@ -4,6 +4,7 @@ from detectron2.data.datasets.builtin_meta import _get_builtin_metadata from detectron2.data import MetadataCatalog, DatasetCatalog from .datasets.cihp import get_cihp_dicts, CIHPDataset +from .datasets.pascal_person_part import PPPDataset from .datasets.text import register_text_instances @@ -68,6 +69,9 @@ def register_all_coco(root="datasets"): DatasetCatalog.register("CIHP_train_v2", CIHPDataset(root, train=True)) DatasetCatalog.register("CIHP_val_v2", CIHPDataset(root, train=False)) +DatasetCatalog.register("PPP_train", PPPDataset(root, train=True)) 
class PPPDataset(Dataset):
    """Pascal-Person-Part images served as detectron2-style record dicts.

    Files are read from ``<root>/VOCdevkit/VOC2010/``: image IDs from
    ``train_id.txt`` / ``val_id.txt``, pictures from ``JPEGImages``,
    per-object masks from ``Annotations_Part`` (``.mat``) and part labels
    from the colour-coded ground-truth PNGs.  The instance is also callable
    (returning itself) so it can be handed to ``DatasetCatalog.register``.

    Args:
        root: Directory that contains ``VOCdevkit`` and
            ``CIHP/human_colormap.mat``.
        train: Use the training ID list when true, the validation list
            otherwise.
    """

    # Sub-directories of ``self.root`` used by the loader.
    _LIST_DIR = 'pascal_person_part/pascal_person_part_trainval_list'
    _GT_DIR = 'pascal_person_part/pascal_person_part_gt'

    # Grey level of the ground-truth PNG (after BGR->GRAY) -> part label id.
    # Grey levels that are not listed are passed through unchanged.
    _GRAY_TO_LABEL = {255: 0, 15: 4, 38: 1, 53: 5, 75: 2, 90: 6, 113: 3}

    def __init__(self, root, train=False):
        self.train = train

        # Colour map used when plotting masks; scaled and stored as uint8.
        colormap = loadmat(os.path.join(root, 'CIHP/human_colormap.mat'))["colormap"]
        self.colormap = (colormap * 100).astype(np.uint8)

        self.root = os.path.join(root, 'VOCdevkit/VOC2010/')
        id_file = "train_id.txt" if train else "val_id.txt"
        with open(os.path.join(self.root, self._LIST_DIR, id_file)) as f:
            self.anno_ids = self._parse_id_list(f.read())

    @staticmethod
    def _parse_id_list(text):
        """Return the non-empty, whitespace-stripped lines of *text*.

        Unlike ``text.split('\\n')[:-1]`` this keeps the last ID when the
        file has no trailing newline and tolerates CRLF line endings.
        """
        return [line.strip() for line in text.splitlines() if line.strip()]

    @classmethod
    def _remap_gray_levels(cls, gray):
        """Map grey levels of a uint8 mask to part label ids via a lookup table."""
        lut = np.arange(256, dtype=np.uint8)
        for level, label in cls._GRAY_TO_LABEL.items():
            lut[level] = label
        return lut[gray]

    @staticmethod
    def _read_image(filename):
        """Read *filename* with OpenCV, raising instead of returning ``None``."""
        image = cv2.imread(filename)
        if image is None:
            # cv2.imread signals failure with None rather than an exception.
            raise FileNotFoundError(f"cannot read image: {filename}")
        return image

    def __call__(self, *args, **kwargs):
        """Return the dataset itself so the instance works as a loader callable."""
        return self

    def __len__(self):
        return len(self.anno_ids)

    def __getitem__(self, idx):
        """Build the record dict (paths, size, annotations) for image *idx*."""
        pictur_id = self.anno_ids[idx]
        filename = os.path.join(self.root, 'JPEGImages', pictur_id + '.jpg')
        height, width = self._read_image(filename).shape[:2]
        return {
            "file_name": filename,
            "image_id": idx,
            "height": height,
            "width": width,
            "annotations": self.create_annotations(pictur_id),
            'sem_seg_file_name': os.path.join(self.root, self._GT_DIR, pictur_id + '.png'),
        }

    def get_dicts(self):
        """Return the records of every image as a list."""
        return [self[i] for i in range(len(self))]

    def create_annotations(self, pictur_id):
        """Return one annotation dict per visible body part of every person.

        Each dict carries ``bbox``, ``bbox_mode``, ``segmentation`` (polygons
        with more than five coordinates), ``category_id`` (the part label) and
        ``parent_id`` (running index of the person the part belongs to).
        Persons that yield no usable polygon do not consume a ``parent_id``.
        """
        part_anno = loadmat(os.path.join(self.root, "Annotations_Part", pictur_id + '.mat'))
        person_part_mask = self.read_mask(
            os.path.join(self.root, self._GT_DIR, pictur_id + '.png'))

        objs = []
        person_idx = 0
        # NOTE(review): the layout anno[0, 0][1][0, i] -> (class name, ..., mask)
        # is taken from the indexing used here; confirm against the .mat schema.
        for entry in part_anno['anno'][0, 0][1][0]:
            if entry[0][0] != 'person':
                continue

            # Person instance mask times part labels: per-pixel part id, 0 elsewhere.
            inst_img = entry[2] * person_part_mask

            parts = []
            for part_id in np.unique(inst_img):
                if part_id == 0:
                    continue
                polygons = Mask((inst_img == part_id).astype(np.uint8)).polygons()
                # Drop degenerate polygons (fewer than three points).
                segmentation = [p for p in polygons.segmentation if len(p) > 5]
                if not segmentation:
                    continue
                parts.append({
                    "bbox": list(polygons.bbox()),
                    "bbox_mode": BoxMode.XYXY_ABS,
                    "segmentation": segmentation,
                    # Plain int keeps the record JSON-serialisable.
                    "category_id": int(part_id),
                    "parent_id": person_idx,
                })

            if parts:
                objs.extend(parts)
                person_idx += 1
        return objs

    def read_mask(self, filename):
        """Load a ground-truth PNG and convert its grey levels to part labels."""
        gray = cv2.cvtColor(self._read_image(filename), cv2.COLOR_BGR2GRAY)
        return self._remap_gray_levels(gray)
((mask.size(0)) // row) + 2 fig, ax = plt.subplots(col, row, figsize=(10, 10)) for i in range(mask.size(0)): @@ -38,6 +41,7 @@ def plot_mask(mask, colormap, classes = 20, row=1, mask_name=None): if mask_name is not None: plt.savefig(mask_name) + def voc_ap(rec, prec, use_07_metric=False): """ ap = voc_ap(rec, prec, [use_07_metric]) Compute VOC AP given precision and recall. @@ -97,7 +101,11 @@ def __init__(self, dataset_name, cfg, distributed, output_dir=None): self.dataset_dicts = DatasetCatalog.get(dataset_name) self.metadata = MetadataCatalog.get(dataset_name) self.ovthresh_seg = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] - + # if not os.path.exists(output_dir): + try: + os.makedirs(output_dir) + except OSError: + pass def reset(self): self.tp = {} self.fp = {} @@ -105,8 +113,13 @@ def reset(self): self.tp[i] = [] self.fp[i] = [] self.npos = 0 + self.total_time = 0 + self.delta_time = time.time() + self.num_images = 0 def process(self, inputs, outputs): + self.num_images += len(inputs) + self.total_time += (time.time() - self.delta_time) for input, output in zip(inputs, outputs): # self.npos += len(self.dataset_dicts[input['image_id']]['annotations']) if len(output["instances"]) == 0: @@ -116,6 +129,7 @@ def process(self, inputs, outputs): w, h = output["instances"].pred_masks.size(1), output["instances"].pred_masks.size(2) seg_gt = self.mix_parts_of_instance(self.dataset_dicts[input['image_id']]['annotations'], (w, h)) self.npos += seg_gt.size(0) + # print(seg_gt.shape) seg_pred = output["instances"].pred_masks # seg_pred = seg_gt.clone() list_mious = [] @@ -126,24 +140,29 @@ def process(self, inputs, outputs): a = seg_pred[i].clone().to('cpu') for j in range(seg_gt.size(0)): b = seg_gt[j].clone().to('cpu') - b[b >= 20] = 0 - a[a == 15] = 14 - b[b == 15] = 14 - a[a == 17] = 16 - b[b == 17] = 16 - a[a == 19] = 18 - b[b == 19] = 18 - a[a == 6] = 5 - b[b == 6] = 5 - a[a == 7] = 5 - b[b == 7] = 5 + b[b >= 7] = 0 + # a[a == 15] = 14 + # b[b == 15] = 14 + # a[a == 
17] = 16 + # b[b == 17] = 16 # a[a == 19] = 18 # b[b == 19] = 18 + # a[a == 6] = 5 + # b[b == 6] = 5 + # a[a == 7] = 5 + # b[b == 7] = 5 + # a[a == 10] = 5 + # b[b == 10] = 5 + # a[a == 12] = 5 + # b[b == 12] = 5 + # a[a > 0] = 1 + # b[b > 0] = 1 + # print(a.unique()) # print(b.unique()) - seg_iou = cal_one_mean_iou(a.numpy().astype(np.uint8), b.numpy().astype(np.uint8), 20) + seg_iou = cal_one_mean_iou(a.numpy().astype(np.uint8), b.numpy().astype(np.uint8), 7) # print(seg_iou) - seg_iou = seg_iou[b.unique().cpu().numpy().astype(np.uint8)] + # seg_iou = seg_iou[b.unique().cpu().numpy().astype(np.uint8)] # seg_iou[seg_iou == 0] = np.nan mean_seg_iou = np.nanmean(seg_iou[0:]) # print(mean_seg_iou) @@ -152,20 +171,31 @@ def process(self, inputs, outputs): max_iou_id = j list_mious.append({"id": max_iou_id, "iou": max_iou}) - # print(list_mious) - for i in list_mious: - for j in self.ovthresh_seg: - if i["iou"] > j: - self.tp[j].append(1) - self.fp[j].append(0) + + list_mious = sorted(list_mious, key=lambda x: x["iou"], reverse=True) + print([f"{x['id']}:{x['iou']:.3f}" for x in list_mious]) + for j in self.ovthresh_seg: + id_list = [] + for i in list_mious: + if i['id'] not in id_list: + if i["iou"] >= j: + id_list.append(i['id']) + self.tp[j].append(1) + self.fp[j].append(0) + else: + self.tp[j].append(0) + self.fp[j].append(1) else: + # pass self.tp[j].append(0) self.fp[j].append(1) - plot_mask(seg_gt, self.dataset_dicts.colormap, 20, 2, os.path.join(self._output_dir, str(input['image_id']) + "_gt.png")) - plot_mask(seg_pred, self.dataset_dicts.colormap, 20, 2, os.path.join(self._output_dir, str(input['image_id']) + "_pred.png")) + # plot_mask(seg_gt, self.dataset_dicts.colormap, 20, 2, os.path.join(self._output_dir, str(input['image_id']) + "_gt.png")) + # plot_mask(seg_pred, self.dataset_dicts.colormap, 20, 2, os.path.join(self._output_dir, str(input['image_id']) + "_pred.png")) # plt.show() # self.evaluate() + self.delta_time = time.time() + # return 
self.evaluate() def mix_parts_of_instance(self, instances, size): person_ids = set() @@ -174,7 +204,7 @@ def mix_parts_of_instance(self, instances, size): h, w = size seg_mask = torch.zeros((len(person_ids), h, w)) - + # print(person_ids) for i in person_ids: for j in instances: if j['parent_id'] == i: @@ -184,7 +214,6 @@ def mix_parts_of_instance(self, instances, size): return seg_mask - def evaluate(self): result = {} for i in self.ovthresh_seg: @@ -194,10 +223,15 @@ def evaluate(self): fp = np.cumsum(fp) rec = tp / self.npos prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) + ap = voc_ap(rec, prec) - print(f"APp@{i}: ", ap) + print(f"APp@{i}: {ap:.3f}, {self.npos}, {tp[-1]}, {fp[-1]}") result[f"APp@{i}"] = ap result["APpvol"] = sum(result.values()) / len(result) - print(f"APpvol: ", result["APpvol"]) + result["total_time"] = self.total_time + result["fps"] = self.num_images / self.total_time + print(f"APpvol: {result['APpvol']:.3f}") + print(f"total_time: {result['total_time']:.2f}") + print(f"fps: {result['fps']:.2f}") return result diff --git a/adet/modeling/polo/lovasz_losses.py b/adet/modeling/polo/lovasz_losses.py index b7b14c1ed..842541ad1 100644 --- a/adet/modeling/polo/lovasz_losses.py +++ b/adet/modeling/polo/lovasz_losses.py @@ -1,19 +1,6 @@ -""" -Lovasz-Softmax and Jaccard hinge loss in PyTorch -Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License) -""" - -from __future__ import print_function, division - import torch -from torch.autograd import Variable +import torch.nn as nn import torch.nn.functional as F -import numpy as np - -try: - from itertools import ifilterfalse -except ImportError: # py3k - from itertools import filterfalse as ifilterfalse def lovasz_grad(gt_sorted): @@ -25,229 +12,71 @@ def lovasz_grad(gt_sorted): gts = gt_sorted.sum() intersection = gts - gt_sorted.float().cumsum(0) union = gts + (1 - gt_sorted).float().cumsum(0) - jaccard = 1. 
- intersection / union + jaccard = 1 - intersection / union if p > 1: # cover 1-pixel case jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] return jaccard -def iou_binary(preds, labels, EMPTY=1., ignore=None, per_image=True): - """ - IoU for foreground class - binary: 1 foreground, 0 background - """ - if not per_image: - preds, labels = (preds,), (labels,) - ious = [] - for pred, label in zip(preds, labels): - intersection = ((label == 1) & (pred == 1)).sum() - union = ((label == 1) | ((pred == 1) & (label != ignore))).sum() - if not union: - iou = EMPTY - else: - iou = float(intersection) / float(union) - ious.append(iou) - iou = mean(ious) # mean accross images if per_image - return 100 * iou - - -def iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False): - """ - Array of IoU for each (non ignored) class - """ - if not per_image: - preds, labels = (preds,), (labels,) - ious = [] - for pred, label in zip(preds, labels): - iou = [] - for i in range(C): - if i != ignore: # The ignored label is sometimes among predicted classes (ENet - CityScapes) - intersection = ((label == i) & (pred == i)).sum() - union = ((label == i) | ((pred == i) & (label != ignore))).sum() - if not union: - iou.append(EMPTY) - else: - iou.append(float(intersection) / float(union)) - ious.append(iou) - ious = [mean(iou) for iou in zip(*ious)] # mean accross images if per_image - return 100 * np.array(ious) - - -# --------------------------- BINARY LOSSES --------------------------- - - -def lovasz_hinge(logits, labels, per_image=True, ignore=None): - """ - Binary Lovasz hinge loss - logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) - labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) - per_image: compute the loss per image instead of per batch - ignore: void class id - """ - if per_image: - loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore)) - for log, lab in zip(logits, labels)) - else: - loss = 
def lovasz_softmax_flat(prb, lbl, ignore_index, only_present):
    """Multi-class Lovasz-Softmax loss for a single image.

    Args:
        prb: [C, H, W] tensor of class probabilities (between 0 and 1).
        lbl: [H, W] tensor of ground-truth labels (between 0 and C - 1).
        ignore_index: label value excluded from the loss, or ``None``.
        only_present: average only over classes present in ``lbl``.

    Returns:
        Scalar tensor.  When no pixel or no class contributes, a zero that is
        still attached to ``prb`` so that ``backward()`` keeps working.
    """
    C = prb.shape[0]
    # reshape (not view) so non-contiguous inputs are accepted as well.
    prb = prb.permute(1, 2, 0).reshape(-1, C)  # H * W, C
    lbl = lbl.reshape(-1)  # H * W
    if ignore_index is not None:
        keep = lbl != ignore_index
        prb = prb[keep]
        lbl = lbl[keep]
    if prb.numel() == 0:
        # only void pixels, the gradients should be 0
        return prb.sum() * 0

    class_losses = []
    for c in range(C):
        fg = (lbl == c).float()  # foreground for class c
        if only_present and fg.sum() == 0:
            continue
        errors = (fg - prb[:, c]).abs()
        errors_sorted, perm = torch.sort(errors, dim=0, descending=True)
        class_losses.append(torch.dot(errors_sorted, lovasz_grad(fg[perm])))

    if not class_losses:
        # No class was scored (e.g. every label lies outside [0, C - 1]);
        # dividing by the class count would raise ZeroDivisionError.
        return prb.sum() * 0
    return torch.stack(class_losses).mean()


class LovaszSoftmax(nn.Module):
    """Multi-class Lovasz-Softmax loss as a module.

    forward(logits, labels):
        logits: [B, C, H, W] class logits at each prediction.
        labels: [B, H, W] tensor, ground-truth labels (between 0 and C - 1).

    Args:
        ignore_index: label value excluded from the loss, or ``None``.
        only_present: average only over classes present in the ground truth.
    """

    def __init__(self, ignore_index=None, only_present=True):
        super().__init__()
        self.ignore_index = ignore_index
        self.only_present = only_present

    def forward(self, logits, labels):
        # Same computation as the functional form; keep a single implementation.
        return lovasz_softmax_loss(logits, labels, self.ignore_index, self.only_present)


def lovasz_softmax_loss(x, target, ignore_index=None, only_present=True):
    """Softmax ``x`` over dim 1 and average the per-image Lovasz-Softmax loss.

    Args:
        x: [B, C, H, W] class logits.
        target: [B, H, W] ground-truth labels.
        ignore_index: label value excluded from the loss, or ``None``.
        only_present: average only over classes present in the ground truth.

    Returns:
        Scalar tensor: the sum of the per-image losses divided by ``B``
        (zero for an empty batch instead of a ZeroDivisionError).
    """
    probas = F.softmax(x, dim=1)
    n_images = probas.shape[0]
    if n_images == 0:
        return probas.sum() * 0
    total_loss = probas.new_zeros(())
    for prb, lbl in zip(probas, target):
        total_loss = total_loss + lovasz_softmax_flat(prb, lbl, ignore_index, only_present)
    return total_loss / n_images
class MaskIOULoss(nn.Module):
    """Log-ratio IoU loss between two sets of non-negative lengths.

    For every row the loss is ``log(sum(max(pred, target)) / sum(min(pred,
    target)))``; it is 0 when ``pred == target`` and grows as they diverge.

    Args:
        eps: lower bound applied to both sums so that all-zero rows give a
            finite loss instead of ``inf``/``nan``.
    """

    def __init__(self, eps=1e-6):
        super().__init__()
        self.eps = eps

    def forward(self, pred, target, weight=None):
        """Return the (optionally weighted) loss summed over all rows.

        Args:
            pred: predicted lengths; assumed to be (N, K) and non-negative
                -- TODO confirm against the caller.
            target: ground-truth lengths, same shape as ``pred``.
            weight: optional per-row weights, broadcastable to (N,).
        """
        total = torch.stack([pred, target], -1)
        # Reduce over the stacked axis explicitly instead of hard-coding dim=2.
        l_max = total.max(dim=-1)[0]
        l_min = total.min(dim=-1)[0]

        # Clamp both sums: a zero denominator would otherwise yield inf/nan.
        numerator = l_max.sum(dim=1).clamp(min=self.eps)
        denominator = l_min.sum(dim=1).clamp(min=self.eps)
        loss = (numerator / denominator).log()
        if weight is not None:
            loss = loss * weight
        return loss.sum()
reduction='mean', weight=self.seg_cross_loss_classes_weight)) + # loss_ins.append(F.cross_entropy(input, target.to(torch.long), reduction='mean', weight=self.seg_cross_loss_classes_weight)) + # lovasz_loss_ins.append(lovosz_softmax(input, target.to(torch.long))) + lovasz_loss_ins.append(F.cross_entropy(input, target.to(torch.long), reduction='mean',ignore_index=0)) # input = torch.sigmoid(input) # loss_ins.append(dice_loss(input, target)) - loss_ins.append(F.cross_entropy(input, target.to(torch.long), reduction='sum', weight=self.seg_cross_loss_classes_weight)/(input.shape[0]*input.shape[2]*input.shape[3])) + # target img to one-hot + # target = F.one_hot(target.long(), self.num_classes).permute(0, 3, 1, 2).float() + # flatten input and target + # input = input.permute(0, 2, 3, 1).reshape(-1, self.num_classes) + # target = target.permute(0, 2, 3, 1).reshape(-1, self.num_classes) + + # focal_loss_ins.append(sigmoid_focal_loss(input, target, + # gamma=2.0, + # alpha=0.25, + # reduction="mean")) # weight=torch.tensor([10.0, 100, 10, 100, 100, 10, 10, 10, 100, 10, 100, 100, 100, 100, 10, 100, 100, 100, 100, 100]).to(self.device)/100)) - input = F.softmax(input, dim=1) - lovasz_loss_ins.append(lovasz_softmax(input, target, ignore=255, per_image=self.seg_lovasz_loss_perimg, classes='all')) + # input = F.softmax(input, dim=1) + # lovasz_loss_ins.append(lovasz_softmax(input, target, ignore=255, per_image=self.seg_lovasz_loss_perimg, classes='all')) # loss_ins_mean = torch.cat(loss_ins).mean() loss_ins_mean = torch.stack(loss_ins).mean() + # loss_focal_ins_mean = torch.stack(focal_loss_ins).mean() lovasz_loss_ins_mean = torch.stack(lovasz_loss_ins).mean() loss_seg_cross = loss_ins_mean * self.seg_cross_loss_weight loss_seg_lovasz = lovasz_loss_ins_mean * self.seg_lovasz_loss_weight + # loss_seg_focal = self.seg_focal_loss_weight * loss_focal_ins_mean + ins_ind_labels = [ torch.cat([ins_ind_labels_level_img.flatten() @@ -389,7 +410,9 @@ def loss(self, cate_preds, 
kernel_preds, ins_pred, targets): return { 'loss_seg_cross': loss_seg_cross, - 'loss_seg_lovasz': loss_seg_lovasz, + # 'loss_seg_lovasz': loss_seg_lovasz, + 'loss_seg_class': loss_seg_lovasz, + # 'loss_seg_focal': loss_seg_focal, 'loss_object': loss_object} @staticmethod @@ -445,7 +468,7 @@ def inference_single_image( cate_scores = cate_preds[inds] if len(cate_scores) == 0: - print('no cate_scores') + # print('no cate_scores') results = Instances(ori_size) results.scores = torch.tensor([]) results.pred_classes = torch.tensor([]) @@ -474,6 +497,9 @@ def inference_single_image( kernel_preds = kernel_preds.view(N, I, 1, 1, 1) seg_preds = torch.reshape(seg_preds, (-1, self.num_classes, H, W)) seg_preds = seg_preds.unsqueeze(0) + # print('seg_preds', seg_preds.shape) + # print('kernel_preds', kernel_preds.shape) + # raise '' seg_preds = F.conv3d(seg_preds, kernel_preds, stride=1).view(-1, self.num_classes, H, W) # mask. seg_masks = torch.argmax(seg_preds, dim=1) @@ -534,7 +560,7 @@ def inference_single_image( # if self.nms_type == "matrix": # matrix nms & filter. 
- for i in range(10): + for i in range(2): cate_scores = matrix_nms(cate_labels, binary_masks, sum_masks, cate_scores, sigma=self.nms_sigma, kernel=self.nms_kernel) cate_scores[cate_scores.isnan()] = 0 @@ -797,9 +823,19 @@ def __init__(self, cfg, input_shape: List[ShapeSpec]): nn.Conv2d( self.mask_channels, self.num_masks*self.num_classes, kernel_size=1, stride=1, - padding=0, bias=norm is None), - # nn.GroupNorm(32, self.num_masks*self.num_classes), - nn.ReLU(inplace=True) + padding=1, bias=norm is None), + # nn.GroupNorm(self.num_classes, self.num_masks*self.num_classes), + nn.ReLU(inplace=True), + # nn.Conv2d( + # self.num_masks * 4, self.num_masks * self.num_classes, + # kernel_size=3, stride=1, + # padding=1, bias=norm is None), + # nn.ReLU(inplace=True), + # nn.Conv2d( + # self.num_masks*4, self.num_masks*self.num_classes, + # kernel_size=1, stride=1, + # padding=0, bias=norm is None), + # nn.ReLU(inplace=True) ) for modules in [self.convs_all_levels, self.conv_pred]: diff --git a/configs/POLO/Base-POLO.yaml b/configs/POLO/Base-POLO.yaml index 8ab2d64e3..1f870cbc3 100644 --- a/configs/POLO/Base-POLO.yaml +++ b/configs/POLO/Base-POLO.yaml @@ -8,8 +8,8 @@ MODEL: FPN: IN_FEATURES: ["res2", "res3", "res4", "res5"] POLO: - SCORE_THR: 0.1 - UPDATE_THR: 0.7 + SCORE_THR: 0.5 + UPDATE_THR: 0.5 DATASETS: TRAIN: ("coco_2017_train",) TEST: ("coco_2017_val",) @@ -21,5 +21,5 @@ SOLVER: INPUT: MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) MASK_FORMAT: "bitmask" -SEED: 1 +SEED: 10 VERSION: 2 diff --git a/configs/POLO/CIHP_R50_3x_v2.yaml b/configs/POLO/CIHP_R50_3x_v2.yaml index 756ced694..30adb431f 100644 --- a/configs/POLO/CIHP_R50_3x_v2.yaml +++ b/configs/POLO/CIHP_R50_3x_v2.yaml @@ -7,16 +7,20 @@ MODEL: NUM_CLASSES: 20 POLO: NUM_CLASSES: 20 - NUM_MASKS: 256 - NUM_KERNELS: 256 + NUM_MASKS: 200 + NUM_KERNELS: 200 + NORM: 'none' SOLVER: - STEPS: (15000, 45000, 135000, 200000) - MAX_ITER: 270000 + STEPS: (70000, 105000, 150000) + MAX_ITER: 141400 IMS_PER_BATCH: 2 - BASE_LR: 
0.0001 - GAMMA: 0.5 + BASE_LR: 0.0002 +# BASE_LR: 0.000003125 + GAMMA: 0.1 WARMUP_FACTOR: 0.01 - WARMUP_ITERS: 5000 + WARMUP_ITERS: 100 DATASETS: TRAIN: ("CIHP_train_v2",) - TEST: ("CIHP_val_v2",) \ No newline at end of file + TEST: ("CIHP_val_v2",) +TEST: + EVAL_PERIOD: 5000 diff --git a/configs/POLO/PPP_R101_3x.yaml b/configs/POLO/PPP_R101_3x.yaml new file mode 100644 index 000000000..b1a6030db --- /dev/null +++ b/configs/POLO/PPP_R101_3x.yaml @@ -0,0 +1,29 @@ +_BASE_: "Base-POLO.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + SEM_SEG_HEAD: + NUM_CLASSES: 7 + POLO: + NUM_CLASSES: 7 + NUM_MASKS: 256 + NUM_KERNELS: 256 + NORM: 'none' + SCORE_THR: 0.5 + UPDATE_THR: 0.5 +# FPN_SCALE_RANGES: ((1, 48), (24, 96), (48, 192), (96, 384), (192, 2048)) +SOLVER: + STEPS: (70000, 100000) + MAX_ITER: 141400 + IMS_PER_BATCH: 2 + BASE_LR: 0.0001 +# BASE_LR: 0.000003125 + GAMMA: 0.1 + WARMUP_FACTOR: 0.01 + WARMUP_ITERS: 100 +DATASETS: + TRAIN: ("PPP_train",) + TEST: ("PPP_val",) +TEST: + EVAL_PERIOD: 1000 diff --git a/configs/POLO/PPP_R50_3x_v2.yaml b/configs/POLO/PPP_R50_3x_v2.yaml new file mode 100644 index 000000000..9957b6ac0 --- /dev/null +++ b/configs/POLO/PPP_R50_3x_v2.yaml @@ -0,0 +1,29 @@ +_BASE_: "Base-POLO.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + SEM_SEG_HEAD: + NUM_CLASSES: 7 + POLO: + NUM_CLASSES: 7 + NUM_MASKS: 200 + NUM_KERNELS: 200 + NORM: 'none' + SCORE_THR: 0.9 + UPDATE_THR: 0.1 + FPN_SCALE_RANGES: ((1, 48), (24, 96), (48, 192), (96, 384), (192, 2048)) +SOLVER: + STEPS: (35000, 70000, 105000) + MAX_ITER: 141400 + IMS_PER_BATCH: 2 + BASE_LR: 0.0002 +# BASE_LR: 0.000003125 + GAMMA: 0.1 + WARMUP_FACTOR: 0.01 + WARMUP_ITERS: 100 +DATASETS: + TRAIN: ("PPP_train",) + TEST: ("PPP_val",) +TEST: + EVAL_PERIOD: 1000