diff --git a/adet/config/defaults.py b/adet/config/defaults.py
index cd35f6246..a754911a4 100644
--- a/adet/config/defaults.py
+++ b/adet/config/defaults.py
@@ -356,8 +356,8 @@
 _C.MODEL.POLO.TYPE_DCN = 'DCN'
 _C.MODEL.POLO.NUM_GRIDS = [40, 36, 24, 16, 12]
 # Number of foreground classes.
-_C.MODEL.POLO.NUM_CLASSES = 80
-_C.MODEL.POLO.NUM_KERNELS = 32
+_C.MODEL.POLO.NUM_CLASSES = 7
+_C.MODEL.POLO.NUM_KERNELS = 200
 _C.MODEL.POLO.NORM = "GN"
 _C.MODEL.POLO.USE_COORD_CONV = True
 _C.MODEL.POLO.PRIOR_PROB = 0.01
@@ -366,13 +366,13 @@
 # Channel size for the mask tower.
 _C.MODEL.POLO.MASK_IN_FEATURES = ["p2", "p3", "p4", "p5"]
 _C.MODEL.POLO.MASK_IN_CHANNELS = 256
-_C.MODEL.POLO.MASK_CHANNELS = 512 #256 #128
-_C.MODEL.POLO.NUM_MASKS = 32
+_C.MODEL.POLO.MASK_CHANNELS = 512  # 256 #128
+_C.MODEL.POLO.NUM_MASKS = 200
 
 # Test cfg.
 _C.MODEL.POLO.NMS_PRE = 500
-_C.MODEL.POLO.SCORE_THR = 0.0005
-_C.MODEL.POLO.UPDATE_THR = 0.0005
+_C.MODEL.POLO.SCORE_THR = 0.5
+_C.MODEL.POLO.UPDATE_THR = 0.5
 _C.MODEL.POLO.MASK_THR = 0.5
 _C.MODEL.POLO.MAX_PER_IMG = 100
 # NMS type: matrix OR mask.
diff --git a/adet/data/builtin.py b/adet/data/builtin.py
index e87449b3c..a41c9276b 100644
--- a/adet/data/builtin.py
+++ b/adet/data/builtin.py
@@ -85,8 +85,10 @@ def register_all_coco(root="datasets"):
 MetadataCatalog.get('CIHP_val').set(thing_classes=thing_classes, stuff_classes=stuff_classes, evaluator_type="coco")
 MetadataCatalog.get('CIHP_train_v2').set(thing_classes=thing_classes[1:], stuff_classes=stuff_classes, evaluator_type="parsing")
 MetadataCatalog.get('CIHP_val_v2').set(thing_classes=thing_classes[1:], stuff_classes=stuff_classes, evaluator_type="parsing")
-MetadataCatalog.get('PPP_train').set(thing_classes=thing_classes, stuff_classes=stuff_classes, evaluator_type="parsing")
-MetadataCatalog.get('PPP_val').set(thing_classes=thing_classes, stuff_classes=stuff_classes, evaluator_type="parsing")
+
+ppp_thing_classes = ['bg', 'head', 'upper', 'upper-hand', 'lower-hand', 'upper-leg', 'lower-leg']
+MetadataCatalog.get('PPP_train').set(thing_classes=ppp_thing_classes, stuff_classes=ppp_thing_classes, evaluator_type="parsing")
+MetadataCatalog.get('PPP_val').set(thing_classes=ppp_thing_classes, stuff_classes=ppp_thing_classes, evaluator_type="parsing")
 MetadataCatalog.get('CIHP_train_person').set(thing_classes=['Person'], evaluator_type="coco")
 MetadataCatalog.get('CIHP_val_person').set(thing_classes=['Person'], evaluator_type="coco")
 MetadataCatalog.get('CIHP_train_person').set(thing_classes=['Person'], evaluator_type="coco")
diff --git a/adet/data/datasets/pascal_person_part.py b/adet/data/datasets/pascal_person_part.py
index e6ba95648..a2a6f9c92 100644
--- a/adet/data/datasets/pascal_person_part.py
+++ b/adet/data/datasets/pascal_person_part.py
@@ -14,9 +14,8 @@ def __init__(self, root, train=False):
         self.train = train
 
         # Loading the Colormap
-        colormap = loadmat(os.path.join(root, 'CIHP/human_colormap.mat')
-                           )["colormap"]
-        colormap = colormap * 100
+        colormap = np.array([[0, 0, 0], [0, 0, 255], [0, 255, 0], [0, 255, 255], [255, 0, 0], [255, 0, 255], [255, 255, 0]])
+        # colormap = colormap * 100
         self.colormap = colormap.astype(np.uint8)
         self.root = os.path.join(root, 'VOCdevkit/VOC2010/')
         if train:
@@ -116,7 +115,7 @@ def create_annotations(self, pictur_id):
         return objs
 
     def __len__(self):
-        # return 1
+        # return 100
         return len(self.anno_ids)
 
     def get_dicts(self):
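A minimal sketch (reviewer note, not part of the patch) of how the hard-coded 7-entry palette above renders a PPP parsing mask; `mask` is assumed to be an (H, W) integer array of class ids 0..6 in the `ppp_thing_classes` order registered in builtin.py:

    import numpy as np

    # Same palette as the pascal_person_part.py change above (bg, head, upper, ...).
    ppp_colormap = np.array([[0, 0, 0], [0, 0, 255], [0, 255, 0], [0, 255, 255],
                             [255, 0, 0], [255, 0, 255], [255, 255, 0]], dtype=np.uint8)

    def colorize(mask):
        # Fancy-indexing the (7, 3) palette with the class-id mask gives an (H, W, 3) RGB image.
        return ppp_colormap[np.clip(mask, 0, 6)]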
diff --git a/adet/evaluation/__init__.py b/adet/evaluation/__init__.py
index bf7664b01..99831e40b 100644
--- a/adet/evaluation/__init__.py
+++ b/adet/evaluation/__init__.py
@@ -3,4 +3,5 @@
 from .text_eval_script_ic15 import text_eval_main_ic15
 from . import rrc_evaluation_funcs
 from . import rrc_evaluation_funcs_ic15
-from .parsing_evaluation import ParsingEval
\ No newline at end of file
+from .parsing_evaluation import ParsingEval
+from .mhp_evalauation import MHPDatasetEvaluator
\ No newline at end of file
diff --git a/adet/evaluation/mhp_evalauation.py b/adet/evaluation/mhp_evalauation.py
new file mode 100644
index 000000000..c3380249d
--- /dev/null
+++ b/adet/evaluation/mhp_evalauation.py
@@ -0,0 +1,203 @@
+import os.path
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.evaluation import COCOEvaluator, inference_on_dataset, DatasetEvaluator
+import matplotlib.pyplot as plt
+import torch
+
+import numpy as np
+from PIL import Image, ImageDraw
+import time
+from .utils import poly_to_mask, plot_mask, voc_ap, cal_one_mean_iou
+
+
+class APEvaluator:
+    """Accumulates per-detection TP/FP flags and computes a VOC-style AP."""
+
+    def __init__(self):
+        self.tp = []
+        self.fp = []
+
+        self.precision = []
+        self.recall = []
+        self.ap = []
+
+    def add_tp(self):
+        self.tp.append(1)
+        self.fp.append(0)
+
+    def add_fp(self):
+        self.tp.append(0)
+        self.fp.append(1)
+
+    def eval(self, npos):
+        tp = np.array(self.tp)
+        fp = np.array(self.fp)
+        tp = np.cumsum(tp)
+        fp = np.cumsum(fp)
+        rec = tp / npos
+        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
+
+        ap = voc_ap(rec, prec)
+        self.precision = prec
+        self.recall = rec
+        self.ap = ap
+        return ap
+
+
+class MHPDatasetEvaluator(DatasetEvaluator):
+
+    def __init__(self, dataset_name, cfg, distributed, output_dir=None):
+        super().__init__()
+        self._cfg = cfg.clone()
+        self._dataset_name = dataset_name
+        self._distributed = distributed
+        self._output_dir = output_dir
+        self.dataset_dicts = DatasetCatalog.get(dataset_name)
+        self.metadata = MetadataCatalog.get(dataset_name)
+        self.num_classes = len(self.metadata.thing_classes)
+        self.ovthresh_seg = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+        # if not os.path.exists(output_dir):
+        try:
+            os.makedirs(output_dir)
+        except OSError:
+            pass
+
+    def reset(self):
+        self.apr = {}
+        self.app = {}
+        for i in self.ovthresh_seg:
+            self.app[i] = APEvaluator()
+            self.apr[i] = APEvaluator()
+        self.npos = 0
+        self.npart = 0
+        self.total_time = 0
+        self.delta_time = time.time()
+        self.num_images = 0
+
+    def process(self, inputs, outputs):
+        self.num_images += len(inputs)
+        self.total_time += (time.time() - self.delta_time)
+        for input, output in zip(inputs, outputs):
+            if len(output["instances"]) == 0:
+                seg_gt = self.mix_parts_of_instance(self.dataset_dicts[input['image_id']]['annotations'], (100, 100))
+                self.npos += seg_gt.shape[0]
+                for i in range(seg_gt.shape[0]):
+                    self.npart += len(np.unique(seg_gt[i]))
+                continue
+            w, h = output["instances"].pred_masks.size(1), output["instances"].pred_masks.size(2)
+            seg_gt = self.mix_parts_of_instance(self.dataset_dicts[input['image_id']]['annotations'], (w, h))
+            self.npos += seg_gt.size(0)
+            for i in range(seg_gt.size(0)):
+                self.npart += len(np.unique(seg_gt[i]))
+
+            seg_pred = output["instances"].pred_masks
+
+            # Match every predicted person mask to the ground-truth person with
+            # the highest mean part IoU.
+            list_mious = []
+            for i in range(seg_pred.size(0)):
+                max_miou = 0
+                max_iou = []
+                max_iou_id = -1
+                a = seg_pred[i].clone().to('cpu')
+                for j in range(seg_gt.size(0)):
+                    b = seg_gt[j].clone().to('cpu')
+                    b[b >= self.num_classes] = 0
+
+                    seg_iou = cal_one_mean_iou(a.numpy().astype(np.uint8), b.numpy().astype(np.uint8), 7)
+                    # print(seg_iou)
+                    # seg_iou = seg_iou[b.unique().cpu().numpy().astype(np.uint8)]
+                    # seg_iou[seg_iou == 0] = np.nan
+                    mean_seg_iou = np.nanmean(seg_iou[0:])
+                    # print(mean_seg_iou)
+                    if mean_seg_iou > max_miou:
+                        max_miou = mean_seg_iou
+                        max_iou = seg_iou
+                        max_iou_id = j
+                # print(len(max_iou))
+                list_mious.append({"id": max_iou_id, "iou": max_miou, "iou_list": max_iou})
+
+            list_mious = sorted(list_mious, key=lambda x: x["iou"], reverse=True)
+            # print([f"{x['id']}:{x['iou']:.3f}" for x in list_mious])
+            for j in self.ovthresh_seg:
+                id_list = []
+                for i in list_mious:
+                    if i['id'] not in id_list:
+                        # print("aa", len(i['iou_list']))
+                        for k in range(len(i['iou_list'])):
+                            # `x == np.nan` is always False; use np.isnan to skip absent classes.
+                            if np.isnan(i['iou_list'][k]):
+                                continue
+                            if i['iou_list'][k] >= j:
+                                self.apr[j].add_tp()
+                            else:
+                                self.apr[j].add_fp()
+                        if i["iou"] >= j:
+                            id_list.append(i['id'])
+                            self.app[j].add_tp()
+                        else:
+                            self.app[j].add_fp()
+                    else:
+                        # Ground truth already matched: count the duplicate as FP.
+                        self.app[j].add_fp()
+
+            # plot_mask(seg_gt, self.dataset_dicts.colormap, 20, 2, os.path.join(self._output_dir, str(input['image_id']) + "_gt.png"))
+            # plot_mask(seg_pred, self.dataset_dicts.colormap, 20, 2, os.path.join(self._output_dir, str(input['image_id']) + "_pred.png"))
+            # img = input["image"].permute(1, 2, 0).cpu().numpy()
+            # img = (img * 255).astype(np.uint8)
+            # Image.fromarray(img).save(os.path.join(self._output_dir, str(input['image_id']) + ".png"))
+            # plt.show()
+        # self.evaluate()
+        self.delta_time = time.time()
+        # return self.evaluate()
+
+    def mix_parts_of_instance(self, instances, size):
+        # Merge the per-part polygon annotations of each person (grouped by
+        # parent_id, assumed to run 0..num_persons-1) into one class-id mask.
+        person_ids = set()
+        for i in instances:
+            person_ids.add(i['parent_id'])
+
+        h, w = size
+        seg_mask = torch.zeros((len(person_ids), h, w))
+        # print(person_ids)
+        for i in person_ids:
+            for j in instances:
+                if j['parent_id'] == i:
+                    mask = poly_to_mask(j['segmentation'], w, h)
+                    mask = torch.from_numpy(mask)
+                    seg_mask[i] = torch.add(seg_mask[i], mask * j['category_id'])
+
+        return seg_mask
+
+    def evaluate(self):
+        result = {}
+        app = []
+        apr = []
+        for i in self.ovthresh_seg:
+            result[f"APr_{i}"] = self.apr[i].eval(self.npart)
+            result[f"APp_{i}"] = self.app[i].eval(self.npos)
+            print(f"APr_{i} = {result[f'APr_{i}']:.3f}")
+            print(f"APp_{i} = {result[f'APp_{i}']:.3f}")
+            app.append(result[f"APp_{i}"])
+            apr.append(result[f"APr_{i}"])
+
+            # tp = np.array(self.tp[i])
+            # fp = np.array(self.fp[i])
+            # tp = np.cumsum(tp)
+            # fp = np.cumsum(fp)
+            # rec = tp / self.npos
+            # prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
+            #
+            # ap = voc_ap(rec, prec)
+            # print(f"APp@{i}: {ap:.3f}, {self.npos}, {tp[-1]}, {fp[-1]}")
+            # result[f"APp@{i}"] = ap
+
+        # result["APpvol"] = sum(result.values()) / len(result)
+        result["APpvol"] = sum(app) / len(app)
+        result["APrvol"] = sum(apr) / len(apr)
+        result["total_time"] = self.total_time
+        result["fps"] = self.num_images / self.total_time
+        # print(f"APpvol: {result['APpvol']:.3f}")
+        print(f"total_time: {result['total_time']:.2f}")
+        print(f"fps: {result['fps']:.2f}")
+        return result
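Usage sketch for `APEvaluator` above (reviewer note, illustrative numbers): each detection contributes one TP or FP in the order it is processed, and `eval` turns the cumulative counts into a precision/recall curve and a VOC-style AP:

    ev = APEvaluator()
    ev.add_tp()           # first detection matches a ground-truth person
    ev.add_fp()           # second detection is unmatched (or a duplicate)
    ev.add_tp()           # third detection matches another person
    ap = ev.eval(npos=2)  # npos = total ground-truth instances -> 0.833...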
diff --git a/adet/evaluation/parsing_evaluation.py b/adet/evaluation/parsing_evaluation.py
index 669a05ad6..51d0bbb45 100644
--- a/adet/evaluation/parsing_evaluation.py
+++ b/adet/evaluation/parsing_evaluation.py
@@ -10,6 +10,7 @@
 import time
 
 
+
 def poly_to_mask(polygon, width, height):
     img = Image.new('L', (width, height), 0)
     for poly in polygon:
@@ -35,8 +36,11 @@ def plot_mask(mask, colormap, classes=20, row=1, mask_name=None):
     col = ((mask.size(0)) // row) + 2
     fig, ax = plt.subplots(col, row, figsize=(10, 10))
     for i in range(mask.size(0)):
-        prediction_colormap = decode_segmentation_masks(mask[i].squeeze().cpu().numpy(), colormap,
-                                                        classes)
-
+        mask[mask >= 7] = 0
+        prediction_colormap = decode_segmentation_masks(mask[i].squeeze().cpu().numpy(), colormap, 7)
+        # save the mask
+        if mask_name is not None:
+            Image.fromarray(prediction_colormap).save(mask_name + '_' + str(i) + '.png')
         ax[i // row, i % row].imshow(prediction_colormap)
     if mask_name is not None:
         plt.savefig(mask_name)
@@ -121,6 +125,10 @@ def process(self, inputs, outputs):
         self.num_images += len(inputs)
         self.total_time += (time.time() - self.delta_time)
         for input, output in zip(inputs, outputs):
+            # save input image
+
+
+
             # self.npos += len(self.dataset_dicts[input['image_id']]['annotations'])
             if len(output["instances"]) == 0:
                 seg_gt = self.mix_parts_of_instance(self.dataset_dicts[input['image_id']]['annotations'], (100, 100))
@@ -164,7 +172,7 @@
                     # print(seg_iou)
                     # seg_iou = seg_iou[b.unique().cpu().numpy().astype(np.uint8)]
                     # seg_iou[seg_iou == 0] = np.nan
-                    mean_seg_iou = np.nanmean(seg_iou[0:])
+                    mean_seg_iou = np.nanmean(seg_iou[1:])
                     # print(mean_seg_iou)
                     if mean_seg_iou > max_iou:
                         max_iou = mean_seg_iou
@@ -192,6 +200,9 @@
 
             # plot_mask(seg_gt, self.dataset_dicts.colormap, 20, 2, os.path.join(self._output_dir, str(input['image_id']) + "_gt.png"))
             # plot_mask(seg_pred, self.dataset_dicts.colormap, 20, 2, os.path.join(self._output_dir, str(input['image_id']) + "_pred.png"))
+            # img = input["image"].permute(1, 2, 0).cpu().numpy()
+            # img = (img * 255).astype(np.uint8)
+            # Image.fromarray(img).save(os.path.join(self._output_dir, str(input['image_id']) + ".png"))
             # plt.show()
             # self.evaluate()
             self.delta_time = time.time()
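The `seg_iou[0:]` → `seg_iou[1:]` change above drops index 0 (background) from the mean part IoU used for matching: `cal_one_mean_iou` returns one IoU per class id, and background typically scores high, inflating the match quality. A toy illustration (IoU values hypothetical):

    import numpy as np

    # Per-class IoUs for [bg, head, upper, upper-hand]; NaN = class absent.
    seg_iou = np.array([0.95, 0.40, np.nan, 0.60])
    np.nanmean(seg_iou[0:])  # 0.65 - background dominates the mean
    np.nanmean(seg_iou[1:])  # 0.50 - foreground parts only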
diff --git a/adet/evaluation/utils.py b/adet/evaluation/utils.py
new file mode 100644
index 000000000..e9a3d7866
--- /dev/null
+++ b/adet/evaluation/utils.py
@@ -0,0 +1,94 @@
+import os.path
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.evaluation import COCOEvaluator, inference_on_dataset, DatasetEvaluator
+import matplotlib.pyplot as plt
+import torch
+
+import numpy as np
+from PIL import Image, ImageDraw
+import time
+
+
+def poly_to_mask(polygon, width, height):
+    # Rasterize a list of COCO-style flat polygons into a binary (H, W) mask.
+    img = Image.new('L', (width, height), 0)
+    for poly in polygon:
+        ImageDraw.Draw(img).polygon(poly, outline=1, fill=1)
+    mask = np.array(img)
+    return mask
+
+
+def seg_masks_to_rgb_img(mask, colormap, n_classes):
+    r = np.zeros_like(mask).astype(np.uint8)
+    g = np.zeros_like(mask).astype(np.uint8)
+    b = np.zeros_like(mask).astype(np.uint8)
+    for l in range(0, n_classes):
+        idx = mask == l
+        r[idx] = colormap[l, 0]
+        g[idx] = colormap[l, 1]
+        b[idx] = colormap[l, 2]
+    rgb = np.stack([r, g, b], axis=2)
+    return rgb
+
+
+def plot_mask(mask, colormap, classes=20, row=1, mask_name=None):
+    col = ((mask.size(0)) // row) + 2
+    fig, ax = plt.subplots(col, row, figsize=(10, 10))
+    for i in range(mask.size(0)):
+        mask[mask >= 7] = 0
+        prediction_colormap = seg_masks_to_rgb_img(mask[i].squeeze().cpu().numpy(), colormap, classes)
+        # save the mask
+        if mask_name is not None:
+            Image.fromarray(prediction_colormap).save(mask_name + '_' + str(i) + '.png')
+        ax[i // row, i % row].imshow(prediction_colormap)
+    if mask_name is not None:
+        plt.savefig(mask_name)
+
+
+def voc_ap(rec, prec, use_07_metric=False):
+    """ ap = voc_ap(rec, prec, [use_07_metric])
+    Compute VOC AP given precision and recall.
+    If use_07_metric is true, uses the
+    VOC 07 11 point method (default: False).
+    """
+    if use_07_metric:
+        # 11 point metric
+        ap = 0.
+        for t in np.arange(0., 1.1, 0.1):
+            if np.sum(rec >= t) == 0:
+                p = 0
+            else:
+                p = np.max(prec[rec >= t])
+            ap = ap + p / 11.
+    else:
+        # correct AP calculation
+        # first append sentinel values at the end
+        mrec = np.concatenate(([0.], rec, [1.]))
+        mpre = np.concatenate(([0.], prec, [0.]))
+
+        # compute the precision envelope
+        for i in range(mpre.size - 1, 0, -1):
+            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
+
+        # to calculate area under PR curve, look for points
+        # where X axis (recall) changes value
+        i = np.where(mrec[1:] != mrec[:-1])[0]
+
+        # and sum (\Delta recall) * prec
+        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
+    return ap
+
+
+def fast_hist(a, b, n):
+    k = (a >= 0) & (a < n)
+    return np.bincount(n * a[k].astype(int) + b[k], minlength=n ** 2).reshape(n, n)
+
+
+def cal_one_mean_iou(image_array, label_array, num_parsing):
+    # float64 histogram so the IoU division below is floating point
+    # (np.float is gone from recent NumPy releases).
+    hist = fast_hist(label_array, image_array, num_parsing).astype(float)
+    num_cor_pix = np.diag(hist)
+    num_gt_pix = hist.sum(1)
+    union = num_gt_pix + hist.sum(0) - num_cor_pix
+    iu = num_cor_pix / union
+    return iu
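A worked example (reviewer note, not in the patch) of the confusion-matrix helpers above: `fast_hist(a, b, n)` returns an n×n matrix of pixel counts indexed [gt, pred], and `cal_one_mean_iou` reads per-class IoU off it as diag / (row sums + column sums − diag):

    import numpy as np

    gt   = np.array([0, 0, 1, 1])   # flattened ground-truth labels
    pred = np.array([0, 1, 1, 1])   # flattened predicted labels
    hist = fast_hist(gt, pred, 2)   # [[1, 1], [0, 2]]
    iou = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
    # iou -> [0.5, 0.6667], the per-class IoU for classes 0 and 1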
diff --git a/configs/POLO/Base-POLO.yaml b/configs/POLO/Base-POLO.yaml
index 1f870cbc3..d0bf29a5f 100644
--- a/configs/POLO/Base-POLO.yaml
+++ b/configs/POLO/Base-POLO.yaml
@@ -20,6 +20,8 @@ SOLVER:
   WARMUP_ITERS: 1000
 INPUT:
   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+  MIN_SIZE_TEST: 640
+#  MAX_SIZE_TEST: 800
 MASK_FORMAT: "bitmask"
 SEED: 10
 VERSION: 2
diff --git a/configs/POLO/PPP_R50_3x_v2.yaml b/configs/POLO/PPP_R50_3x_v2.yaml
index 9957b6ac0..c1e733bd4 100644
--- a/configs/POLO/PPP_R50_3x_v2.yaml
+++ b/configs/POLO/PPP_R50_3x_v2.yaml
@@ -7,19 +7,19 @@ MODEL:
     NUM_CLASSES: 7
   POLO:
     NUM_CLASSES: 7
-    NUM_MASKS: 200
-    NUM_KERNELS: 200
+    NUM_MASKS: 100
+    NUM_KERNELS: 100
     NORM: 'none'
-    SCORE_THR: 0.9
-    UPDATE_THR: 0.1
+    SCORE_THR: 0.5
+    UPDATE_THR: 0.5
     FPN_SCALE_RANGES: ((1, 48), (24, 96), (48, 192), (96, 384), (192, 2048))
 SOLVER:
   STEPS: (35000, 70000, 105000)
   MAX_ITER: 141400
   IMS_PER_BATCH: 2
-  BASE_LR: 0.0002
+  BASE_LR: 0.0001
 #  BASE_LR: 0.000003125
-  GAMMA: 0.1
+  GAMMA: 0.5
   WARMUP_FACTOR: 0.01
   WARMUP_ITERS: 100
 DATASETS:
diff --git a/tools/train_net.py b/tools/train_net.py
index bdb7569ac..17162f526 100644
--- a/tools/train_net.py
+++ b/tools/train_net.py
@@ -41,7 +41,7 @@
 from adet.data.fcpose_dataset_mapper import FCPoseDatasetMapper
 from adet.config import get_cfg
 from adet.checkpoint import AdetCheckpointer
-from adet.evaluation import TextEvaluator, ParsingEval
+from adet.evaluation import TextEvaluator, ParsingEval, MHPDatasetEvaluator
 from detectron2.solver.build import get_default_optimizer_params, maybe_add_gradient_clipping
 
 
@@ -183,7 +183,8 @@ def build_evaluator(cls, cfg, dataset_name, output_folder=None):
         if evaluator_type == "coco_panoptic_seg":
             evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
         if evaluator_type == "parsing":
-            evaluator_list.append(ParsingEval(dataset_name, cfg, True, output_folder))
+            # evaluator_list.append(ParsingEval(dataset_name, cfg, True, output_folder))
+            evaluator_list.append(MHPDatasetEvaluator(dataset_name, cfg, True, output_folder))
         if evaluator_type == "pascal_voc":
             return PascalVOCDetectionEvaluator(dataset_name)
         if evaluator_type == "lvis":
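With this wiring, any dataset registered with evaluator_type "parsing" (PPP_train/PPP_val above) is scored by the new evaluator. A usage sketch (reviewer note; model and data-loader construction elided, output path hypothetical):

    evaluator = MHPDatasetEvaluator("PPP_val", cfg, True, "output/ppp_eval")
    results = inference_on_dataset(model, val_loader, evaluator)  # reset(), process() per batch, evaluate()
    print(results["APpvol"], results["APrvol"], results["fps"])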