diff --git a/configs/fashion_parsing_segmentation/README.md b/configs/fashion_parsing_segmentation/README.md
index 4c8e63e68..4ca452670 100644
--- a/configs/fashion_parsing_segmentation/README.md
+++ b/configs/fashion_parsing_segmentation/README.md
@@ -29,5 +29,3 @@ mmfashion
 ├── data
 └── ...
 ```
-
-
diff --git a/configs/fashion_parsing_segmentation/__init__.py b/configs/fashion_parsing_segmentation/__init__.py
index 8921c211a..658ba97ee 100644
--- a/configs/fashion_parsing_segmentation/__init__.py
+++ b/configs/fashion_parsing_segmentation/__init__.py
@@ -3,17 +3,16 @@
 from .coco import CocoDataset
 from .custom import CustomDataset
 from .dataset_wrappers import ConcatDataset, RepeatDataset
+from .mmfashion import mmfashionDataset
 from .registry import DATASETS
 from .samplers import DistributedGroupSampler, DistributedSampler, GroupSampler
 from .voc import VOCDataset
 from .wider_face import WIDERFaceDataset
 from .xml_style import XMLDataset
-from .mmfashion import mmfashionDataset
 
 __all__ = [
     'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset',
     'CityscapesDataset', 'GroupSampler', 'DistributedGroupSampler',
     'DistributedSampler', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
-    'mmfashionDataset',
-    'WIDERFaceDataset', 'DATASETS', 'build_dataset'
+    'mmfashionDataset', 'WIDERFaceDataset', 'DATASETS', 'build_dataset'
 ]
diff --git a/configs/fashion_parsing_segmentation/demo.py b/configs/fashion_parsing_segmentation/demo.py
index e5ecaeeea..131aa6107 100644
--- a/configs/fashion_parsing_segmentation/demo.py
+++ b/configs/fashion_parsing_segmentation/demo.py
@@ -1,7 +1,7 @@
-from mmdet.apis import init_detector, inference_detector, show_result
-import mmcv
 import argparse
 
+from mmdet.apis import inference_detector, init_detector, show_result
+
 
 def parse_args():
     parser = argparse.ArgumentParser(
@@ -34,5 +34,5 @@ def main():
 
     # visualize the results in a new window
     # or save the visualization results to image files
-    show_result(img, result, model.CLASSES, out_file=img.split('.')[0]+'_result.jpg')
-
+    show_result(
+        img, result, model.CLASSES, out_file=img.split('.')[0] + '_result.jpg')
diff --git a/configs/fashion_parsing_segmentation/inference.py b/configs/fashion_parsing_segmentation/inference.py
index e2f68c36f..57853ec84 100644
--- a/configs/fashion_parsing_segmentation/inference.py
+++ b/configs/fashion_parsing_segmentation/inference.py
@@ -7,7 +7,6 @@
 import torch
 from mmcv.parallel import collate, scatter
 from mmcv.runner import load_checkpoint
-
 from mmdet.core import get_classes
 from mmdet.datasets.pipelines import Compose
 from mmdet.models import build_detector
@@ -152,7 +151,7 @@ def show_result(img,
         # remove duplicate
         new_bbox_result = []
         for ti, temp in enumerate(bbox_result):
-            if len(temp) <=1:
+            if len(temp) <= 1:
                 new_bbox_result.append(temp)
                 continue
             new_temp = sorted(temp, key=lambda x: x[-1])[-1]
diff --git a/configs/fashion_parsing_segmentation/mask_rcnn_r50_fpn_1x.py b/configs/fashion_parsing_segmentation/mask_rcnn_r50_fpn_1x.py
index a18b5287d..2dabafc00 100644
--- a/configs/fashion_parsing_segmentation/mask_rcnn_r50_fpn_1x.py
+++ b/configs/fashion_parsing_segmentation/mask_rcnn_r50_fpn_1x.py
@@ -1,21 +1,21 @@
 # model settings
 model = dict(
-    type='MaskRCNN',
-    pretrained='checkpoint/resnet50.pth',
-    backbone=dict(
-        type='ResNet',
-        depth=50,
-        num_stages=4,
-        out_indices=(0,1,2,3),
-        frozen_stages=1,
-        norm_cfg = dict(type='BN', requires_grad=True),
-        style='pytorch'),
-    neck=dict(
+    type='MaskRCNN',
+    pretrained='checkpoint/resnet50.pth',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        style='pytorch'),
+    neck=dict(
         type='FPN',
         in_channels=[256, 512, 1024, 2048],
         out_channels=256,
         num_outs=5),
-    rpn_head=dict(
+    rpn_head=dict(
         type='RPNHead',
         in_channels=256,
         feat_channels=256,
@@ -27,79 +27,79 @@
         loss_cls=dict(
             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
         loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
-    bbox_roi_extractor=dict(
-        type='SingleRoIExtractor',
-        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
-        out_channels=256,
-        featmap_strides=[4, 8, 16, 32]),
-    bbox_head=dict(
-        type='SharedFCBBoxHead',
-        num_fcs=2,
-        in_channels=256,
-        fc_out_channels=1024,
-        roi_feat_size=7,
-        num_classes=81,
-        target_means=[0., 0., 0., 0.],
-        target_stds=[0.1, 0.1, 0.2, 0.2],
-        reg_class_agnostic=False,
-        loss_cls=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
-    mask_roi_extractor=dict(
-        type='SingleRoIExtractor',
-        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
-        out_channels=256,
-        featmap_strides=[4, 8, 16, 32]),
-    mask_head=dict(
-        type='FCNMaskHead',
-        num_convs=4,
-        in_channels=256,
-        conv_out_channels=256,
-        num_classes=81,
-        loss_mask=dict(
-            type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)))
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
+        out_channels=256,
+        featmap_strides=[4, 8, 16, 32]),
+    bbox_head=dict(
+        type='SharedFCBBoxHead',
+        num_fcs=2,
+        in_channels=256,
+        fc_out_channels=1024,
+        roi_feat_size=7,
+        num_classes=81,
+        target_means=[0., 0., 0., 0.],
+        target_stds=[0.1, 0.1, 0.2, 0.2],
+        reg_class_agnostic=False,
+        loss_cls=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+        loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
+    mask_roi_extractor=dict(
+        type='SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
+        out_channels=256,
+        featmap_strides=[4, 8, 16, 32]),
+    mask_head=dict(
+        type='FCNMaskHead',
+        num_convs=4,
+        in_channels=256,
+        conv_out_channels=256,
+        num_classes=81,
+        loss_mask=dict(
+            type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)))
 
 # model training and testing settings
 train_cfg = dict(
-    rpn=dict(
-        assigner=dict(
-            type='MaxIoUAssigner',
-            pos_iou_thr=0.7,
-            neg_iou_thr=0.3,
-            min_pos_iou=0.3,
-            ignore_iof_thr=-1),
-        sampler=dict(
-            type='RandomSampler',
-            num=256,
-            pos_fraction=0.5,
-            neg_pos_ub=-1,
-            add_gt_as_proposals=False),
-        allowed_border=0,
-        pos_weight=-1,
-        debug=False),
-    rpn_proposal=dict(
-        nms_across_levels=False,
-        nms_pre=2000,
-        nms_post=2000,
-        max_num=2000,
-        nms_thr=0.7,
-        min_bbox_size=0),
-    rcnn=dict(
-        assigner=dict(
-            type='MaxIoUAssigner',
-            pos_iou_thr=0.5,
-            neg_iou_thr=0.5,
-            min_pos_iou=0.5,
-            ignore_iof_thr=-1),
-        sampler=dict(
-            type='RandomSampler',
-            num=512,
-            pos_fraction=0.25,
-            neg_pos_ub=-1,
-            add_gt_as_proposals=True),
-        mask_size=28,
-        pos_weight=-1,
-        debug=False))
+    rpn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.7,
+            neg_iou_thr=0.3,
+            min_pos_iou=0.3,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=256,
+            pos_fraction=0.5,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=False),
+        allowed_border=0,
+        pos_weight=-1,
+        debug=False),
+    rpn_proposal=dict(
+        nms_across_levels=False,
+        nms_pre=2000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.5,
+            neg_iou_thr=0.5,
+            min_pos_iou=0.5,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=512,
+            pos_fraction=0.25,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=True),
+        mask_size=28,
+        pos_weight=-1,
+        debug=False))
 
 test_cfg = dict(
     rpn=dict(
@@ -115,9 +115,8 @@
         max_per_img=100,
         mask_thr_binary=0.5))
 
-
 # dataset settings
-dataset_type= 'mmfashionDataset'
+dataset_type = 'mmfashionDataset'
 data_root = '/data/dataset/DeepFashion/In-shop/'
 img_norm_cfg = dict(
     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
@@ -149,26 +148,27 @@
 ]
 
 data = dict(
-    imgs_per_gpu=2,
-    workers_per_gpu=1,
-    train=dict(
-        type=dataset_type,
-        ann_file=data_root+'annotations/DeepFashion_segmentation_query.json',
-        img_prefix=data_root+'Img/',
-        pipeline=train_pipeline,
-        data_root=data_root),
-    val=dict(
-        type=dataset_type,
-        ann_file=data_root+'annotations/DeepFashion_segmentation_query.json',
-        img_prefix=data_root+'Img/',
-        pipeline=test_pipeline,
-        data_root=data_root),
-    test=dict(
-        type=dataset_type,
-        ann_file=data_root+'annotations/DeepFashion_segmentation_gallery.json',
-        img_prefix=data_root+'Img/',
-        pipeline=test_pipeline,
-        data_root=data_root))
+    imgs_per_gpu=2,
+    workers_per_gpu=1,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json',
+        img_prefix=data_root + 'Img/',
+        pipeline=train_pipeline,
+        data_root=data_root),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json',
+        img_prefix=data_root + 'Img/',
+        pipeline=test_pipeline,
+        data_root=data_root),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root +
+        'annotations/DeepFashion_segmentation_gallery.json',
+        img_prefix=data_root + 'Img/',
+        pipeline=test_pipeline,
+        data_root=data_root))
 
 evaluation = dict(interval=5, metric=['bbox', 'segm'])
 # optimizer
@@ -182,14 +182,12 @@
     warmup_ratio=1.0 / 3,
     step=[8, 11])
 checkpoint_config = dict(interval=1)
-# yapf:disable
-log_config=dict(
+log_config = dict(
     interval=50,
     hooks=[
         dict(type='TextLoggerHook'),
         # dict(type='TensorboardLoggerHook')
     ])
-# yapf:enable
 # runtime settings
 total_epochs = 12
 dist_params = dict(backend='nccl')
@@ -198,4 +196,3 @@
 load_from = None
 resume_from = None
 workflow = [('train', 5)]
-
diff --git a/configs/fashion_parsing_segmentation/mmfashion.py b/configs/fashion_parsing_segmentation/mmfashion.py
index 90943e390..1cf8d594f 100644
--- a/configs/fashion_parsing_segmentation/mmfashion.py
+++ b/configs/fashion_parsing_segmentation/mmfashion.py
@@ -4,4 +4,6 @@
 
 @DATASETS.register_module
 class mmfashionDataset(CocoDataset):
-    CLASSES=('top', 'skirt', 'leggings', 'dress', 'outer', 'pants', 'bag', 'neckwear', 'headwear', 'eyeglass','belt', 'footwear', 'hair', 'skin', 'face')
+    CLASSES = ('top', 'skirt', 'leggings', 'dress', 'outer', 'pants', 'bag',
+               'neckwear', 'headwear', 'eyeglass', 'belt', 'footwear', 'hair',
+               'skin', 'face')
diff --git a/demo/test_fashion_recommender.py b/demo/test_fashion_recommender.py
index 7547e9f39..9f96a7105 100644
--- a/demo/test_fashion_recommender.py
+++ b/demo/test_fashion_recommender.py
@@ -1,13 +1,12 @@
 from __future__ import division
 import argparse
 import os
+
 import torch
-from torch.autograd import Variable
-import numpy as np
 from mmcv import Config
 from mmcv.runner import load_checkpoint
 
-from mmfashion.apis import get_root_logger, init_dist, test_fashion_recommender
+from mmfashion.apis import get_root_logger
 from mmfashion.datasets import build_dataset
 from mmfashion.models import build_fashion_recommender
 from mmfashion.utils import get_img_tensor
@@ -19,11 +18,13 @@ def parse_args():
     parser.add_argument(
         '--config',
         help='test config file path',
-        default='configs/fashion_recommendation/type_aware_recommendation_polyvore_disjoint_l2_embed.py')
+        default='configs/fashion_recommendation/'
+        'type_aware_recommendation_polyvore_disjoint_l2_embed.py')
     parser.add_argument(
         '--checkpoint',
         help='checkpoint file',
-        default='checkpoint/FashionRecommend/TypeAware/disjoint/l2_embed/epoch_16.pth')
+        default='checkpoint/FashionRecommend/TypeAware/disjoint/'
+        'l2_embed/epoch_16.pth')
     parser.add_argument(
         '--input_dir',
         type=str,
@@ -35,7 +36,6 @@ def parse_args():
     return args
 
 
-
 def main():
     args = parse_args()
     cfg = Config.fromfile(args.config)
@@ -48,7 +48,6 @@ def main():
 
     logger = get_root_logger(cfg.log_level)
     logger.info('Distributed test: {}'.format(distributed))
-
     # create model
     model = build_fashion_recommender(cfg.model)
     load_checkpoint(model, cfg.load_from, map_location='cpu')
@@ -64,7 +63,8 @@ def main():
     for dirpath, dirname, fns in os.walk(args.input_dir):
         for imgname in fns:
             item_ids.append(imgname.split('.')[0])
-            tensor = get_img_tensor(os.path.join(dirpath, imgname), args.use_cuda)
+            tensor = get_img_tensor(
+                os.path.join(dirpath, imgname), args.use_cuda)
             img_tensors.append(tensor)
 
     img_tensors = torch.cat(img_tensors)
@@ -77,13 +77,14 @@ def main():
 
 
     try:
         metric = model.module.triplet_net.metric_branch
-    except:
+    except Exception:
         metric = None
 
     # get compatibility score, so far only support images from polyvore
     dataset = build_dataset(cfg.data.test)
-    score = dataset.get_single_compatibility_score(embeds, item_ids, metric, args.use_cuda)
+    score = dataset.get_single_compatibility_score(embeds, item_ids, metric,
+                                                   args.use_cuda)
 
     print("Compatibility score: {:.3f}".format(score))
 
diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md
index 2bf928509..aab94a3f0 100644
--- a/docs/GETTING_STARTED.md
+++ b/docs/GETTING_STARTED.md
@@ -148,4 +148,3 @@ configs/mmfashion/mask_rcnn_r50_fpn_1x.py
 
 The simplest way is to prepare your dataset to existing dataset formats (AttrDataset, InShopDataset, ConsumerToShopDataset or LandmarkDetectDataset). Please refer to [DATA_PREPARATION.md](DATA_PREPARATION.md) for the dataset specifics.
 
-
diff --git a/mmfashion/core/evaluation/retrieval_eval.py b/mmfashion/core/evaluation/retrieval_eval.py
index 00b6580d6..c29c38fb6 100644
--- a/mmfashion/core/evaluation/retrieval_eval.py
+++ b/mmfashion/core/evaluation/retrieval_eval.py
@@ -3,6 +3,7 @@
 
 
 class Evaluator(object):
+
     def __init__(self,
                  query_dict_fn,
                  gallery_dict_fn,
diff --git a/mmfashion/datasets/Attr_Pred.py b/mmfashion/datasets/Attr_Pred.py
index c223a6e98..b00174369 100644
--- a/mmfashion/datasets/Attr_Pred.py
+++ b/mmfashion/datasets/Attr_Pred.py
@@ -103,7 +103,9 @@ def get_basic_item(self, idx):
                 l_y = float(l_y) / bbox_h * self.img_size[1]
                 landmark.append(l_y)
             landmark = torch.from_numpy(np.array(landmark)).float()
-        else: # here no landmark will be used, just use zero for initialization (global predictor)
+        else:
+            # here no landmark will be used, just use zero for initialization
+            # (global predictor)
             landmark = torch.zeros(8)
         data = {'img': img, 'attr': label, 'cate': cate, 'landmark': landmark}
         return data
diff --git a/mmfashion/datasets/Polyvore_outfit.py b/mmfashion/datasets/Polyvore_outfit.py
index b283137f0..c70d800e3 100644
--- a/mmfashion/datasets/Polyvore_outfit.py
+++ b/mmfashion/datasets/Polyvore_outfit.py
@@ -205,15 +205,18 @@ def collect_compatibility_questions(self, compatibility_test_fn):
             compatibility_questions.append((compat_question, int(data[0])))
         return compatibility_questions
 
-
-    def get_single_compatibility_score(self, embeds, item_ids, metric, use_cuda=True):
+    def get_single_compatibility_score(self,
+                                       embeds,
+                                       item_ids,
+                                       metric,
+                                       use_cuda=True):
         n_items = embeds.size(0)
         outfit_score = 0.0
         num_comparisons = 0.0
         for i in range(n_items - 1):
             item1_id = item_ids[i]
             type1 = self.item2category[item1_id]
-            for j in range(i+1, n_items):
+            for j in range(i + 1, n_items):
                 item2_id = item_ids[j]
                 type2 = self.item2category[item2_id]
                 condition = self.get_typespaces(type1, type2)
@@ -232,7 +235,6 @@ def get_single_compatibility_score(self, embeds, item_ids, metric, use_cuda=True
         outfit_score = 1 - outfit_score.item()
         return outfit_score
 
-
     def test_compatibility(self, embeds, metric):
         """ Returns the area under a roc curve for the compatibility task
         embeds: precomputed embedding features used to score
@@ -276,7 +278,6 @@
         auc = roc_auc_score(labels, scores)
         return auc
 
-
     def test_fitb(self, embeds, metric):
         """Returns the accuracy of the fill in the blank task