Merge pull request #9 from CSAILVision/pytorch1.0

PyTorch1.0
CSAILVision · Feb 24, 2019 · bb947de · bb947de
2 parents 7dc43e0 + ac5f459
commit bb947de
Show file tree

Hide file tree

Showing 40 changed files with 32,140 additions and 444 deletions.
diff --git a/README.md b/README.md
@@ -22,10 +22,12 @@ The human visual system is able to extract a remarkable amount of semantic infor
 
 - Dynamic scales of input for training with multiple GPUs.
 
+- Support state-of-the-art PyTorch 1.0
+
 ## Environment
 The code is developed under the following configurations.
 - Hardware: 2-8 GPUs (with at least 12G GPU memories) (change ```[--num_gpus NUM_GPUS]``` accordingly)
-- Software: Ubuntu 16.04.3 LTS, CUDA>=8.0, ***Python>=3.5***, ***PyTorch>=0.4.0***
+- Software: Ubuntu 16.04.3 LTS, CUDA>=8.0, ***Python>=3.5***, ***PyTorch>=0.4.0 (PyTorch 1.0 supported)***
 - Library: opencv, scipy, colormath, tqdm, PyTorch compiled with cffi
 
 *Warning:* We don't support the outdated Python 2 anymore. PyTorch 0.4.0 or higher is required to run the code.
@@ -40,7 +42,7 @@ We have released the UPerNet with state-of-the-art performance proposed in our p
 
 You can use our pretrained models in PyTorch to segment input image. The usage is as follows:
 
-1. Compile Precise RoI Pooling operator. It requires PyTorch>=0.4 (compiled with ```cffi```) and only supports CUDA (CPU mode is not implemented). To compile the essential components:
+1. If you're using PyTorch>=1.0 on the `master` or `pytorch1.0` branch, skip this step. If you're using 0.4<=PyTorch<1.0, please compile the Precise RoI Pooling operator first (**please check out the `pytorch0.4` branch!**). Compilation requires PyTorch compiled with ```cffi``` and only supports CUDA (CPU mode is not implemented). To compile the essential components:
 
 ```bash
     cd lib/nn/prroi_pool

diff --git a/broden_dataset_utils/adeseg.py b/broden_dataset_utils/adeseg.py
@@ -27,13 +27,13 @@ def __init__(self, directory, version):
                 version = ''
         self.root = directory
         self.version = version
-        mat = loadmat(self.expand(self.version, 'index*.mat'), squeeze_me=True)
+        mat = loadmat(os.path.join('./meta_file/ade20k', 'index_ade20k.mat'), squeeze_me=True)
         index = mat['index']
         Ade20kIndex = namedtuple('Ade20kIndex', index.dtype.names)
         self.index = Ade20kIndex(
             **{name: index[name][()] for name in index.dtype.names})
         # Here we use adechallenger scene label instead of ade20k.
-        with open("./meta_file/scene_categories.txt", 'r') as f:
+        with open("./meta_file/ade20k/scene_categories.txt", 'r') as f:
             lines = f.readlines()
         self.index_scene_adecha = []
         for i, l in enumerate(lines):

diff --git a/broden_dataset_utils/dtdseg.py b/broden_dataset_utils/dtdseg.py
@@ -11,8 +11,7 @@ class DtdSegmentation(AbstractSegmentation):
     def __init__(self, directory):
         directory = os.path.expanduser(directory)
         self.directory = directory
-        with open(os.path.join(directory, 'labels',
-                               'labels_joint_anno.txt')) as f:
+        with open(os.path.join('./meta_file/dtd/', 'labels_joint_anno.txt')) as f:
             self.dtd_meta = [line.split(None, 1) for line in f.readlines()]
         # do not include '-' in texture names. No unlabeled sample in data. 
         self.textures = sorted(list(set(sum(

diff --git a/broden_dataset_utils/joint_dataset.py b/broden_dataset_utils/joint_dataset.py
@@ -46,7 +46,7 @@ def __init__(self):
         self.data_sets = OrderedDict(ade20k=ade, pascal=pascal, os=opensurface)
 
         """ use multi source dataset """
-        self.broden_dataset_info = "./meta_file"
+        self.broden_dataset_info = "./meta_file/joint_dataset"
         self.record_list = {"train": [], "validation": []}
         self.record_list['train'].append(get_records(
             os.path.join(self.broden_dataset_info, "broden_ade20k_pascal_train.json")))

diff --git a/broden_dataset_utils/osseg.py b/broden_dataset_utils/osseg.py
@@ -14,15 +14,15 @@ def __init__(self, directory):
         self.directory = directory
         # Process material labels: open label-substance-colors.csv
         subst_name_map = {}
-        with open(os.path.join(directory, 'label-substance-colors.csv')) as f:
+        with open(os.path.join('./meta_file/opensurfaces', 'label-substance-colors.csv')) as f:
             for row in DictReader(f):
                 subst_name_map[row['substance_name']] = int(row['red_color'])
         # NOTE: substance names should be normalized. 
         self.substance_names = ['-'] * (1 + max(subst_name_map.values()))
         for k, v in list(subst_name_map.items()):
             self.substance_names[v] = k
         # Now load the metadata about images from photos.csv
-        with open(os.path.join(directory, 'photos.csv')) as f:
+        with open(os.path.join('./meta_file/opensurfaces/', 'photos.csv')) as f:
             self.image_meta = list(DictReader(f))
             scenes = set(row['scene_category_name'] for row in self.image_meta)
 

diff --git a/broden_dataset_utils/pascalseg.py b/broden_dataset_utils/pascalseg.py
@@ -1,6 +1,7 @@
 
 import os
 import re
+import warnings
 
 import numpy
 from scipy.io import loadmat
@@ -13,7 +14,7 @@ class PascalSegmentation(AbstractSegmentation):
     Implements AbstractSegmentation for the pascal PARTS dataset.
     """
 
-    def __init__(self, directory, collapse_adjectives=None, version=None):
+    def __init__(self, directory, collapse_adjectives=None, version='VOC2010'):
         directory = os.path.expanduser(directory)
         # Default to the latest version present in the directory
         if version is None:
@@ -35,19 +36,24 @@ def __init__(self, directory, collapse_adjectives=None, version=None):
         self.collapse_adjectives = collapse_adjectives
         # Load the parts coding metadata from part2ind.m
         codes = load_part2ind(
-            os.path.join(directory, self.partdir, 'part2ind.m'))
+            os.path.join('./meta_file/pascal/', 'part2ind.m'))
         # Normalized names
         self.codes = normalize_all_readable(codes, collapse_adjectives)
         self.part_object_names, self.part_names, self.part_key = normalize_part_key(self.codes)
         # Load the PASCAL context segmentation labels 
         self.object_names = load_context_labels(
-            os.path.join(directory, self.contextdir, 'labels.txt'))
+            os.path.join('./meta_file/pascal/', 'context_labels.txt'))
         self.unknown_label = self.object_names.index('unknown')
         self.object_names[self.unknown_label] = '-'  # normalize unknown
         # Assume every mat file in the relevant directory is a segmentation.
-        self.segs = sorted([n for n in os.listdir(
-            os.path.join(directory, self.partdir, 'Annotations_Part'))
-                            if n.endswith('.mat')])
+        try:
+            self.segs = sorted([n for n in os.listdir(
+                os.path.join(directory, self.partdir, 'Annotations_Part'))
+                                if n.endswith('.mat')])
+        except OSError:
+            message = 'Error when searching for pascal part annotations, please check your dataset.' \
+                        + ' With this error you may only use testing scripts. Training will fail unless you resolve this warning.'
+            warnings.warn(message)
 
     def all_names(self, category, j):
         if category == 'object':

diff --git a/lib/nn/prroi_pool/.gitignore b/lib/nn/prroi_pool/.gitignore
@@ -1,2 +1,106 @@
-*.o
-/_prroi_pooling
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+.vim-template*
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
diff --git a/lib/nn/prroi_pool/README.md b/lib/nn/prroi_pool/README.md
@@ -0,0 +1,66 @@
+# PreciseRoIPooling
+This repo implements the **Precise RoI Pooling** (PrRoI Pooling), proposed in the paper **Acquisition of Localization Confidence for Accurate Object Detection** published at ECCV 2018 (Oral Presentation).
+
+**Acquisition of Localization Confidence for Accurate Object Detection**
+
+_Borui Jiang*, Ruixuan Luo*, Jiayuan Mao*, Tete Xiao, Yuning Jiang_ (* indicates equal contribution.)
+
+https://arxiv.org/abs/1807.11590
+
+## Brief
+
+In short, Precise RoI Pooling is an integration-based (bilinear interpolation) average pooling method for RoI Pooling. It avoids any quantization and has a continuous gradient on bounding box coordinates. It is:
+
+- different from the original RoI Pooling proposed in [Fast R-CNN](https://arxiv.org/abs/1504.08083). PrRoI Pooling uses average pooling instead of max pooling for each bin and has a continuous gradient on bounding box coordinates. That is, one can take the derivatives of some loss function w.r.t the coordinates of each RoI and optimize the RoI coordinates.
+- different from the RoI Align proposed in [Mask R-CNN](https://arxiv.org/abs/1703.06870). PrRoI Pooling uses a full integration-based average pooling instead of sampling a constant number of points. This makes the gradient w.r.t. the coordinates continuous.
+
+For a clearer comparison, we illustrate RoI Pooling, RoI Align and PrRoI Pooling in the following figure. More details, including the gradient computation, can be found in our paper.
+
+<center><img src="./_assets/prroi_visualization.png" width="80%"></center>
+
+## Implementation
+
+PrRoI Pooling was originally implemented by [Tete Xiao](http://tetexiao.com/) based on MegBrain, an (internal) deep learning framework built by Megvii Inc. It was later adapted into open-source deep learning frameworks. Currently, we only support PyTorch. Unfortunately, we don't have any specific plan for the adaptation into other frameworks such as TensorFlow, but any contributions (pull requests) will be more than welcome.
+
+## Usage (PyTorch 1.0)
+
+In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 1.0+ and only supports CUDA (CPU mode is not implemented).
+Since we use PyTorch JIT for cxx/cuda code compilation, to use the module in your code, simply do:
+
+```
+from prroi_pool import PrRoIPool2D
+
+avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale)
+roi_features = avg_pool(features, rois)
+
+# for those who want to use the "functional"
+
+from prroi_pool.functional import prroi_pool2d
+roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale)
+```
+
+
+## Usage (PyTorch 0.4)
+
+**!!! Please first check out the `pytorch0.4` branch.**
+
+In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 0.4 and only supports CUDA (CPU mode is not implemented).
+To use the PrRoI Pooling module, first go to `pytorch/prroi_pool` and execute `./travis.sh` to compile the essential components (you may need `nvcc` for this step). To use the module in your code, simply do:
+
+```
+from prroi_pool import PrRoIPool2D
+
+avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale)
+roi_features = avg_pool(features, rois)
+
+# for those who want to use the "functional"
+
+from prroi_pool.functional import prroi_pool2d
+roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale)
+```
+
+Here,
+
+- RoI is an `m * 5` float tensor of format `(batch_index, x0, y0, x1, y1)`, following the convention in the original Caffe implementation of RoI Pooling, although in some frameworks the batch indices are provided by an integer tensor.
+- `spatial_scale` is multiplied to the RoIs. For example, if your feature maps are down-sampled by a factor of 16 (w.r.t. the input image), you should use a spatial scale of `1/16`.
+- The coordinates for RoIs follow the [L, R) convention. That is, `(0, 0, 4, 4)` denotes a box of size `4x4`.
diff --git a/lib/nn/prroi_pool/build.py b/lib/nn/prroi_pool/build.py