generate_proposals.py
##############################################################
# Copyright (c) 2018-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
##############################################################
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# Modified by Rohit Girdhar
# --------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
from core.config import cfg
import utils.boxes as box_utils
from core.nms_wrapper import nms
from modeling.generate_anchors import time_extend_shifts
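
# A minimal sketch of what `time_extend_shifts` (imported above) is expected
# to do, inferred from how it is used in forward() below; the real
# implementation lives in modeling.generate_anchors:
#
#   def time_extend_shifts(shifts, frames_per_vid):
#       # repeat each (dx, dy, dx, dy) grid shift once per frame of the tube
#       return np.tile(shifts, (1, frames_per_vid))  # -> (K, 4*frames_per_vid)
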
class GenerateProposalsOp(object):
"""Output object detection proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""

    def __init__(self, anchors, spatial_scale, train):
self._anchors = anchors
self._num_anchors = self._anchors.shape[0]
self._feat_stride = 1. / spatial_scale
self._train = train
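        # Illustrative numbers (assumed, not from the source): an RPN head on
        # a feature map at 1/16 input resolution is built with
        # spatial_scale = 1. / 16, so _feat_stride = 16 and the anchor grid
        # enumerated in forward() steps 16 input pixels per feature cell.
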
def proposals_for_one_image(
self, im_info, all_anchors, bbox_deltas, scores, frames_per_vid):
# Get mode-dependent configuration
cfg_key = 'TRAIN' if self._train else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# Transpose and reshape predicted bbox transformations to get them
# into the same order as the anchors:
# - bbox deltas will be (4 * A * frames_per_vid, H, W) format from
# conv output
# - transpose to (H, W, 4 * A * frames_per_vid)
# - reshape to (H * W * A, 4 * frames_per_vid) where rows are ordered
# by (H, W, A) in slowest to fastest order to match the enumerated
# anchors
bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((
-1, 4 * frames_per_vid))
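        # Shape walk-through with assumed example values: with A = 15 anchors,
        # frames_per_vid = 2 and a 50 x 38 feature map, the conv output
        # (4 * 15 * 2, 50, 38) = (120, 50, 38) becomes (50, 38, 120) after the
        # transpose and (50 * 38 * 15, 8) = (28500, 8) after the reshape.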
        # Same story for the scores:
        # - scores are (A, H, W) format from conv output; each score rates
        #   the whole tube across frames
# - transpose to (H, W, A)
# - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
# to match the order of anchors and bbox_deltas
scores = scores.transpose((1, 2, 0)).reshape((-1, 1))
# 4. sort all (proposal, score) pairs by score from highest to lowest
# 5. take top pre_nms_topN (e.g. 6000)
if pre_nms_topN <= 0 or pre_nms_topN > len(scores):
order = np.argsort(-scores.squeeze())
else:
        # Avoid sorting possibly large arrays: first partition to get the
        # top K unsorted, then sort just those (~20x faster for 200k scores)
inds = np.argpartition(
-scores.squeeze(), pre_nms_topN
)[:pre_nms_topN]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
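        # Worked example (assumed values): scores = [.1, .9, .3, .7] and
        # pre_nms_topN = 2: argpartition selects indices {1, 3} in arbitrary
        # order, and the argsort over just those yields order = [1, 3]
        # (score 0.9 first, then 0.7).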
bbox_deltas = bbox_deltas[order, :]
all_anchors = all_anchors[order, :]
scores = scores[order]
# 1. Transform anchors into proposals via bbox transformations
proposals = box_utils.bbox_transform(
all_anchors, bbox_deltas, (1.0, 1.0, 1.0, 1.0))
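        # box_utils.bbox_transform decodes the deltas in the usual Faster
        # R-CNN way, frame by frame: centers move by (dx, dy) times the
        # anchor size and sizes scale by exp(dw), exp(dh); the
        # (1.0, 1.0, 1.0, 1.0) weights leave the predicted deltas unscaled.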
# 2. clip proposals to image (may result in proposals with zero area
# that will be removed in the next step)
proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])
        # 3. remove predicted boxes with either height or width < min_size;
        # again, this needs to be done on each frame, and the per-frame
        # results are "AND"ed over frames
keep = np.arange(proposals.shape[0])
for frame_id in range(frames_per_vid):
keep = np.intersect1d(
keep, _filter_boxes(
proposals[:, frame_id * 4: (frame_id + 1) * 4],
min_size, im_info))
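        # e.g. with 2 frames, per-frame keeps of [0, 2, 5] and [2, 3, 5]
        # intersect to keep = [2, 5]: a tube survives only if its box passes
        # the size test in every frame.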
proposals = proposals[keep, :]
scores = scores[keep]
# 6. apply loose nms (e.g. threshold = 0.7)
# 7. take after_nms_topN (e.g. 300)
# 8. return the top proposals (-> RoIs top)
if nms_thresh > 0:
# When we're training on multiple GPUs, running NMS on the GPU
# causes serious perf issues. We need to debug, but for now force
# running on the CPU when training
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :]
scores = scores[keep]
return proposals, scores

    def forward(self, inputs, outputs):
# 1. for each location i in a (H, W) grid:
# generate A anchor boxes centered on cell i
# apply predicted bbox deltas to each of the A anchors at cell i
# 2. clip predicted boxes to image
# 3. remove predicted boxes with either height or width < threshold
# 4. sort all (proposal, score) pairs by score from highest to lowest
# 5. take the top pre_nms_topN proposals before NMS
# 6. apply NMS with a loose threshold (0.7) to the remaining proposals
# 7. take after_nms_topN proposals after NMS
# 8. return the top proposals
# predicted probability of fg object for each RPN anchor
scores = inputs[0].data
        # predicted anchor transformations (bbox deltas)
bbox_deltas = inputs[1].data
        # input image (height, width, scale), where scale is the factor
        # applied to the original dataset image to get the network input image
im_info = inputs[2].data
# 1. Generate proposals from bbox deltas and shifted anchors
height, width = scores.shape[-2:]
# Enumerate all shifted positions on the (H, W) grid
shift_x = np.arange(0, width) * self._feat_stride
shift_y = np.arange(0, height) * self._feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y, copy=False)
# Convert to (K, 4), K=H*W, where the columns are (dx, dy, dx, dy)
# shift pointing to each grid location
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()
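        # Example (assumed values): width = 3, height = 2, stride = 16 gives
        # shifts = [[ 0,  0,  0,  0], [16,  0, 16,  0], [32,  0, 32,  0],
        #           [ 0, 16,  0, 16], [16, 16, 16, 16], [32, 16, 32, 16]]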
        # Broadcast anchors over shifts to enumerate all anchors at all positions
# in the (H, W) grid:
# - add A anchors of shape (1, A, 4) to
# - K shifts of shape (K, 1, 4) to get
# - all shifted anchors of shape (K, A, 4)
# - reshape to (K*A, 4) shifted anchors
num_images = inputs[0].shape[0]
A = self._num_anchors
K = shifts.shape[0]
assert bbox_deltas.shape[1] // A == self._anchors.shape[1], \
'bbox_deltas {}, A {}, self._anchors {} not consistent'.format(
bbox_deltas.shape, A, self._anchors.shape)
frames_per_vid = bbox_deltas.shape[1] // (4 * A)
        # replicate the shifts across the time dimension (one copy per frame)
shifts = time_extend_shifts(shifts, frames_per_vid)
all_anchors = self._anchors[np.newaxis, :, :] + shifts[:, np.newaxis, :]
all_anchors = all_anchors.reshape((K * A, 4 * frames_per_vid))
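        # Broadcasting recap: (1, A, 4*frames_per_vid) anchors plus
        # (K, 1, 4*frames_per_vid) shifts broadcast to
        # (K, A, 4*frames_per_vid), then flatten to (K*A, 4*frames_per_vid)
        # rows ordered (H, W, A) to match the reshaped scores and deltas in
        # proposals_for_one_image.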
        # In single-frame Faster R-CNN, rois would be np.empty((0, 5)); here
        # each RoI carries a batch index plus 4 coordinates per frame
rois = np.empty((0, 4 * frames_per_vid + 1), dtype=np.float32)
roi_probs = np.empty((0, 1), dtype=np.float32)
for im_i in range(num_images):
im_i_boxes, im_i_probs = self.proposals_for_one_image(
im_info[im_i, ...], all_anchors, bbox_deltas[im_i, ...],
scores[im_i, ...], frames_per_vid)
batch_inds = im_i * np.ones(
(im_i_boxes.shape[0], 1), dtype=np.float32)
im_i_rois = np.hstack((batch_inds, im_i_boxes))
rois = np.append(rois, im_i_rois, axis=0)
roi_probs = np.append(roi_probs, im_i_probs, axis=0)
outputs[0].reshape(rois.shape)
outputs[0].data[...] = rois
if len(outputs) > 1:
outputs[1].reshape(roi_probs.shape)
outputs[1].data[...] = roi_probs
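
# Typical wiring (illustrative sketch; the blob names below are assumptions
# and vary with the model config): the op is attached to the Caffe2 net as a
# Python op, roughly
#
#   model.net.Python(GenerateProposalsOp(anchors, spatial_scale, train).forward)(
#       ['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'],
#       ['rpn_rois', 'rpn_roi_probs'])
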
def _filter_boxes(boxes, min_size, im_info):
"""Only keep boxes with both sides >= min_size and center within the image.
"""
# Scale min_size to match image scale
min_size *= im_info[2]
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
x_ctr = boxes[:, 0] + ws / 2.
y_ctr = boxes[:, 1] + hs / 2.
keep = np.where(
(ws >= min_size) & (hs >= min_size) &
(x_ctr < im_info[1]) & (y_ctr < im_info[0]))[0]
return keep
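
# Example for _filter_boxes (assumed values): with min_size = 16 and an
# im_info scale factor of 2.0, boxes narrower or shorter than 32 pixels in
# network coordinates are dropped, as are boxes whose center falls outside
# the (height, width) recorded in im_info.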