commit #893 (Closed) · wants to merge 1 commit
9 changes: 7 additions & 2 deletions lib/model/rpn/anchor_target_layer.py
@@ -34,6 +34,11 @@ class _AnchorTargetLayer(nn.Module):
labels and bounding-box regression targets.
"""
def __init__(self, feat_stride, scales, ratios):
'''
:param feat_stride: feature stride of the backbone, e.g. 16
:param scales: anchor scales, e.g. [8, 16, 32]
:param ratios: anchor aspect ratios, e.g. [0.5, 1, 2]
'''
super(_AnchorTargetLayer, self).__init__()

self._feat_stride = feat_stride
@@ -53,7 +58,7 @@ def forward(self, input):
# apply predicted bbox deltas at cell i to each of the 9 anchors
# filter out-of-image anchors

rpn_cls_score = input[0] # (B, 18, h, w)
gt_boxes = input[1]
im_info = input[2]
num_boxes = input[3]
@@ -74,7 +79,7 @@ def forward(self, input):
A = self._num_anchors
K = shifts.size(0)

self._anchors = self._anchors.type_as(gt_boxes) # move to specific gpu.
all_anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
all_anchors = all_anchors.view(K * A, 4)

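The two all_anchors lines above are the heart of anchor placement: the A base anchors (generated once, centred near the origin) are replicated over the K feature-map cells by adding each cell's (x, y, x, y) pixel shift. A minimal standalone sketch of that broadcast, with toy shapes chosen purely for illustration (they are not taken from the diff):

import torch

A, K = 9, 6                               # 9 base anchors, 6 grid cells (toy value; in the layer K = h*w)
base_anchors = torch.randn(A, 4)          # stand-in for self._anchors, shape (A, 4)
shifts = torch.randn(K, 4)                # stand-in for the per-cell (x, y, x, y) offsets, shape (K, 4)

# (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4): every cell gets its own copy of all A anchors
all_anchors = base_anchors.view(1, A, 4) + shifts.view(K, 1, 4)
all_anchors = all_anchors.view(K * A, 4)  # flatten to (K*A, 4), matching the layer
print(all_anchors.shape)                  # torch.Size([54, 4])
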
8 changes: 7 additions & 1 deletion lib/model/rpn/bbox_transform.py
@@ -74,7 +74,13 @@ def bbox_transform_batch(ex_rois, gt_rois):

return targets

def bbox_transform_inv(boxes, deltas):
'''
:param boxes: (B, 9*50*38, 4) anchor boxes [x1, y1, x2, y2]
:param deltas: (B, 9*50*38, 4) predicted regression offsets [dx, dy, dw, dh]
:return: pred_boxes (B, 9*50*38, 4) [x1, y1, x2, y2]
'''
widths = boxes[:, :, 2] - boxes[:, :, 0] + 1.0
heights = boxes[:, :, 3] - boxes[:, :, 1] + 1.0
ctr_x = boxes[:, :, 0] + 0.5 * widths
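The hunk above is cut off, but what it sets up is the standard Faster R-CNN box decoding: each (dx, dy, dw, dh) delta shifts the anchor's centre and rescales its width and height, and the result is converted back to corner form. A hedged, self-contained sketch of that decoding as a hypothetical decode_boxes helper (written from the standard formulation, not copied from the rest of the file):

import torch

def decode_boxes(boxes, deltas):
    # boxes, deltas: (B, N, 4); boxes are [x1, y1, x2, y2] anchors, deltas are (dx, dy, dw, dh)
    widths  = boxes[:, :, 2] - boxes[:, :, 0] + 1.0
    heights = boxes[:, :, 3] - boxes[:, :, 1] + 1.0
    ctr_x   = boxes[:, :, 0] + 0.5 * widths
    ctr_y   = boxes[:, :, 1] + 0.5 * heights

    dx, dy, dw, dh = deltas[:, :, 0], deltas[:, :, 1], deltas[:, :, 2], deltas[:, :, 3]
    pred_ctr_x = dx * widths + ctr_x              # shift the centre by a fraction of the anchor size
    pred_ctr_y = dy * heights + ctr_y
    pred_w     = torch.exp(dw) * widths           # rescale width/height in log space
    pred_h     = torch.exp(dh) * heights

    pred = torch.zeros_like(deltas)
    pred[:, :, 0] = pred_ctr_x - 0.5 * pred_w     # x1
    pred[:, :, 1] = pred_ctr_y - 0.5 * pred_h     # y1
    pred[:, :, 2] = pred_ctr_x + 0.5 * pred_w     # x2
    pred[:, :, 3] = pred_ctr_y + 0.5 * pred_h     # y2
    return pred
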
54 changes: 28 additions & 26 deletions lib/model/rpn/generate_anchors.py
@@ -26,40 +26,53 @@
# -79 -167 96 184
# -167 -343 184 360

# array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.],
# [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.],
# [-119., -119., 136., 136.],
# [-247., -247., 264., 264.],
# [ -35., -79., 52., 96.],
# [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]])

try:
xrange # Python 2
except NameError:
xrange = range # Python 3


# scales = np.array([8,16,32]), ratios = np.array([0.5,1,2])
def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
scales=2**np.arange(3, 6)):
"""
Generate anchor (reference) windows by enumerating aspect ratios X
scales wrt a reference (0, 0, 15, 15) window.
"""

base_anchor = np.array([1, 1, base_size, base_size]) - 1 # base_anchor = np.array([0, 0, 15, 15])
ratio_anchors = _ratio_enum(base_anchor, ratios) # ratios = np.array([0.5,1,2])
anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
for i in xrange(ratio_anchors.shape[0])])
return anchors

def _ratio_enum(anchor, ratios):
"""
Enumerate a set of anchors for each aspect ratio wrt an anchor.
"""
# anchor = np.array([0, 0, 15, 15]), ratios = np.array([0.5,1,2])
w, h, x_ctr, y_ctr = _whctrs(anchor) # 16, 16, 7.5, 7.5
size = w * h
size_ratios = size / ratios # np.array([512, 256, 128])
ws = np.round(np.sqrt(size_ratios)) # np.array([23, 16, 11])
hs = np.round(ws * ratios) # np.array([12, 16, 22])
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors

def _whctrs(anchor):
"""
Return width, height, x center, and y center for an anchor (window).
"""

# anchor = np.array([0, 0, 15, 15])
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
@@ -72,26 +85,15 @@ def _mkanchors(ws, hs, x_ctr, y_ctr):
(x_ctr, y_ctr), output a set of anchors (windows).
"""

ws = ws[:, np.newaxis] # np.array([[23], [16], [11]])
hs = hs[:, np.newaxis] # np.array([[12], [16], [22]])
anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1),
y_ctr + 0.5 * (hs - 1)))
return anchors


def _scale_enum(anchor, scales):
"""
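Putting the helpers together: with the defaults base_size=16, ratios=[0.5, 1, 2] and scales=[8, 16, 32], generate_anchors enumerates 3 ratios x 3 scales = 9 reference windows, the 9x4 array reproduced in the comment block at the top of the file (the printed values may come out one pixel lower than that comment, which appears to date back to the 1-indexed MATLAB implementation). A quick check script, assuming the module is importable as lib.model.rpn.generate_anchors:

import numpy as np
from lib.model.rpn.generate_anchors import generate_anchors  # import path assumed

anchors = generate_anchors(base_size=16,
                           ratios=np.array([0.5, 1, 2]),
                           scales=np.array([8, 16, 32]))
print(anchors.shape)  # (9, 4): one [x1, y1, x2, y2] row per ratio/scale combination
print(anchors)        # compare with the 9x4 comment block at the top of the file
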
45 changes: 26 additions & 19 deletions lib/model/rpn/proposal_layer.py
@@ -28,7 +28,7 @@ class _ProposalLayer(nn.Module):
Outputs object detection proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""

# feat_stride = 16, scales = [8,16,32], ratios = [0.5,1,2]
def __init__(self, feat_stride, scales, ratios):
super(_ProposalLayer, self).__init__()

@@ -47,7 +47,14 @@ def __init__(self, feat_stride, scales, ratios):
# top[1].reshape(1, 1, 1, 1)

def forward(self, input):

'''
:param input: (rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key)
:rpn_cls_prob.data: (B, 18, h, w) anchor bg/fg probabilities
:rpn_bbox_pred.data: (B, 36, h, w) anchor box regression deltas
:im_info: (B, 3) [image height, image width, scale]
:cfg_key: 'TRAIN' or 'TEST'
:return: output rois (B, post_nms_topN, 5) [batch_index, x1, y1, x2, y2]
'''
# Algorithm:
#
# for each (H, W) location i
@@ -64,49 +71,49 @@

# the first set of _num_anchors channels are bg probs
# the second set are the fg probs
scores = input[0][:, self._num_anchors:, :, :] # (B, 9, h, w)
bbox_deltas = input[1]
im_info = input[2]
cfg_key = input[3]

pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N # 6000
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N # 300
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH # 0.7
min_size = cfg[cfg_key].RPN_MIN_SIZE # 8

batch_size = bbox_deltas.size(0)

feat_height, feat_width = scores.size(2), scores.size(3) # h, w
shift_x = np.arange(0, feat_width) * self._feat_stride
shift_y = np.arange(0, feat_height) * self._feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose())
shifts = shifts.contiguous().type_as(scores).float()

A = self._num_anchors # 9
K = shifts.size(0) # number of feature-map cells, e.g. ceil(600/16)*ceil(800/16) = 38*50 = 1900 for a 600x800 input

self._anchors = self._anchors.type_as(scores)
# anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4) # broadcast to (K, A, 4)
anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4) # (B, 9*50*38, 4)

# Transpose and reshape predicted bbox transformations to get them
# into the same order as the anchors:

bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous() # (B, h, w, 36)
bbox_deltas = bbox_deltas.view(batch_size, -1, 4) # (B, h*w*9, 4)

# Same story for the scores:
scores = scores.permute(0, 2, 3, 1).contiguous() # (B, h, w, 9)
scores = scores.view(batch_size, -1) # (B, h*w*9)

# Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, bbox_deltas)

# 2. clip predicted boxes to image
proposals = clip_boxes(proposals, im_info, batch_size) # (B, 9*50*38, 4) [x1, y1, x2, y2]
# proposals = clip_boxes_batch(proposals, im_info, batch_size)

# assign the score to 0 if it is not kept.
@@ -150,7 +157,7 @@ def forward(self, input):

if post_nms_topN > 0:
keep_idx_i = keep_idx_i[:post_nms_topN]
proposals_single = proposals_single[keep_idx_i, :] # (post_nms_topN, 4)
scores_single = scores_single[keep_idx_i, :]

# padding 0 at the end.
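For a 600x800 input the RPN sees a 38x50 feature map, so it scores K*A = 38*50*9 = 17,100 anchors per image; the layer keeps the pre_nms_topN highest-scoring decoded boxes, suppresses overlaps with NMS at nms_thresh, and keeps at most post_nms_topN of the survivors, padding the batch tensor with zeros when fewer remain (as the trailing comment notes). A per-image sketch of that selection as a hypothetical select_proposals helper, using torchvision's NMS (the repository ships its own NMS kernel; this only illustrates the logic):

import torch
from torchvision.ops import nms

def select_proposals(proposals, scores, pre_nms_topN=6000, post_nms_topN=300, nms_thresh=0.7):
    # proposals: (N, 4) decoded, clipped boxes for one image; scores: (N,) foreground scores
    order = scores.argsort(descending=True)
    order = order[:pre_nms_topN]                   # keep the top-scoring boxes before NMS
    proposals, scores = proposals[order], scores[order]

    keep = nms(proposals, scores, nms_thresh)      # indices that survive IoU suppression
    keep = keep[:post_nms_topN]                    # cap the number of proposals
    return proposals[keep], scores[keep]
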
36 changes: 21 additions & 15 deletions lib/model/rpn/rpn.py
@@ -20,20 +20,20 @@ def __init__(self, din):
super(_RPN, self).__init__()

self.din = din # get depth of input feature map, e.g., 512
self.anchor_scales = cfg.ANCHOR_SCALES # [8,16,32]
self.anchor_ratios = cfg.ANCHOR_RATIOS # [0.5,1,2]
self.feat_stride = cfg.FEAT_STRIDE[0] # 16

# define the convrelu layers processing input feature map
self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True)

# define bg(background)/fg(foreground) classification score layer
self.nc_score_out = len(self.anchor_scales) * len(self.anchor_ratios) * 2 # 18 = 9 (anchors) * 2 (bg/fg)
self.RPN_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0) # (B, C, h, w)->(B, 18, h, w)

# define anchor box offset prediction layer
self.nc_bbox_out = len(self.anchor_scales) * len(self.anchor_ratios) * 4 # 36 = 9 (anchors) * 4 (box regression deltas: dx, dy, dw, dh)
self.RPN_bbox_pred = nn.Conv2d(512, self.nc_bbox_out, 1, 1, 0) # (B, C, h, w)->(B, 36, h, w)

# define proposal layer
self.RPN_proposal = _ProposalLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios)
@@ -56,24 +56,30 @@ def reshape(x, d):
return x

def forward(self, base_feat, im_info, gt_boxes, num_boxes):

"""
:param base_feat: torch.size(B,C,h,w)
:param im_info:
:param gt_boxes:
:param num_boxes:
:return:
"""
batch_size = base_feat.size(0)

# return feature map after convrelu layer
rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)
# get rpn classification score
rpn_cls_score = self.RPN_cls_score(rpn_conv1) # (B, 18, h, w)

rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2) # (B, 2, 9*h, w)
rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out) # (B, 18, h, w)

# get rpn offsets to the anchor boxes
rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1) # (B, 36, h, w)

# proposal layer
cfg_key = 'TRAIN' if self.training else 'TEST'

# rois: (B, post_nms_topN, 5) 5: [B_index, x1, y1, x2, y2] after NMS
rois = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data,
im_info, cfg_key))

@@ -87,7 +93,7 @@ def forward(self, base_feat, im_info, gt_boxes, num_boxes):
rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes))

# compute classification loss
rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2) # (B, 9*h*w, 2)
rpn_label = rpn_data[0].view(batch_size, -1)

rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
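The reshape in forward is what lets a 2-way softmax run over the 18 score channels: (B, 18, h, w) is viewed as (B, 2, 9*h, w) so that dim 1 holds the bg/fg pair for each anchor and location, softmax is applied over that dim, and the result is viewed back to (B, 18, h, w). A minimal sketch of the same round trip, with toy shapes and independent of the module:

import torch
import torch.nn.functional as F

B, A, h, w = 2, 9, 38, 50
rpn_cls_score = torch.randn(B, 2 * A, h, w)    # (B, 18, h, w) raw scores

x = rpn_cls_score.view(B, 2, A * h, w)         # (B, 2, 9*h, w): dim 1 is the bg/fg pair
x = F.softmax(x, dim=1)                        # probabilities sum to 1 over bg/fg
rpn_cls_prob = x.view(B, 2 * A, h, w)          # back to (B, 18, h, w)
print(rpn_cls_prob.shape)                      # torch.Size([2, 18, 38, 50])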