diff --git a/official/vision/detection/configs/base_config.py b/official/vision/detection/configs/base_config.py index 33519714fc7..b23a7d5d458 100644 --- a/official/vision/detection/configs/base_config.py +++ b/official/vision/detection/configs/base_config.py @@ -87,10 +87,6 @@ }, 'resnet': { 'resnet_depth': 50, - 'dropblock': { - 'dropblock_keep_prob': None, - 'dropblock_size': None, - }, 'batch_norm': { 'batch_norm_momentum': 0.997, 'batch_norm_epsilon': 1e-4, @@ -111,43 +107,6 @@ 'use_sync_bn': False, }, }, - 'nasfpn': { - 'min_level': 3, - 'max_level': 7, - 'fpn_feat_dims': 256, - 'num_repeats': 5, - 'use_separable_conv': False, - 'dropblock': { - 'dropblock_keep_prob': None, - 'dropblock_size': None, - }, - 'batch_norm': { - 'batch_norm_momentum': 0.997, - 'batch_norm_epsilon': 1e-4, - 'batch_norm_trainable': True, - 'use_sync_bn': False, - }, - }, - # tunable_nasfpn:strip_begin - 'tunable_nasfpn_v1': { - 'min_level': 3, - 'max_level': 7, - 'fpn_feat_dims': 256, - 'num_repeats': 5, - 'use_separable_conv': False, - 'dropblock': { - 'dropblock_keep_prob': None, - 'dropblock_size': None, - }, - 'batch_norm': { - 'batch_norm_momentum': 0.997, - 'batch_norm_epsilon': 1e-4, - 'batch_norm_trainable': True, - 'use_sync_bn': False, - }, - 'nodes': None - }, - # tunable_nasfpn:strip_end 'postprocess': { 'use_batched_nms': False, 'max_total_size': 100, diff --git a/official/vision/detection/configs/retinanet_config.py b/official/vision/detection/configs/retinanet_config.py index 63a6c29b2e3..70ee4bb3407 100644 --- a/official/vision/detection/configs/retinanet_config.py +++ b/official/vision/detection/configs/retinanet_config.py @@ -106,10 +106,6 @@ }, 'resnet': { 'resnet_depth': 50, - 'dropblock': { - 'dropblock_keep_prob': None, - 'dropblock_size': None, - }, 'batch_norm': { 'batch_norm_momentum': 0.997, 'batch_norm_epsilon': 1e-4, @@ -128,22 +124,6 @@ 'batch_norm_trainable': True, }, }, - 'nasfpn': { - 'min_level': 3, - 'max_level': 7, - 'fpn_feat_dims': 256, - 'num_repeats': 5, - 'use_separable_conv': False, - 'dropblock': { - 'dropblock_keep_prob': None, - 'dropblock_size': None, - }, - 'batch_norm': { - 'batch_norm_momentum': 0.997, - 'batch_norm_epsilon': 1e-4, - 'batch_norm_trainable': True, - }, - }, 'retinanet_head': { 'min_level': 3, 'max_level': 7, diff --git a/official/vision/detection/main.py b/official/vision/detection/main.py index 7c5b92bb640..dc67a62beae 100644 --- a/official/vision/detection/main.py +++ b/official/vision/detection/main.py @@ -52,7 +52,7 @@ flags.DEFINE_string( 'model', default='retinanet', - help='Model to run: `retinanet` or `shapemask`.') + help='Model to run: `retinanet` or `mask_rcnn`.') flags.DEFINE_string('training_file_pattern', None, 'Location of the train data.') diff --git a/official/vision/detection/modeling/architecture/factory.py b/official/vision/detection/modeling/architecture/factory.py index 3372b893d31..71925aff62c 100644 --- a/official/vision/detection/modeling/architecture/factory.py +++ b/official/vision/detection/modeling/architecture/factory.py @@ -37,19 +37,12 @@ def _batch_norm_op(**kwargs): return _batch_norm_op -def dropblock_generator(params): - return nn_ops.Dropblock( - dropblock_keep_prob=params.dropblock_keep_prob, - dropblock_size=params.dropblock_size) - - def backbone_generator(params): """Generator function for various backbone models.""" if params.architecture.backbone == 'resnet': resnet_params = params.resnet backbone_fn = resnet.Resnet( resnet_depth=resnet_params.resnet_depth, - dropblock=dropblock_generator(resnet_params.dropblock), batch_norm_relu=batch_norm_relu_generator(resnet_params.batch_norm)) else: raise ValueError('Backbone model %s is not supported.' % diff --git a/official/vision/detection/modeling/architecture/nn_ops.py b/official/vision/detection/modeling/architecture/nn_ops.py index b502247b1ec..556d620d8e8 100644 --- a/official/vision/detection/modeling/architecture/nn_ops.py +++ b/official/vision/detection/modeling/architecture/nn_ops.py @@ -84,88 +84,3 @@ def __call__(self, inputs, is_training=None): inputs = tf.nn.relu(inputs) return inputs - -class Dropblock(object): - """DropBlock: a regularization method for convolutional neural networks. - - DropBlock is a form of structured dropout, where units in a contiguous - region of a feature map are dropped together. DropBlock works better than - dropout on convolutional layers due to the fact that activation units in - convolutional layers are spatially correlated. - See https://arxiv.org/pdf/1810.12890.pdf for details. - """ - - def __init__(self, - dropblock_keep_prob=None, - dropblock_size=None, - data_format='channels_last'): - self._dropblock_keep_prob = dropblock_keep_prob - self._dropblock_size = dropblock_size - self._data_format = data_format - - def __call__(self, net, is_training=False): - """Builds Dropblock layer. - - Args: - net: `Tensor` input tensor. - is_training: `bool` if True, the model is in training mode. - - Returns: - A version of input tensor with DropBlock applied. - """ - if not is_training or self._dropblock_keep_prob is None: - return net - - logging.info('Applying DropBlock: dropblock_size {}, net.shape {}'.format( - self._dropblock_size, net.shape)) - - if self._data_format == 'channels_last': - _, height, width, _ = net.get_shape().as_list() - else: - _, _, height, width = net.get_shape().as_list() - - total_size = width * height - dropblock_size = min(self._dropblock_size, min(width, height)) - # Seed_drop_rate is the gamma parameter of DropBlcok. - seed_drop_rate = ( - 1.0 - self._dropblock_keep_prob) * total_size / dropblock_size**2 / ( - (width - self._dropblock_size + 1) * - (height - self._dropblock_size + 1)) - - # Forces the block to be inside the feature map. - w_i, h_i = tf.meshgrid(tf.range(width), tf.range(height)) - valid_block = tf.logical_and( - tf.logical_and(w_i >= int(dropblock_size // 2), - w_i < width - (dropblock_size - 1) // 2), - tf.logical_and(h_i >= int(dropblock_size // 2), - h_i < width - (dropblock_size - 1) // 2)) - - if self._data_format == 'channels_last': - valid_block = tf.reshape(valid_block, [1, height, width, 1]) - else: - valid_block = tf.reshape(valid_block, [1, 1, height, width]) - - randnoise = tf.random.uniform(net.shape, dtype=tf.float32) - valid_block = tf.cast(valid_block, dtype=tf.float32) - seed_keep_rate = tf.cast(1 - seed_drop_rate, dtype=tf.float32) - block_pattern = (1 - valid_block + seed_keep_rate + randnoise) >= 1 - block_pattern = tf.cast(block_pattern, dtype=tf.float32) - - if self._data_format == 'channels_last': - ksize = [1, self._dropblock_size, self._dropblock_size, 1] - else: - ksize = [1, 1, self._dropblock_size, self._dropblock_size] - block_pattern = -tf.nn.max_pool2d( - -block_pattern, - ksize=ksize, - strides=[1, 1, 1, 1], - padding='SAME', - data_format='NHWC' if self._data_format == 'channels_last' else 'NCHW') - - percent_ones = tf.cast( - tf.reduce_sum(input_tensor=block_pattern), tf.float32) / tf.cast( - tf.size(input=block_pattern), tf.float32) - - net = net / tf.cast(percent_ones, net.dtype) * tf.cast( - block_pattern, net.dtype) - return net diff --git a/official/vision/detection/modeling/architecture/resnet.py b/official/vision/detection/modeling/architecture/resnet.py index 6654451988e..ccd4c5cde08 100644 --- a/official/vision/detection/modeling/architecture/resnet.py +++ b/official/vision/detection/modeling/architecture/resnet.py @@ -34,14 +34,12 @@ class Resnet(object): def __init__(self, resnet_depth, - dropblock=nn_ops.Dropblock(), batch_norm_relu=nn_ops.BatchNormRelu, data_format='channels_last'): """ResNet initialization function. Args: resnet_depth: `int` depth of ResNet backbone model. - dropblock: a dropblock layer. batch_norm_relu: an operation that includes a batch normalization layer followed by a relu layer(optional). data_format: `str` either "channels_first" for `[batch, channels, height, @@ -49,7 +47,6 @@ def __init__(self, """ self._resnet_depth = resnet_depth - self._dropblock = dropblock self._batch_norm_relu = batch_norm_relu self._data_format = data_format @@ -219,24 +216,20 @@ def bottleneck_block(self, inputs=inputs, filters=filters_out, kernel_size=1, strides=strides) shortcut = self._batch_norm_relu(relu=False)( shortcut, is_training=is_training) - shortcut = self._dropblock(shortcut, is_training=is_training) inputs = self.conv2d_fixed_padding( inputs=inputs, filters=filters, kernel_size=1, strides=1) inputs = self._batch_norm_relu()(inputs, is_training=is_training) - inputs = self._dropblock(inputs, is_training=is_training) inputs = self.conv2d_fixed_padding( inputs=inputs, filters=filters, kernel_size=3, strides=strides) inputs = self._batch_norm_relu()(inputs, is_training=is_training) - inputs = self._dropblock(inputs, is_training=is_training) inputs = self.conv2d_fixed_padding( inputs=inputs, filters=4 * filters, kernel_size=1, strides=1) inputs = self._batch_norm_relu( relu=False, init_zero=True)( inputs, is_training=is_training) - inputs = self._dropblock(inputs, is_training=is_training) return tf.nn.relu(inputs + shortcut)