This repository has been archived by the owner on Sep 11, 2019. It is now read-only.

[WIP] test #70

Open
wants to merge 37 commits into master
Commits (37)
d761e41  test change  (YangFei1990, Jul 29, 2019)
7e14c65  test change  (YangFei1990, Jul 29, 2019)
a2036a9  test change  (YangFei1990, Jul 29, 2019)
142cbc1  test change  (YangFei1990, Jul 29, 2019)
6951f55  test change  (YangFei1990, Jul 29, 2019)
38593bc  test change  (YangFei1990, Jul 29, 2019)
3cee190  test change  (YangFei1990, Jul 29, 2019)
f4fa2e4  test change  (YangFei1990, Jul 30, 2019)
147d2a3  test  (YangFei1990, Jul 30, 2019)
821d4b1  test  (YangFei1990, Jul 30, 2019)
e236bf0  test  (YangFei1990, Jul 30, 2019)
82742b1  test  (YangFei1990, Jul 30, 2019)
479fd33  add hvd compression  (YangFei1990, Jul 31, 2019)
03a864e  add xla  (YangFei1990, Aug 1, 2019)
df1b03b  minor change  (YangFei1990, Aug 1, 2019)
b9ac597  modify xla  (YangFei1990, Aug 2, 2019)
aa2b7d5  minor change  (YangFei1990, Aug 2, 2019)
847cb4d  minor change  (YangFei1990, Aug 2, 2019)
732cee8  minor change  (YangFei1990, Aug 5, 2019)
efd9d5c  add xla scope  (YangFei1990, Aug 6, 2019)
c831962  fix bug  (YangFei1990, Aug 6, 2019)
ff450ec  add double bias  (YangFei1990, Aug 16, 2019)
fde9760  add ena script  (YangFei1990, Aug 16, 2019)
b5bc06a  minor change  (YangFei1990, Aug 19, 2019)
6057395  minor change  (YangFei1990, Aug 19, 2019)
b0e14b3  minor change  (YangFei1990, Aug 19, 2019)
18cb6ff  minor change  (YangFei1990, Aug 19, 2019)
405f8b7  add test print  (YangFei1990, Aug 19, 2019)
c483b0c  minor change  (YangFei1990, Aug 19, 2019)
7c19bd0  minor change  (YangFei1990, Aug 19, 2019)
2d7f92b  minor change  (YangFei1990, Aug 19, 2019)
8b64135  minor change  (YangFei1990, Aug 19, 2019)
fd56049  minor change  (YangFei1990, Aug 19, 2019)
896fa26  minor change  (YangFei1990, Aug 19, 2019)
ab4f2c2  minor change  (YangFei1990, Aug 19, 2019)
8a25502  freeze the backbone  (YangFei1990, Aug 20, 2019)
6edac82  add print  (YangFei1990, Aug 21, 2019)
2 changes: 2 additions & 0 deletions MaskRCNN/config.py
@@ -131,6 +131,8 @@ def __ne__(self, _):
_C.TRAIN.BATCH_SIZE_PER_GPU = 1
_C.TRAIN.SEED = 1234
_C.TRAIN.GRADIENT_CLIP = 0 # set non-zero value to enable gradient clip, 0.36 is recommended for 32x4
_C.TRAIN.XLA = False
_C.TRAIN.DOUBLE_BIAS = False

# preprocessing --------------------
# Alternative old (worse & faster) setting: 600
2 changes: 1 addition & 1 deletion MaskRCNN/model/backbone.py
@@ -228,7 +228,7 @@ def resnet_fpn_backbone(image, num_blocks, seed_gen, fp16=False):
    l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
    with backbone_scope(freeze=freeze_at > 1):
        c2 = resnet_group('group0', l, resnet_bottleneck, 64, num_blocks[0], 1, seed_gen=seed_gen)
-    with backbone_scope(freeze=False):
+    with backbone_scope(freeze=True):
        c3 = resnet_group('group1', c2, resnet_bottleneck, 128, num_blocks[1], 2, seed_gen=seed_gen)
        c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2, seed_gen=seed_gen)
        c5 = resnet_group('group3', c4, resnet_bottleneck, 512, num_blocks[3], 2, seed_gen=seed_gen)
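`backbone_scope` itself is defined elsewhere in backbone.py and is not part of this diff. For readers of the PR, a minimal sketch of how such a freeze scope can be implemented with a variable-scope `custom_getter` (illustrative only, assuming TF 1.x; the repo's actual scope may also handle BatchNorm behavior):

```python
from contextlib import contextmanager

import tensorflow as tf


@contextmanager
def freeze_scope(freeze):
    """Variables touched inside this scope still run forward, but
    tf.stop_gradient blocks any gradient from updating them."""
    def custom_getter(getter, *args, **kwargs):
        v = getter(*args, **kwargs)
        return tf.stop_gradient(v) if freeze else v

    with tf.variable_scope(tf.get_variable_scope(), custom_getter=custom_getter):
        yield
```

With `freeze=True` on this scope, ResNet groups 1 through 3 stop receiving weight updates, which matches the "freeze the backbone" commit in this PR.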
35 changes: 31 additions & 4 deletions MaskRCNN/model/generalized_rcnn.py
@@ -53,6 +53,32 @@ def variables(self, *args, **kwargs):
        return self.opt.variables(*args, **kwargs)


class DoubleBiasOptimizer(tf.train.Optimizer):
    """Wrapper that doubles the gradients of bias-like variables
    (plain biases named 'b:0' and BatchNorm offsets named 'beta:0')."""
    def __init__(self, opt):
        self.opt = opt

    def compute_gradients(self, *args, **kwargs):
        gradvars = self.opt.compute_gradients(*args, **kwargs)
        grads_and_vars = []
        for grad, var in gradvars:
            if grad is not None and ('beta:0' in var.name or 'b:0' in var.name):
                grad = 2.0 * grad
            grads_and_vars.append((grad, var))
        return grads_and_vars

    def apply_gradients(self, *args, **kwargs):
        return self.opt.apply_gradients(*args, **kwargs)

    def get_slot(self, *args, **kwargs):
        return self.opt.get_slot(*args, **kwargs)

    def get_slot_names(self, *args, **kwargs):
        return self.opt.get_slot_names(*args, **kwargs)

    def variables(self, *args, **kwargs):
        return self.opt.variables(*args, **kwargs)


class DetectionModel(ModelDesc):
    def __init__(self, fp16):
        self.fp16 = fp16
@@ -71,8 +97,11 @@ def optimizer(self):


        opt = tf.train.MomentumOptimizer(lr, 0.9)
-        if cfg.TRAIN.NUM_GPUS < 8:
-            opt = optimizer.AccumGradOptimizer(opt, 8 // cfg.TRAIN.NUM_GPUS)
+        #opt = TestOptimizer(opt)
+        #if cfg.TRAIN.NUM_GPUS < 8:
+        #    opt = optimizer.AccumGradOptimizer(opt, 8 // cfg.TRAIN.NUM_GPUS)
+        if cfg.TRAIN.DOUBLE_BIAS:
+            opt = DoubleBiasOptimizer(opt)
        if cfg.TRAIN.GRADIENT_CLIP != 0:
            opt = GradientClipOptimizer(opt, cfg.TRAIN.GRADIENT_CLIP)
        return opt
@@ -94,9 +123,7 @@ def get_inference_tensor_names(self):

    def build_graph(self, *inputs):
        inputs = dict(zip(self.input_names, inputs))
-
        image = self.preprocess(inputs['images'])     # NCHW
-
        seed_gen = SeedGenerator(cfg.TRAIN.SEED)

        features = self.backbone(image, seed_gen)
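A quick sanity check for the `DoubleBiasOptimizer` added above (a sketch, assuming TF 1.x; the variable names are chosen only to exercise the name filter):

```python
import tensorflow as tf

w = tf.get_variable('w', initializer=1.0)   # name 'w:0', not matched
b = tf.get_variable('b', initializer=1.0)   # name 'b:0', matched by the filter
loss = w + b                                # d(loss)/dw == d(loss)/db == 1.0

opt = DoubleBiasOptimizer(tf.train.MomentumOptimizer(0.1, 0.9))
gradvars = opt.compute_gradients(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for g, v in gradvars:
        print(v.name, sess.run(g))          # expect w:0 -> 1.0, b:0 -> 2.0
```

Since `GradientClipOptimizer` wraps `DoubleBiasOptimizer` in `optimizer()`, clipping presumably operates on the already-doubled bias gradients.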
200 changes: 125 additions & 75 deletions MaskRCNN/model/rpn.py

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions MaskRCNN/performance.py
@@ -108,6 +108,19 @@ def print_runtime_tensor_loose_branch(name, tensor, prefix=None, summarize=-1, trigger_tensor=None):
        return tf.identity(trigger_tensor)


def print_runtime_shape_loose_branch(name, tensor, prefix=None, summarize=-1, trigger_tensor=None):
    assert trigger_tensor is not None

    s = "[runtime_tensor_freehanging_branch] "
    if prefix is not None:
        s += f'[{prefix}] '
    s += name

    print_op = tf.print(s, tf.shape(tensor), summarize=summarize)
    with tf.control_dependencies([print_op]):
        return tf.identity(trigger_tensor)


class ThroughputTracker(tensorpack.Callback):
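A self-contained usage sketch for the new helper (assuming TF 1.x): the shape print piggybacks on a "trigger" tensor that the training loop evaluates anyway, so the inspected tensor never has to be fetched itself.

```python
import tensorflow as tf

x = tf.random_normal([4, 32, 32])   # tensor whose runtime shape we want logged
loss = tf.reduce_sum(x)             # something that is evaluated every step

# Re-thread `loss` through the helper; evaluating it now also prints the shape.
loss = print_runtime_shape_loose_branch('x', x, prefix='demo', trigger_tensor=loss)

with tf.Session() as sess:
    sess.run(loss)   # prints: [runtime_tensor_freehanging_branch] [demo] x [4 32 32]
```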
8 changes: 6 additions & 2 deletions MaskRCNN/train.py
@@ -154,7 +154,7 @@ def check_and_log(cmd):

parser.add_argument('--log_full_git_diff', help="Log the full git diff", action="store_false")

-
+parser.add_argument('--xla', help="Enable xla", action="store_true")
#################################################################################################################


@@ -289,6 +289,10 @@ def check_and_log(cmd):
    else:
        session_init = get_model_loader(cfg.BACKBONE.WEIGHTS) if cfg.BACKBONE.WEIGHTS else None

+    sess_config = None
+    if args.xla:
+        sess_config = tf.ConfigProto()
+        sess_config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

    traincfg = TrainConfig(
        model=MODEL,
@@ -303,7 +307,7 @@
        steps_per_epoch=steps_per_epoch,
        max_epoch=max_epoch,
        session_init=session_init,
-        session_config=None,
+        session_config=sess_config,
        starting_epoch=cfg.TRAIN.STARTING_EPOCH
    )

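For reference, the global JIT switch wired in above, shown in isolation (a sketch assuming a TF 1.x build with XLA enabled):

```python
import tensorflow as tf

config = tf.ConfigProto()
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

a = tf.random_normal([256, 256])
b = tf.matmul(a, a)   # a candidate op for XLA clustering

with tf.Session(config=config) as sess:
    sess.run(b)
```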
5 changes: 5 additions & 0 deletions MaskRCNN/utils/mixed_precision.py
@@ -43,3 +43,8 @@ def mixed_precision_scope(mixed=True, *args, **kwargs):
                           custom_getter=float32_variable_storage_getter,
                           reuse=tf.AUTO_REUSE, *args, **kwargs)

def xla_scope(enable=True):
    if not enable:
        return suppress()

    return tf.contrib.compiler.jit.experimental_jit_scope()
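Usage sketch for `xla_scope`: unlike the session-wide flag in train.py, this marks only the ops built inside the scope for JIT compilation (again assuming TF 1.x, where `tf.contrib` exists; note the helper relies on `contextlib.suppress`, which the module must import):

```python
import tensorflow as tf

with xla_scope(enable=True):
    a = tf.random_normal([128, 128])
    b = tf.matmul(a, a)        # ops created here carry the JIT-scope attribute

with xla_scope(enable=False):  # no-op context via contextlib.suppress()
    c = tf.matmul(b, b)
```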
12 changes: 6 additions & 6 deletions infra/ami/README.md
@@ -6,15 +6,15 @@ Required on DLAMI 21.2
```
pip uninstall -y protobuf

-rm /home/ubuntu/anaconda3/envs/tensorflow_p36_13rc1/bin/protoc
-rm -r /home/ubuntu/anaconda3/envs/tensorflow_p36_13rc1/include/google/protobuf
-rm /home/ubuntu/anaconda3/envs/tensorflow_p36_13rc1/lib/python3.6/site-packages/protobuf-3.6.0-py3.6-nspkg.pth
+rm /home/ubuntu/anaconda3/envs/tensorflow_p36/bin/protoc
+rm -r /home/ubuntu/anaconda3/envs/tensorflow_p36/include/google/protobuf
+rm /home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/protobuf-3.6.0-py3.6-nspkg.pth
rm /home/ubuntu/anaconda3/bin//protoc

wget https://github.com/google/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip
mkdir -p /home/ubuntu/protoc
mv protoc-3.6.1-linux-x86_64.zip /home/ubuntu/protoc/protoc-3.6.1-linux-x86_64.zip
unzip /home/ubuntu/protoc/protoc-3.6.1-linux-x86_64.zip -d protoc
-sudo mv /home/ubuntu/protoc/bin/protoc /home/ubuntu/anaconda3/envs/tensorflow_p36_13rc1/bin/protoc
-sudo mv /home/ubuntu/protoc/include/* /home/ubuntu/anaconda3/envs/tensorflow_p36_13rc1/include
-pip install protobuf==3.6.1
+sudo mv /home/ubuntu/protoc/bin/protoc /home/ubuntu/anaconda3/envs/tensorflow_p36/bin/protoc
+sudo mv /home/ubuntu/protoc/include/* /home/ubuntu/anaconda3/envs/tensorflow_p36/include
+pip install protobuf==3.6.1
7 changes: 6 additions & 1 deletion infra/ami/train_efa.sh
100644 → 100755
@@ -10,7 +10,7 @@ echo "BATCH_SIZE_PER_GPU: ${BATCH_SIZE_PER_GPU}"
echo "THROUGHPUT_LOG_FREQ: ${THROUGHPUT_LOG_FREQ}"
echo ""


+export LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH

mpirun -np ${NUM_GPU} \
--hostfile hosts \
@@ -32,9 +32,13 @@ mpirun -np ${NUM_GPU} \
-x TENSORPACK_FP16=1 \
-x HOROVOD_CYCLE_TIME=0.5 \
-x HOROVOD_FUSION_THRESHOLD=67108864 \
+-x HOROVOD_TIMELINE=~/timeline.json \
python3 /home/ec2-user/tensorpack-mask-rcnn/MaskRCNN/train.py \
--fp16 \
--throughput_log_freq ${THROUGHPUT_LOG_FREQ} \
+--tfprof \
+--tfprof_start_step 10000 \
+--tfprof_end_step 10005 \
--config \
MODE_MASK=True \
MODE_FPN=True \
@@ -44,6 +48,7 @@ DATA.VAL='("val2017",)' \
TRAIN.BATCH_SIZE_PER_GPU=${BATCH_SIZE_PER_GPU} \
TRAIN.LR_EPOCH_SCHEDULE='[(8, 0.1), (10, 0.01), (12, None)]' \
TRAIN.EVAL_PERIOD=12 \
+TRAIN.DOUBLE_BIAS=False \
RPN.TOPK_PER_IMAGE=True \
PREPROC.PREDEFINED_PADDING=True \
BACKBONE.WEIGHTS=/home/ec2-user/data/pretrained-models/ImageNet-R50-AlignPadding.npz \
53 changes: 53 additions & 0 deletions infra/ami/train_ena.sh
@@ -0,0 +1,53 @@
#!/usr/bin/env bash
NUM_GPU=${1:-1}
BATCH_SIZE_PER_GPU=${2:-1}
THROUGHPUT_LOG_FREQ=${3:-2000}


echo ""
echo "NUM_GPU: ${NUM_GPU}"
echo "BATCH_SIZE_PER_GPU: ${BATCH_SIZE_PER_GPU}"
echo "THROUGHPUT_LOG_FREQ: ${THROUGHPUT_LOG_FREQ}"
echo ""

export LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH

mpirun -np ${NUM_GPU} \
--hostfile hosts \
--mca plm_rsh_no_tree_spawn 1 -bind-to none -map-by slot -mca pml ob1 \
-mca btl_vader_single_copy_mechanism none \
--mca btl tcp,self \
--mca btl_tcp_if_exclude lo,docker0 \
-x NCCL_TREE_THRESHOLD=4294967296 \
-x LD_LIBRARY_PATH \
-x PATH \
-x NCCL_SOCKET_IFNAME=^docker0,lo \
-x NCCL_MIN_NRINGS=13 \
-x NCCL_DEBUG=INFO \
-x TENSORPACK_FP16=1 \
-x HOROVOD_CYCLE_TIME=0.5 \
-x HOROVOD_FUSION_THRESHOLD=67108864 \
-x HOROVOD_TIMELINE=~/timeline.json \
python3 /home/ubuntu/tensorpack-mask-rcnn/MaskRCNN/train.py \
--fp16 \
--throughput_log_freq ${THROUGHPUT_LOG_FREQ} \
--tfprof \
--tfprof_start_step 10000 \
--tfprof_end_step 10005 \
--config \
MODE_MASK=True \
MODE_FPN=True \
DATA.BASEDIR=/home/ubuntu/data \
DATA.TRAIN='["train2017"]' \
DATA.VAL='("val2017",)' \
TRAIN.BATCH_SIZE_PER_GPU=${BATCH_SIZE_PER_GPU} \
TRAIN.LR_EPOCH_SCHEDULE='[(8, 0.1), (10, 0.01), (12, None)]' \
TRAIN.EVAL_PERIOD=12 \
TRAIN.DOUBLE_BIAS=False \
RPN.TOPK_PER_IMAGE=True \
PREPROC.PREDEFINED_PADDING=True \
BACKBONE.WEIGHTS=/home/ubuntu/data/pretrained-models/ImageNet-R50-AlignPadding.npz \
BACKBONE.NORM=FreezeBN \
TRAINER=horovod
#For 32x4
#TRAIN.GRADIENT_CLIP=1.5
1 change: 1 addition & 0 deletions tensorpack/train/tower.py
@@ -266,6 +266,7 @@ def compute_grad_from_inputs(*inputs):
            print(f'TENSORPACK_FP16 set. Using FP16 loss scaling of {loss_scale}')
            cost *= loss_scale


        opt = get_opt_fn()
        grads = opt.compute_gradients(
            cost, var_list=varlist,
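The hunk above only adds a blank line, but it sits in the FP16 loss-scaling path, where `cost` is multiplied by `loss_scale` before gradients are computed. A minimal standalone sketch of static loss scaling (fixed scale assumed; where this codebase divides the gradients back is not shown in this hunk, so the sketch does it explicitly):

```python
import tensorflow as tf

loss_scale = 128.0   # fixed scale, analogous to the env-configured value above

w = tf.get_variable('w', initializer=tf.ones([4]))
cost = tf.reduce_sum(tf.square(w))

opt = tf.train.MomentumOptimizer(0.1, 0.9)
# Scale the loss up so small fp16 gradients do not underflow...
grads = opt.compute_gradients(cost * loss_scale)
# ...then scale each gradient back down so the update magnitude is unchanged.
grads = [(g / loss_scale, v) if g is not None else (g, v) for g, v in grads]
train_op = opt.apply_gradients(grads)
```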
6 changes: 5 additions & 1 deletion tensorpack/train/trainers.py
@@ -397,9 +397,13 @@ def allreduce(self, grads):
        # copied from https://github.com/uber/horovod/blob/master/horovod/tensorflow/__init__.py
        averaged_gradients = []
        with tf.name_scope("HVDAllReduce"):
+            compression = hvd.Compression.none
+            if os.getenv("TENSORPACK_COMPRESSION"):
+                compression = hvd.Compression.fp16
+                print("Compression enabled....")
            for grad, var in grads:
                if grad is not None:
-                    avg_grad = hvd.allreduce(grad, average=self._average)
+                    avg_grad = hvd.allreduce(grad, average=self._average, compression=compression)
                    averaged_gradients.append((avg_grad, var))
                else:
                    averaged_gradients.append((None, var))
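A standalone sketch of the compression toggle added above (assumes Horovod with TensorFlow support is installed; run it under `mpirun` or `horovodrun`). fp16 compression halves allreduce traffic at the cost of some gradient precision:

```python
import os

import horovod.tensorflow as hvd
import tensorflow as tf

hvd.init()

compression = (hvd.Compression.fp16 if os.getenv("TENSORPACK_COMPRESSION")
               else hvd.Compression.none)

grad = tf.random_normal([1 << 20])
avg_grad = hvd.allreduce(grad, average=True, compression=compression)

with tf.Session() as sess:
    sess.run(avg_grad)   # gradients are cast to fp16 for transport, then back
```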