From 6999f340558595344170f410523c7d9cc673c532 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Fri, 29 May 2020 14:49:17 +0800 Subject: [PATCH] new dataloader and boxing v2 params --- cnn_e2e/of_cnn_train_val.py | 18 ++++++++++++++---- cnn_e2e/resnet_model.py | 7 ++++--- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/cnn_e2e/of_cnn_train_val.py b/cnn_e2e/of_cnn_train_val.py index 5e5cf21..0c832c6 100755 --- a/cnn_e2e/of_cnn_train_val.py +++ b/cnn_e2e/of_cnn_train_val.py @@ -39,18 +39,25 @@ model_dict = { flow.config.gpu_device_num(args.gpu_num_per_node) flow.config.enable_debug_mode(True) +if args.use_boxing_v2: + flow.config.collective_boxing.nccl_fusion_threshold_mb(8) + flow.config.collective_boxing.nccl_fusion_all_reduce_use_buffer(False) + @flow.function(get_train_config(args)) def TrainNet(): if args.train_data_dir: assert os.path.exists(args.train_data_dir) print("Loading data from {}".format(args.train_data_dir)) - (labels, images) = ofrecord_util.load_imagenet_for_training2(args) + if args.use_new_dataloader: + (labels, images) = ofrecord_util.load_imagenet_for_training2(args) + else: + (labels, images) = ofrecord_util.load_imagenet_for_training(args) # note: images.shape = (N C H W) in cc's new dataloader(load_imagenet_for_training2) else: print("Loading synthetic data.") (labels, images) = ofrecord_util.load_synthetic(args) - logits = model_dict[args.model](images) + logits = model_dict[args.model](images, need_transpose=not args.use_new_dataloader) loss = flow.nn.sparse_softmax_cross_entropy_with_logits(labels, logits, name="softmax_loss") loss = flow.math.reduce_mean(loss) flow.losses.add_loss(loss) @@ -64,12 +71,15 @@ def InferenceNet(): if args.val_data_dir: assert os.path.exists(args.val_data_dir) print("Loading data from {}".format(args.val_data_dir)) - (labels, images) = ofrecord_util.load_imagenet_for_validation2(args) + if args.use_new_dataloader: + (labels, images) = ofrecord_util.load_imagenet_for_validation2(args) + else: + (labels, images) = ofrecord_util.load_imagenet_for_validation(args) else: print("Loading synthetic data.") (labels, images) = ofrecord_util.load_synthetic(args) - logits = model_dict[args.model](images) + logits = model_dict[args.model](images, need_transpose=not args.use_new_dataloader) predictions = flow.nn.softmax(logits) outputs = {"predictions":predictions, "labels": labels} return outputs diff --git a/cnn_e2e/resnet_model.py b/cnn_e2e/resnet_model.py index 52e865b..32a2a40 100755 --- a/cnn_e2e/resnet_model.py +++ b/cnn_e2e/resnet_model.py @@ -26,7 +26,7 @@ def _conv2d( ): weight = flow.get_variable( name + "-weight", - shape=(filters, input.static_shape[1], kernel_size, kernel_size), + shape=(filters, input.shape[1], kernel_size, kernel_size), dtype=input.dtype, initializer=weight_initializer, regularizer=weight_regularizer, @@ -125,10 +125,11 @@ def resnet_stem(input): return pool1 -def resnet50(images, trainable=True): +def resnet50(images, trainable=True, need_transpose=False): # note: images.shape = (N C H W) in cc's new dataloader, transpose is not needed anymore - # images = flow.transpose(images, name="transpose", perm=[0, 3, 1, 2]) + if need_transpose: + images = flow.transpose(images, name="transpose", perm=[0, 3, 1, 2]) with flow.deprecated.variable_scope("Resnet"): stem = resnet_stem(images) -- GitLab