diff --git a/PaddleCV/image_classification/scripts/train/ResNet50_fp16.sh b/PaddleCV/image_classification/scripts/train/ResNet50_fp16.sh index 6ebd5c01ff3de389ec5cf9f8aaeb5d0f3f690715..7791c1b3561520a88377e75ea62ba6cb50c4904e 100755 --- a/PaddleCV/image_classification/scripts/train/ResNet50_fp16.sh +++ b/PaddleCV/image_classification/scripts/train/ResNet50_fp16.sh @@ -3,8 +3,9 @@ export FLAGS_conv_workspace_size_limit=4000 #MB export FLAGS_cudnn_exhaustive_search=1 export FLAGS_cudnn_batchnorm_spatial_persistent=1 +export FLAGS_max_inplace_grad_add=8 -DATA_DIR="Your image dataset path, e.g. /work/datasets/ILSVRC2012/" +DATA_DIR="./data/ILSVRC2012/" DATA_FORMAT="NHWC" USE_FP16=true #whether to use float16 @@ -17,7 +18,7 @@ fi python train.py \ --model=ResNet50 \ --data_dir=${DATA_DIR} \ - --batch_size=256 \ + --batch_size=128 \ --total_images=1281167 \ --image_shape 3 224 224 \ --class_dim=1000 \ @@ -36,5 +37,7 @@ python train.py \ --reader_thread=10 \ --reader_buf_size=4000 \ --use_dali=${USE_DALI} \ + --fuse_all_optimizer_ops=true \ + --enable_addto=true \ --lr=0.1 diff --git a/PaddleCV/image_classification/train.py b/PaddleCV/image_classification/train.py index 150b44b6e747efae84b1efb5803be30c4f0528ce..68abad9572ac8d2b9a90c0c5c96278c4ec0c910b 100755 --- a/PaddleCV/image_classification/train.py +++ b/PaddleCV/image_classification/train.py @@ -244,8 +244,16 @@ def train(args): t1 = time.time() for batch in train_iter: #NOTE: this is for benchmark - if args.max_iter and total_batch_num == args.max_iter: + if total_batch_num == 200: + #if args.max_iter and total_batch_num == args.max_iter: + print("=" *20) + print("total_batch_num: ", total_batch_num, "records_num: ", len(train_batch_time_record)) + avg_times = sum(train_batch_time_record[-150:]) / 150 + avg_speed = args.batch_size / avg_times + print("average time: %.5f s/batch, average speed: %.5f imgs/s" % (avg_times, avg_speed)) return + #if args.max_iter and total_batch_num == args.max_iter: + # return train_batch_metrics = exe.run(compiled_train_prog, feed=batch, fetch_list=train_fetch_list) diff --git a/PaddleCV/image_classification/utils/utility.py b/PaddleCV/image_classification/utils/utility.py index 45c85102acfc7ebbc45e291ebe633211d69b9f42..e5dafa75a24a9f80583a3b2676de7b819f374fd6 100644 --- a/PaddleCV/image_classification/utils/utility.py +++ b/PaddleCV/image_classification/utils/utility.py @@ -162,6 +162,8 @@ def parse_args(): add_arg('profiler_path', str, './profilier_files', "the profiler output file path") add_arg('max_iter', int, 0, "the max train batch num") add_arg('same_feed', int, 0, "whether to feed same images") + add_arg('enable_addto', bool, False, "whether to enable the addto strategy for gradient accumulation") + add_arg('fuse_all_optimizer_ops', bool, False, "whether to fuse all optimizer operators") # yapf: enable @@ -524,6 +526,8 @@ def best_strategy_compiled(args, try: fluid.require_version(min_version='1.7.0') build_strategy.fuse_bn_act_ops = args.fuse_bn_act_ops + build_strategy.fuse_all_optimizer_ops = args.fuse_all_optimizer_ops + build_strategy.enable_addto = args.enable_addto except Exception as e: logger.info("PaddlePaddle version 1.7.0 or higher is " "required when you want to fuse batch_norm and activation_op.")