diff --git a/PaddleCV/image_classification/scripts/train/ResNet50_fp16.sh b/PaddleCV/image_classification/scripts/train/ResNet50_fp16.sh index 7bf6abac505c8140972b207e0b098469bdd4dec2..3a4090c1c43d42fdf72dea90d1df3d53e9a6126d 100755 --- a/PaddleCV/image_classification/scripts/train/ResNet50_fp16.sh +++ b/PaddleCV/image_classification/scripts/train/ResNet50_fp16.sh @@ -4,11 +4,17 @@ export FLAGS_conv_workspace_size_limit=4000 #MB export FLAGS_cudnn_exhaustive_search=1 export FLAGS_cudnn_batchnorm_spatial_persistent=1 + DATA_DIR="Your image dataset path, e.g. /work/datasets/ILSVRC2012/" DATA_FORMAT="NHWC" USE_FP16=true #whether to use float16 USE_DALI=true +USE_ADDTO=true + +if ${USE_ADDTO} ;then + export FLAGS_max_inplace_grad_add=8 +fi if ${USE_DALI}; then export FLAGS_fraction_of_gpu_memory_to_use=0.8 @@ -31,6 +37,7 @@ python train.py \ --fuse_elewise_add_act_ops=true \ --fuse_bn_act_ops=true \ --fuse_bn_add_act_ops=true \ + --enable_addto=${USE_ADDTO} \ --validate=true \ --is_profiler=false \ --profiler_path=profile/ \ diff --git a/PaddleCV/image_classification/utils/utility.py b/PaddleCV/image_classification/utils/utility.py index ad98c6c06cab1c0c2939a24b60df3dcb3c9e3a28..c744bc436f6eb6a10a47088f184686f5c65c26d3 100644 --- a/PaddleCV/image_classification/utils/utility.py +++ b/PaddleCV/image_classification/utils/utility.py @@ -146,6 +146,7 @@ def parse_args(): add_arg('fuse_elewise_add_act_ops', bool, False, "Whether to use elementwise_act fusion.") add_arg('fuse_bn_act_ops', bool, False, "Whether to use batch_norm and act fusion.") add_arg('fuse_bn_add_act_ops', bool, False, "Whether to use batch_norm, elementwise_add and act fusion. This is only used for AMP training.") + add_arg('enable_addto', bool, False, "Whether to enable the addto strategy for gradient accumulation or not. This is only used for AMP training.") add_arg('use_label_smoothing', bool, False, "Whether to use label_smoothing") add_arg('label_smoothing_epsilon', float, 0.1, "The value of label_smoothing_epsilon parameter") @@ -537,6 +538,15 @@ def best_strategy_compiled(args, "required when you want to fuse batch_norm and activation_op.") build_strategy.fuse_elewise_add_act_ops = args.fuse_elewise_add_act_ops + try: + build_strategy.enable_addto = args.enable_addto + except Exception as e: + logger.info( + "PaddlePaddle 2.0-rc or higher is " + "required when you want to enable addto strategy.") + build_strategy.enable_addto = args.enable_addto + + exec_strategy = fluid.ExecutionStrategy() if args.use_gpu: