diff --git a/PaddleCV/image_classification/scripts/train/ResNet50_fp16.sh b/PaddleCV/image_classification/scripts/train/ResNet50_fp16.sh
index 7bf6abac505c8140972b207e0b098469bdd4dec2..3a4090c1c43d42fdf72dea90d1df3d53e9a6126d 100755
--- a/PaddleCV/image_classification/scripts/train/ResNet50_fp16.sh
+++ b/PaddleCV/image_classification/scripts/train/ResNet50_fp16.sh
@@ -4,11 +4,17 @@ export FLAGS_conv_workspace_size_limit=4000 #MB
 export FLAGS_cudnn_exhaustive_search=1
 export FLAGS_cudnn_batchnorm_spatial_persistent=1
 
+
 DATA_DIR="Your image dataset path, e.g. /work/datasets/ILSVRC2012/"
 
 DATA_FORMAT="NHWC"
 USE_FP16=true #whether to use float16
 USE_DALI=true
+USE_ADDTO=true
+
+if ${USE_ADDTO} ;then
+    export FLAGS_max_inplace_grad_add=8
+fi
 
 if ${USE_DALI}; then
     export FLAGS_fraction_of_gpu_memory_to_use=0.8
@@ -31,6 +37,7 @@ python train.py \
        --fuse_elewise_add_act_ops=true \
        --fuse_bn_act_ops=true \
        --fuse_bn_add_act_ops=true \
+       --enable_addto=${USE_ADDTO} \
        --validate=true \
        --is_profiler=false \
        --profiler_path=profile/ \
diff --git a/PaddleCV/image_classification/utils/utility.py b/PaddleCV/image_classification/utils/utility.py
index ad98c6c06cab1c0c2939a24b60df3dcb3c9e3a28..c744bc436f6eb6a10a47088f184686f5c65c26d3 100644
--- a/PaddleCV/image_classification/utils/utility.py
+++ b/PaddleCV/image_classification/utils/utility.py
@@ -146,6 +146,7 @@ def parse_args():
     add_arg('fuse_elewise_add_act_ops', bool,   False,                  "Whether to use elementwise_act fusion.")
     add_arg('fuse_bn_act_ops',          bool,   False,                  "Whether to use batch_norm and act fusion.")
     add_arg('fuse_bn_add_act_ops',      bool,   False,                  "Whether to use batch_norm, elementwise_add and act fusion. This is only used for AMP training.")
+    add_arg('enable_addto',             bool,   False,                  "Whether to enable the addto strategy for gradient accumulation or not. This is only used for AMP training.")
 
     add_arg('use_label_smoothing',      bool,   False,                  "Whether to use label_smoothing")
     add_arg('label_smoothing_epsilon',  float,  0.1,                    "The value of label_smoothing_epsilon parameter")
@@ -537,6 +538,15 @@ def best_strategy_compiled(args,
                 "required when you want to fuse batch_norm and activation_op.")
         build_strategy.fuse_elewise_add_act_ops = args.fuse_elewise_add_act_ops
 
+        # Per the message below, enable_addto needs PaddlePaddle 2.0-rc or higher;
+        # assign it only inside the try so an unsupported build just logs a notice.
+        try:
+            build_strategy.enable_addto = args.enable_addto
+        except Exception as e:
+            logger.info(
+                "PaddlePaddle 2.0-rc or higher is "
+                "required when you want to enable addto strategy.")
+
         exec_strategy = fluid.ExecutionStrategy()
 
         if args.use_gpu: