diff --git a/PaddleCV/image_classification/README.md b/PaddleCV/image_classification/README.md
index d7b4fe7b7974a9bd77d16487e64167f4203866a4..f5c9982d37a59f2bcbf090ac81d531a5ec18781e 100644
--- a/PaddleCV/image_classification/README.md
+++ b/PaddleCV/image_classification/README.md
@@ -68,7 +68,6 @@ python train.py \
        --class_dim=1000 \
        --image_shape=3,224,224 \
        --model_save_dir=output/ \
-       --with_mem_opt=False \
        --with_inplace=True \
        --lr_strategy=piecewise_decay \
        --lr=0.1
@@ -83,7 +82,6 @@ python train.py \
 * **class_dim**: the class number of the classification task. Default: 1000.
 * **image_shape**: input size of the network. Default: "3,224,224".
 * **model_save_dir**: the directory to save trained model. Default: "output".
-* **with_mem_opt**: whether to use memory optimization or not. Default: False.
 * **with_inplace**: whether to use inplace memory optimization or not. Default: True.
 * **lr_strategy**: learning rate changing strategy. Default: "piecewise_decay".
 * **lr**: initialized learning rate. Default: 0.1.
@@ -154,8 +152,6 @@ Note: Add and adjust other parameters according to specific models and tasks.
 
 You may add `--fp16=1` to start train using mixed precisioin training, which the training process will use float16 and the output model ("master" parameters) is saved as float32. You also may need to pass `--scale_loss` to overcome accuracy issues, usually `--scale_loss=8.0` will do.
 
-Note that currently `--fp16` can not use together with `--with_mem_opt`, so pass `--with_mem_opt=0` to disable memory optimization pass.
-
 ### CE
 
 CE is only for internal testing, don't have to set it.
diff --git a/PaddleCV/image_classification/README_cn.md b/PaddleCV/image_classification/README_cn.md
index ec17a708512d9a54191a1663dae79f2e4378a11f..83128c657eeffd4a34e14892c1b8405d09e248c1 100644
--- a/PaddleCV/image_classification/README_cn.md
+++ b/PaddleCV/image_classification/README_cn.md
@@ -64,7 +64,6 @@ python train.py \
        --class_dim=1000 \
        --image_shape=3,224,224 \
        --model_save_dir=output/ \
-       --with_mem_opt=False \
        --with_inplace=True \
        --lr_strategy=piecewise_decay \
        --lr=0.1
@@ -79,7 +78,6 @@ python train.py \
 * **class_dim**: 类别数，默认值: 1000
 * **image_shape**: 图片大小，默认值: "3,224,224"
 * **model_save_dir**: 模型存储路径，默认值: "output/"
-* **with_mem_opt**: 是否开启显存优化，默认值: False
 * **with_inplace**: 是否开启inplace显存优化，默认值: True
 * **lr_strategy**: 学习率变化策略，默认值: "piecewise_decay"
 * **lr**: 初始学习率，默认值: 0.1
@@ -142,8 +140,6 @@ python infer.py \
 
 可以通过开启`--fp16=True`启动混合精度训练，这样训练过程会使用float16数据，并输出float32的模型参数（"master"参数）。您可能需要同时传入`--scale_loss`来解决fp16训练的精度问题，通常传入`--scale_loss=8.0`即可。
 
-注意，目前混合精度训练不能和内存优化功能同时使用，所以需要传`--with_mem_opt=False`这个参数来禁用内存优化功能。
-
 ### CE测试
 
 注意：CE相关代码仅用于内部测试，enable_ce默认设置False。
diff --git a/PaddleCV/image_classification/dist_train/dist_train.py b/PaddleCV/image_classification/dist_train/dist_train.py
index 11c9063fae64dbddfd6638e1bc6d3ee455336799..fb21e4fa9ade6804585e10464ce245e73077306d 100644
--- a/PaddleCV/image_classification/dist_train/dist_train.py
+++ b/PaddleCV/image_classification/dist_train/dist_train.py
@@ -46,7 +46,6 @@ def parse_args():
     add_arg('class_dim',        int,   1000,                 "Class number.")
     add_arg('image_shape',      str,   "3,224,224",          "input image size")
     add_arg('model_save_dir',   str,   "output",             "model save directory")
-    add_arg('with_mem_opt',     bool,  False,                "Whether to use memory optimization or not.")
     add_arg('pretrained_model', str,   None,                 "Whether to use pretrained model.")
     add_arg('checkpoint',       str,   None,                 "Whether to resume checkpoint.")
     add_arg('lr',               float, 0.1,                  "set learning rate.")
diff --git a/PaddleCV/image_classification/run.sh b/PaddleCV/image_classification/run.sh
index b2466a79d8aa9449e40f7d95359ec496145807ea..ad04260dc973c447e2c7c382cee92051c8903d3c 100644
--- a/PaddleCV/image_classification/run.sh
+++ b/PaddleCV/image_classification/run.sh
@@ -7,7 +7,6 @@ python train.py \
        --class_dim=1000 \
        --image_shape=3,224,224 \
        --model_save_dir=output/ \
-       --with_mem_opt=True \
        --lr_strategy=cosine_decay \
        --lr=0.1 \
        --num_epochs=200 \
@@ -22,7 +21,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=piecewise_decay \
 #       --num_epochs=120 \
 #       --lr=0.01 \
@@ -39,7 +37,6 @@ python train.py \
 #        --model_save_dir=output/ \
 #        --lr=0.02 \
 #        --num_epochs=120 \
-#        --with_mem_opt=True \
 #        --l2_decay=1e-4
 
 #SqueezeNet1_1
@@ -53,7 +50,6 @@ python train.py \
 #        --model_save_dir=output/ \
 #        --lr=0.02 \
 #        --num_epochs=120 \
-#        --with_mem_opt=True \
 #        --l2_decay=1e-4
 
 #VGG11:
@@ -67,7 +63,6 @@ python train.py \
 #        --model_save_dir=output/ \
 #        --lr=0.1 \
 #        --num_epochs=90 \
-#        --with_mem_opt=True \
 #        --l2_decay=2e-4
 
 #VGG13:
@@ -81,7 +76,6 @@ python train.py \
 #        --lr=0.01 \
 #        --num_epochs=90 \
 #        --model_save_dir=output/ \
-#        --with_mem_opt=True \
 #        --l2_decay=3e-4
 
 #VGG16:
@@ -95,7 +89,6 @@ python train.py \
 #        --model_save_dir=output/ \
 #        --lr=0.01 \
 #        --num_epochs=90 \
-#        --with_mem_opt=True \
 #        --l2_decay=3e-4
 
 #VGG19:
@@ -108,7 +101,6 @@ python train.py \
 #        --lr_strategy=cosine_decay \
 #        --lr=0.01 \
 #        --num_epochs=90 \
-#        --with_mem_opt=True \
 #        --model_save_dir=output/ \
 #        --l2_decay=3e-4
 
@@ -120,7 +112,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=piecewise_decay \
 #       --num_epochs=120 \
 #       --lr=0.1 \
@@ -134,7 +125,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=cosine_decay \
 #       --num_epochs=240 \
 #       --lr=0.1 \
@@ -150,7 +140,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=cosine_decay \
 #       --num_epochs=240 \
 #       --lr=0.1 \
@@ -166,7 +155,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=cosine_decay \
 #       --num_epochs=240 \
 #       --lr=0.1 \
@@ -180,7 +168,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=cosine_decay \
 #       --num_epochs=240 \
 #       --lr=0.1 \
@@ -194,7 +181,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=cosine_decay \
 #       --num_epochs=240 \
 #       --lr=0.1 \
@@ -208,7 +194,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=cosine_warmup_decay \
 #       --num_epochs=240 \
 #       --lr=0.5 \
@@ -225,7 +210,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=cosine_warmup_decay \
 #       --num_epochs=240 \
 #       --lr=0.5 \
@@ -242,7 +226,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=cosine_warmup_decay \
 #       --num_epochs=240 \
 #       --lr=0.5 \
@@ -259,7 +242,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=cosine_warmup_decay \
 #       --num_epochs=240 \
 #       --lr=0.5 \
@@ -274,7 +256,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=cosine_warmup_decay \
 #       --num_epochs=240 \
 #       --lr=0.25 \
@@ -290,7 +271,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=cosine_warmup_decay \
 #       --num_epochs=240 \
 #       --lr=0.25 \
@@ -304,7 +284,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=cosine_warmup_decay \
 #       --lr=0.5 \
 #       --num_epochs=240 \
@@ -318,7 +297,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=cosine_decay \
 #       --lr=0.1 \
 #       --num_epochs=120 \
@@ -332,7 +310,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=cosine_decay \
 #       --lr=0.1 \
 #       --num_epochs=120 \
@@ -346,7 +323,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=piecewise_decay \
 #       --num_epochs=120 \
 #       --lr=0.1 \
@@ -362,7 +338,6 @@ python train.py \
 #       --lr_strategy=cosine_decay \
 #       --lr=0.1 \
 #       --num_epochs=200 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4 \
 
@@ -376,7 +351,6 @@ python train.py \
 #       --lr_strategy=cosine_decay \
 #       --lr=0.1 \
 #       --num_epochs=200 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=7e-5 \
 #       --use_mixup=True \
@@ -391,7 +365,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=piecewise_decay \
 #       --num_epochs=120 \
 #       --lr=0.1 \
@@ -407,7 +380,6 @@ python train.py \
 #       --lr_strategy=cosine_decay \
 #       --lr=0.1 \
 #       --num_epochs=200 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4 \
 #       --use_mixup=True \
@@ -423,7 +395,6 @@ python train.py \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
 #       --lr_strategy=piecewise_decay \
-#       --with_mem_opt=True \
 #       --lr=0.1 \
 #       --num_epochs=120 \
 #       --l2_decay=1e-4
@@ -438,7 +409,6 @@ python train.py \
 #       --lr_strategy=cosine_decay \
 #       --lr=0.1 \
 #       --num_epochs=200 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4 \
 #       --use_mixup=True \
@@ -455,7 +425,6 @@ python train.py \
 #       --lr_strategy=cosine_decay \
 #       --lr=0.1 \
 #       --num_epochs=200 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4 \
 #       --use_mixup=True \
@@ -472,7 +441,6 @@ python train.py \
 #       --lr_strategy=piecewise_decay \
 #       --lr=0.1 \
 #       --num_epochs=120 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4
 
@@ -486,7 +454,6 @@ python train.py \
 #       --lr_strategy=cosine_decay \
 #       --lr=0.1 \
 #       --num_epochs=200 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4 \
 #       --use_mixup=True \
@@ -503,7 +470,6 @@ python train.py \
 #       --lr_strategy=piecewise_decay \
 #       --lr=0.1 \
 #       --num_epochs=120 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4
 
@@ -517,7 +483,6 @@ python train.py \
 #       --lr_strategy=cosine_decay \
 #       --lr=0.1 \
 #       --num_epochs=200 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4 \
 #       --use_mixup=True \
@@ -534,7 +499,6 @@ python train.py \
 #       --lr_strategy=piecewise_decay \
 #       --lr=0.1 \
 #       --num_epochs=120 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4
 
@@ -548,7 +512,6 @@ python train.py \
 #       --lr_strategy=piecewise_decay \
 #       --lr=0.1 \
 #       --num_epochs=120 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=15e-5
 
@@ -562,7 +525,6 @@ python train.py \
 #       --lr_strategy=cosine_decay \
 #       --lr=0.1 \
 #       --num_epochs=200 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4 \
 #       --use_mixup=True \
@@ -579,7 +541,6 @@ python train.py \
 #       --lr_strategy=piecewise_decay \
 #       --lr=0.1 \
 #       --num_epochs=120 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4
 
@@ -593,7 +554,6 @@ python train.py \
 #       --lr_strategy=piecewise_decay \
 #       --lr=0.1 \
 #       --num_epochs=120 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=18e-5
 
@@ -607,7 +567,6 @@ python train.py \
 #       --lr_strategy=piecewise_decay \
 #       --lr=0.1 \
 #       --num_epochs=120 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4
 
@@ -621,7 +580,6 @@ python train.py \
 #       --lr_strategy=piecewise_decay \
 #       --lr=0.1 \
 #       --num_epochs=120 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4
 
@@ -635,7 +593,6 @@ python train.py \
 #       --lr_strategy=piecewise_decay \
 #       --lr=0.1 \
 #       --num_epochs=120 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4
 
@@ -649,7 +606,6 @@ python train.py \
 #       --lr_strategy=piecewise_decay \
 #       --lr=0.1 \
 #       --num_epochs=120 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4
 
@@ -663,7 +619,6 @@ python train.py \
 #       --lr_strategy=piecewise_decay \
 #       --lr=0.1 \
 #       --num_epochs=120 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4
 
@@ -678,7 +633,6 @@ python train.py \
 #       --model_save_dir=output/ \
 #       --lr=0.1 \
 #       --num_epochs=200 \
-#       --with_mem_opt=True \
 #       --l2_decay=1.2e-4
 
 #SE_ResNeXt101_32x4d:
@@ -692,7 +646,6 @@ python train.py \
 #       --model_save_dir=output/ \
 #       --lr=0.1 \
 #       --num_epochs=200 \
-#       --with_mem_opt=True \
 #       --l2_decay=1.5e-5
 
 # SE_154
@@ -705,7 +658,6 @@ python train.py \
 #       --lr_strategy=cosine_decay \
 #       --lr=0.1 \
 #       --num_epochs=200 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4 \
 #       --use_mixup=True \
@@ -720,7 +672,6 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=True \
 #       --lr_strategy=cosine_decay \
 #       --lr=0.01 \
 #       --num_epochs=200 \
@@ -736,7 +687,6 @@ python train.py \
 #       --lr_strategy=cosine_decay \
 #       --lr=0.045 \
 #       --num_epochs=120 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4 \
 #       --resize_short_size=320
@@ -751,7 +701,6 @@ python train.py \
 #       --lr_strategy=cosine_decay \
 #       --lr=0.045 \
 #       --num_epochs=200 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4 \
 #       --use_mixup=True \
@@ -769,7 +718,6 @@ python train.py \
 #       --lr_strategy=cosine_decay \
 #       --lr=0.1 \
 #       --num_epochs=200 \
-#       --with_mem_opt=True \
 #       --model_save_dir=output/ \
 #       --l2_decay=1e-4 \
 #       --use_mixup=True \
@@ -787,7 +735,6 @@ python train.py \
 #       --image_shape=3,224,224 \
 #       --lr=0.001 \
 #       --num_epochs=120 \
-#       --with_mem_opt=False \
 #       --model_save_dir=output/ \
 #       --lr_strategy=adam \
 #       --use_gpu=False
diff --git a/PaddleCV/image_classification/train.py b/PaddleCV/image_classification/train.py
index 48428becb230a68b399d2db060b7e951c8751efe..6f9563dcf185f183499858429a100b07b57bec8d 100755
--- a/PaddleCV/image_classification/train.py
+++ b/PaddleCV/image_classification/train.py
@@ -443,8 +443,6 @@ def train(args):
     use_ngraph = os.getenv('FLAGS_use_ngraph')
     if not use_ngraph:
         build_strategy = fluid.BuildStrategy()
-        # memopt may affect GC results
-        #build_strategy.memory_optimize = args.with_mem_opt
         build_strategy.enable_inplace = args.with_inplace
         #build_strategy.fuse_all_reduce_ops=1
 
diff --git a/PaddleNLP/Research/ACL2019-JEMT/README.md b/PaddleNLP/Research/ACL2019-JEMT/README.md
index b003b9a2802bfbe6f1fd06831c3117a442642eb4..73e4c02cd3285feb24d1b22e51fcbd754b226ea0 100644
--- a/PaddleNLP/Research/ACL2019-JEMT/README.md
+++ b/PaddleNLP/Research/ACL2019-JEMT/README.md
@@ -81,7 +81,6 @@ beta代表发音信息的权重。这表明，即使将绝大部分权重放在
   --sort_type pool \
   --pool_size 200000 \
   --use_py_reader False \
-  --use_mem_opt False \
   --enable_ce False \
   --fetch_steps 1 \
   pass_num 100 \