diff --git a/deploy/cpp_infer/src/ocr_det.cpp b/deploy/cpp_infer/src/ocr_det.cpp
index 56fbace8cc6fa27f8172bed248573f15d0c98dac..bf94abce236853410c15434d494058be03a62a81 100644
--- a/deploy/cpp_infer/src/ocr_det.cpp
+++ b/deploy/cpp_infer/src/ocr_det.cpp
@@ -26,6 +26,8 @@ void DBDetector::LoadModel(const std::string &model_dir) {
     config.DisableGpu();
     if (this->use_mkldnn_) {
       config.EnableMKLDNN();
+      // Cap the MKLDNN cache at 10 different shapes to avoid a memory leak.
+      config.SetMkldnnCacheCapacity(10);
     }
     config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
   }
diff --git a/deploy/cpp_infer/src/ocr_rec.cpp b/deploy/cpp_infer/src/ocr_rec.cpp
index e37994b562cc4bf593332432a990afe4c6697531..7f88adc54636b4ecc61d257b7cb9159ebcdb82af 100644
--- a/deploy/cpp_infer/src/ocr_rec.cpp
+++ b/deploy/cpp_infer/src/ocr_rec.cpp
@@ -129,6 +129,8 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
     config.DisableGpu();
     if (this->use_mkldnn_) {
       config.EnableMKLDNN();
+      // Cap the MKLDNN cache at 10 different shapes to avoid a memory leak.
+      config.SetMkldnnCacheCapacity(10);
     }
     config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
   }
diff --git a/deploy/cpp_infer/tools/config.txt b/deploy/cpp_infer/tools/config.txt
index 17b507791cefbf6c6c15974ba92bfc0ac6b60e09..28bacba60d4a599ad951c9820938b38e55b07283 100644
--- a/deploy/cpp_infer/tools/config.txt
+++ b/deploy/cpp_infer/tools/config.txt
@@ -3,24 +3,25 @@ use_gpu  0
 gpu_id  0
 gpu_mem  4000
 cpu_math_library_num_threads  10
-use_mkldnn 0
-use_zero_copy_run 0
+use_mkldnn 1
+use_zero_copy_run 1
 
 # det config
 max_side_len  960
 det_db_thresh  0.3
 det_db_box_thresh  0.5
 det_db_unclip_ratio  2.0
-det_model_dir  ../model/det
+det_model_dir  ./inference/det_db
 
 # cls config
 use_angle_cls 0
-cls_model_dir ../model/cls
+cls_model_dir ../inference/cls
 cls_thresh  0.9
 
 # rec config
-rec_model_dir  ../model/rec
-char_list_file ../model/ppocr_keys_v1.txt
+rec_model_dir  ./inference/rec_crnn
+char_list_file ../../ppocr/utils/ppocr_keys_v1.txt
 
 # show the detection results
-visualize 1
\ No newline at end of file
+visualize 1
+
diff --git a/docker/hubserving/README.md b/deploy/docker/hubserving/README.md
similarity index 99%
rename from docker/hubserving/README.md
rename to deploy/docker/hubserving/README.md
index 71e2377dcc4f7524384752b95c53f02471353f34..62381073d4c7448f9a238ca4dda4b294ce864f7a 100644
--- a/docker/hubserving/README.md
+++ b/deploy/docker/hubserving/README.md
@@ -20,7 +20,7 @@ git clone https://github.com/PaddlePaddle/PaddleOCR.git
 ```
 b. Goto Dockerfile directory（ps：Need to distinguish between cpu and gpu version, the following takes cpu as an example, gpu version needs to replace the keyword）
 ```
-cd docker/cpu
+cd deploy/docker/hubserving/cpu
 ```
 c. Build image
 ```
diff --git a/docker/hubserving/README_cn.md b/deploy/docker/hubserving/README_cn.md
similarity index 99%
rename from docker/hubserving/README_cn.md
rename to deploy/docker/hubserving/README_cn.md
index 9b9e5f50f5b22f3a2125a656112a20542010ac68..f117a0ab4186fea0cb94881c65b2b353bee37ff7 100644
--- a/docker/hubserving/README_cn.md
+++ b/deploy/docker/hubserving/README_cn.md
@@ -20,7 +20,7 @@ git clone https://github.com/PaddlePaddle/PaddleOCR.git
 ```
 b.切换至Dockerfile目录（注：需要区分cpu或gpu版本，下文以cpu为例，gpu版本需要替换一下关键字即可）
 ```
-cd docker/cpu
+cd deploy/docker/hubserving/cpu
 ```
 c.生成镜像
 ```
diff --git a/docker/hubserving/cpu/Dockerfile b/deploy/docker/hubserving/cpu/Dockerfile
similarity index 100%
rename from docker/hubserving/cpu/Dockerfile
rename to deploy/docker/hubserving/cpu/Dockerfile
diff --git a/docker/hubserving/gpu/Dockerfile b/deploy/docker/hubserving/gpu/Dockerfile
similarity index 100%
rename from docker/hubserving/gpu/Dockerfile
rename to deploy/docker/hubserving/gpu/Dockerfile
diff --git a/docker/hubserving/sample_request.txt b/deploy/docker/hubserving/sample_request.txt
similarity index 100%
rename from docker/hubserving/sample_request.txt
rename to deploy/docker/hubserving/sample_request.txt
diff --git a/ppocr/utils/utility.py b/ppocr/utils/utility.py
index e27dd1d8738a25c6a6669b99ad2b6eed4a9f25d0..2cf3c8f5c9ebba07ee1c21fe2248fe3f600126d9 100755
--- a/ppocr/utils/utility.py
+++ b/ppocr/utils/utility.py
@@ -90,15 +90,3 @@ def check_and_read_gif(img_path):
         return imgvalue, True
     return None, False
 
-
-def create_multi_devices_program(program, loss_var_name):
-    build_strategy = fluid.BuildStrategy()
-    build_strategy.memory_optimize = False
-    build_strategy.enable_inplace = True
-    exec_strategy = fluid.ExecutionStrategy()
-    exec_strategy.num_iteration_per_drop_scope = 1
-    compile_program = fluid.CompiledProgram(program).with_data_parallel(
-        loss_name=loss_var_name,
-        build_strategy=build_strategy,
-        exec_strategy=exec_strategy)
-    return compile_program
diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py
index bb97c8fcf4ec936309f967ca208e59876b051f17..3e6be234c68dcd82f0f9e844f3ad2859000cec88 100755
--- a/tools/infer/predict_system.py
+++ b/tools/infer/predict_system.py
@@ -133,7 +133,6 @@ def main(args):
     image_file_list = get_image_file_list(args.image_dir)
     text_sys = TextSystem(args)
     is_visualize = True
-    tackle_img_num = 0
     for image_file in image_file_list:
         img, flag = check_and_read_gif(image_file)
         if not flag:
@@ -142,9 +141,6 @@ def main(args):
             logger.info("error in loading image:{}".format(image_file))
             continue
         starttime = time.time()
-        tackle_img_num += 1
-        if not args.use_gpu and args.enable_mkldnn and tackle_img_num % 30 == 0:
-            text_sys = TextSystem(args)
         dt_boxes, rec_res = text_sys(img)
         elapse = time.time() - starttime
         print("Predict time of %s: %.3fs" % (image_file, elapse))
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index ac04c2bd2aeb0e48d1db2bbf83de96af6863485d..92212afd5f3e16601939d0ca7882fb3b90c3a9ac 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -112,6 +112,8 @@ def create_predictor(args, mode):
         config.disable_gpu()
         config.set_cpu_math_library_num_threads(6)
         if args.enable_mkldnn:
+            # Cap the MKLDNN cache at 10 different shapes to avoid a memory leak.
+            config.set_mkldnn_cache_capacity(10)
             config.enable_mkldnn()
 
     # config.enable_memory_optim()