diff --git a/deploy/cpp/CMakeLists.txt b/deploy/cpp/CMakeLists.txt index bd13a46713e1239380891e25c3ee7cb68f0f8d1e..1c4a2304a7a2028e8722246df75d9e3e9a9311d7 100644 --- a/deploy/cpp/CMakeLists.txt +++ b/deploy/cpp/CMakeLists.txt @@ -73,7 +73,11 @@ endif() if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include") include_directories("${PADDLE_DIR}/third_party/install/snappystream/include") endif() -include_directories("${PADDLE_DIR}/third_party/install/zlib/include") +# zlib does not exist in 1.8.1 +if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/include") + include_directories("${PADDLE_DIR}/third_party/install/zlib/include") +endif() + include_directories("${PADDLE_DIR}/third_party/boost") include_directories("${PADDLE_DIR}/third_party/eigen3") @@ -84,7 +88,10 @@ if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib") endif() -link_directories("${PADDLE_DIR}/third_party/install/zlib/lib") +if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/lib") + link_directories("${PADDLE_DIR}/third_party/install/zlib/lib") +endif() + link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib") link_directories("${PADDLE_DIR}/third_party/install/glog/lib") link_directories("${PADDLE_DIR}/third_party/install/gflags/lib") @@ -186,8 +193,13 @@ if(WITH_STATIC_LIB) set(DEPS ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}) else() - set(DEPS - ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX}) + if (NOT WIN32) + set(DEPS + ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX}) + else() + set(DEPS + ${PADDLE_DIR}/paddle/lib/paddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX}) + endif() endif() if (NOT WIN32) @@ -204,13 +216,16 @@ if (NOT WIN32) else() set(DEPS ${DEPS} ${MATH_LIB} ${MKLDNN_LIB} - glog gflags_static libprotobuf zlibstatic xxhash libyaml-cppmt) + glog gflags_static libprotobuf xxhash libyaml-cppmt) + if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/lib") + set(DEPS ${DEPS} zlibstatic) + endif() set(DEPS ${DEPS} libcmt shlwapi) if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib") set(DEPS ${DEPS} snappy) endif() - if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") + if (EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") set(DEPS ${DEPS} snappystream) endif() endif(NOT WIN32) @@ -236,7 +251,9 @@ if(WITH_ENCRYPTION) link_directories("${ENCRYPTION_DIR}/lib") set(DEPS ${DEPS} ${ENCRYPTION_DIR}/lib/libpmodel-decrypt${CMAKE_SHARED_LIBRARY_SUFFIX}) else() - message(FATAL_ERROR "Encryption Tool don't support WINDOWS") + include_directories("${ENCRYPTION_DIR}/include") + link_directories("${ENCRYPTION_DIR}/lib") + set(DEPS ${DEPS} ${ENCRYPTION_DIR}/lib/pmodel-decrypt${CMAKE_STATIC_LIBRARY_SUFFIX}) endif() endif() @@ -284,10 +301,23 @@ if (WIN32 AND WITH_MKL) COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll ) - + # for encryption + if (EXISTS "${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll") + add_custom_command(TARGET classifier POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different 
${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll + ) + add_custom_command(TARGET detector POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll + ) + add_custom_command(TARGET segmenter POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll + ) + endif() endif() file(COPY "${CMAKE_SOURCE_DIR}/include/paddlex/visualize.h" diff --git a/deploy/cpp/CMakeSettings.json b/deploy/cpp/CMakeSettings.json index 860ca7a61e222d84e5cc7e9b3447bdc8397a8c40..bc18d376daba4a2fb2b3982ecc43eca5fbac0c85 100644 --- a/deploy/cpp/CMakeSettings.json +++ b/deploy/cpp/CMakeSettings.json @@ -21,6 +21,11 @@ "value": "C:/projects/fluid_install_dir_win_cpu_1.6/fluid_install_dir_win_cpu_1.6", "type": "PATH" }, + { + "name": "CUDA_LIB", + "value": "", + "type": "PATH" + }, { "name": "CMAKE_BUILD_TYPE", "value": "Release", @@ -40,8 +45,18 @@ "name": "WITH_GPU", "value": "False", "type": "BOOL" + }, + { + "name": "WITH_ENCRYPTION", + "value": "False", + "type": "BOOL" + }, + { + "name": "ENCRYPTION_DIR", + "value": "", + "type": "PATH" } ] } ] -} \ No newline at end of file +} diff --git a/deploy/cpp/demo/classifier.cpp b/deploy/cpp/demo/classifier.cpp index badb835132418098d332014a590d2dbb7a1e43fd..e616d0e78fe2eb52eaef7303ee3f5a85570d797b 100644 --- a/deploy/cpp/demo/classifier.cpp +++ b/deploy/cpp/demo/classifier.cpp @@ -13,14 +13,19 @@ // limitations under the License. 
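The classifier demo below gains `--batch_size` and `--thread_num` flags and wall-clock instrumentation built on `std::chrono`. A minimal, self-contained sketch of the timing pattern it adopts (variable names are illustrative, not part of the PaddleX API):

```cpp
#include <chrono>
#include <iostream>

int main() {
  using namespace std::chrono;
  double total_running_time_s = 0.0;
  auto start = system_clock::now();
  // ... preprocess + predict one batch here ...
  auto end = system_clock::now();
  auto us = duration_cast<microseconds>(end - start);
  // Convert the microsecond tick count to seconds via the period ratio.
  total_running_time_s += static_cast<double>(us.count()) *
                          microseconds::period::num / microseconds::period::den;
  std::cout << "Batch took " << total_running_time_s << " s" << std::endl;
  return 0;
}
```

The demos accumulate these per-batch durations and divide by the total image count at the end, so what gets printed is a per-image average.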
#include +#include +#include +#include // NOLINT #include #include #include #include - +#include #include "include/paddlex/paddlex.h" +using namespace std::chrono; // NOLINT + DEFINE_string(model_dir, "", "Path of inference model"); DEFINE_bool(use_gpu, false, "Infering with GPU or CPU"); DEFINE_bool(use_trt, false, "Infering with TensorRT"); @@ -28,6 +33,10 @@ DEFINE_int32(gpu_id, 0, "GPU card id"); DEFINE_string(key, "", "key of encryption"); DEFINE_string(image, "", "Path of test image file"); DEFINE_string(image_list, "", "Path of test image list file"); +DEFINE_int32(batch_size, 1, "Batch size of infering"); +DEFINE_int32(thread_num, + omp_get_num_procs(), + "Number of preprocessing threads"); int main(int argc, char** argv) { // Parsing command-line @@ -44,32 +53,81 @@ int main(int argc, char** argv) { // 加载模型 PaddleX::Model model; - model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key); + model.Init(FLAGS_model_dir, + FLAGS_use_gpu, + FLAGS_use_trt, + FLAGS_gpu_id, + FLAGS_key, + FLAGS_batch_size); // 进行预测 + double total_running_time_s = 0.0; + double total_imread_time_s = 0.0; + int imgs = 1; if (FLAGS_image_list != "") { std::ifstream inf(FLAGS_image_list); if (!inf) { std::cerr << "Fail to open file " << FLAGS_image_list << std::endl; return -1; } + // 多batch预测 std::string image_path; + std::vector image_paths; while (getline(inf, image_path)) { - PaddleX::ClsResult result; - cv::Mat im = cv::imread(image_path, 1); - model.predict(im, &result); - std::cout << "Predict label: " << result.category - << ", label_id:" << result.category_id - << ", score: " << result.score << std::endl; + image_paths.push_back(image_path); + } + imgs = image_paths.size(); + for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) { + auto start = system_clock::now(); + // 读图像 + int im_vec_size = + std::min(static_cast(image_paths.size()), i + FLAGS_batch_size); + std::vector im_vec(im_vec_size - i); + std::vector results(im_vec_size - i, + PaddleX::ClsResult()); + int thread_num = std::min(FLAGS_thread_num, im_vec_size - i); + #pragma omp parallel for num_threads(thread_num) + for (int j = i; j < im_vec_size; ++j) { + im_vec[j - i] = std::move(cv::imread(image_paths[j], 1)); + } + auto imread_end = system_clock::now(); + model.predict(im_vec, &results, thread_num); + + auto imread_duration = duration_cast(imread_end - start); + total_imread_time_s += static_cast(imread_duration.count()) * + microseconds::period::num / + microseconds::period::den; + + auto end = system_clock::now(); + auto duration = duration_cast(end - start); + total_running_time_s += static_cast(duration.count()) * + microseconds::period::num / + microseconds::period::den; + for (int j = i; j < im_vec_size; ++j) { + std::cout << "Path:" << image_paths[j] + << ", predict label: " << results[j - i].category + << ", label_id:" << results[j - i].category_id + << ", score: " << results[j - i].score << std::endl; + } } } else { + auto start = system_clock::now(); PaddleX::ClsResult result; cv::Mat im = cv::imread(FLAGS_image, 1); model.predict(im, &result); + auto end = system_clock::now(); + auto duration = duration_cast(end - start); + total_running_time_s += static_cast(duration.count()) * + microseconds::period::num / + microseconds::period::den; std::cout << "Predict label: " << result.category << ", label_id:" << result.category_id << ", score: " << result.score << std::endl; } - + std::cout << "Total running time: " << total_running_time_s + << " s, average running time: " << total_running_time_s 
/ imgs + << " s/img, total read img time: " << total_imread_time_s + << " s, average read time: " << total_imread_time_s / imgs + << " s/img, batch_size = " << FLAGS_batch_size << std::endl; return 0; } diff --git a/deploy/cpp/demo/detector.cpp b/deploy/cpp/demo/detector.cpp index e42288fbccd434ef5953c606696af623323aa80d..81046552914d21c1cf1d0d408217663708b25f80 100644 --- a/deploy/cpp/demo/detector.cpp +++ b/deploy/cpp/demo/detector.cpp @@ -13,15 +13,21 @@ // limitations under the License. #include +#include +#include +#include // NOLINT #include #include #include #include +#include #include "include/paddlex/paddlex.h" #include "include/paddlex/visualize.h" +using namespace std::chrono; // NOLINT + DEFINE_string(model_dir, "", "Path of inference model"); DEFINE_bool(use_gpu, false, "Infering with GPU or CPU"); DEFINE_bool(use_trt, false, "Infering with TensorRT"); @@ -30,6 +36,13 @@ DEFINE_string(key, "", "key of encryption"); DEFINE_string(image, "", "Path of test image file"); DEFINE_string(image_list, "", "Path of test image list file"); DEFINE_string(save_dir, "output", "Path to save visualized image"); +DEFINE_int32(batch_size, 1, "Batch size of infering"); +DEFINE_double(threshold, + 0.5, + "The minimum scores of target boxes which are shown"); +DEFINE_int32(thread_num, + omp_get_num_procs(), + "Number of preprocessing threads"); int main(int argc, char** argv) { // 解析命令行参数 @@ -43,11 +56,19 @@ int main(int argc, char** argv) { std::cerr << "--image or --image_list need to be defined" << std::endl; return -1; } - + std::cout << "Thread num: " << FLAGS_thread_num << std::endl; // 加载模型 PaddleX::Model model; - model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key); + model.Init(FLAGS_model_dir, + FLAGS_use_gpu, + FLAGS_use_trt, + FLAGS_gpu_id, + FLAGS_key, + FLAGS_batch_size); + double total_running_time_s = 0.0; + double total_imread_time_s = 0.0; + int imgs = 1; auto colormap = PaddleX::GenerateColorMap(model.labels.size()); std::string save_dir = "output"; // 进行预测 @@ -58,47 +79,76 @@ int main(int argc, char** argv) { return -1; } std::string image_path; + std::vector image_paths; while (getline(inf, image_path)) { - PaddleX::DetResult result; - cv::Mat im = cv::imread(image_path, 1); - model.predict(im, &result); - for (int i = 0; i < result.boxes.size(); ++i) { - std::cout << "image file: " << image_path - << ", predict label: " << result.boxes[i].category - << ", label_id:" << result.boxes[i].category_id - << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):(" - << result.boxes[i].coordinate[0] << ", " - << result.boxes[i].coordinate[1] << ", " - << result.boxes[i].coordinate[2] << ", " - << result.boxes[i].coordinate[3] << ")" << std::endl; + image_paths.push_back(image_path); + } + imgs = image_paths.size(); + for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) { + auto start = system_clock::now(); + int im_vec_size = + std::min(static_cast(image_paths.size()), i + FLAGS_batch_size); + std::vector im_vec(im_vec_size - i); + std::vector results(im_vec_size - i, + PaddleX::DetResult()); + int thread_num = std::min(FLAGS_thread_num, im_vec_size - i); + #pragma omp parallel for num_threads(thread_num) + for (int j = i; j < im_vec_size; ++j) { + im_vec[j - i] = std::move(cv::imread(image_paths[j], 1)); + } + auto imread_end = system_clock::now(); + model.predict(im_vec, &results, thread_num); + auto imread_duration = duration_cast(imread_end - start); + total_imread_time_s += static_cast(imread_duration.count()) * + 
microseconds::period::num / + microseconds::period::den; + auto end = system_clock::now(); + auto duration = duration_cast(end - start); + total_running_time_s += static_cast(duration.count()) * + microseconds::period::num / + microseconds::period::den; + // 输出结果目标框 + for (int j = 0; j < im_vec_size - i; ++j) { + for (int k = 0; k < results[j].boxes.size(); ++k) { + std::cout << "image file: " << image_paths[i + j] << ", "; + std::cout << "predict label: " << results[j].boxes[k].category + << ", label_id:" << results[j].boxes[k].category_id + << ", score: " << results[j].boxes[k].score + << ", box(xmin, ymin, w, h):(" + << results[j].boxes[k].coordinate[0] << ", " + << results[j].boxes[k].coordinate[1] << ", " + << results[j].boxes[k].coordinate[2] << ", " + << results[j].boxes[k].coordinate[3] << ")" << std::endl; + } } - // 可视化 - cv::Mat vis_img = - PaddleX::Visualize(im, result, model.labels, colormap, 0.5); - std::string save_path = - PaddleX::generate_save_path(FLAGS_save_dir, image_path); - cv::imwrite(save_path, vis_img); - result.clear(); - std::cout << "Visualized output saved as " << save_path << std::endl; + for (int j = 0; j < im_vec_size - i; ++j) { + cv::Mat vis_img = PaddleX::Visualize( + im_vec[j], results[j], model.labels, colormap, FLAGS_threshold); + std::string save_path = + PaddleX::generate_save_path(FLAGS_save_dir, image_paths[i + j]); + cv::imwrite(save_path, vis_img); + std::cout << "Visualized output saved as " << save_path << std::endl; + } } } else { PaddleX::DetResult result; cv::Mat im = cv::imread(FLAGS_image, 1); model.predict(im, &result); for (int i = 0; i < result.boxes.size(); ++i) { + std::cout << "image file: " << FLAGS_image << std::endl; std::cout << ", predict label: " << result.boxes[i].category << ", label_id:" << result.boxes[i].category_id - << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):(" - << result.boxes[i].coordinate[0] << ", " - << result.boxes[i].coordinate[1] << ", " + << ", score: " << result.boxes[i].score + << ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0] + << ", " << result.boxes[i].coordinate[1] << ", " << result.boxes[i].coordinate[2] << ", " << result.boxes[i].coordinate[3] << ")" << std::endl; } // 可视化 cv::Mat vis_img = - PaddleX::Visualize(im, result, model.labels, colormap, 0.5); + PaddleX::Visualize(im, result, model.labels, colormap, FLAGS_threshold); std::string save_path = PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image); cv::imwrite(save_path, vis_img); @@ -106,5 +156,11 @@ int main(int argc, char** argv) { std::cout << "Visualized output saved as " << save_path << std::endl; } + std::cout << "Total running time: " << total_running_time_s + << " s, average running time: " << total_running_time_s / imgs + << " s/img, total read img time: " << total_imread_time_s + << " s, average read img time: " << total_imread_time_s / imgs + << " s, batch_size = " << FLAGS_batch_size << std::endl; + return 0; } diff --git a/deploy/cpp/demo/segmenter.cpp b/deploy/cpp/demo/segmenter.cpp index 0492ef803e15268022d869eb8b8e84969b1c8fad..b9a81886414dc6505b9e82a8f79588d2faf42a2f 100644 --- a/deploy/cpp/demo/segmenter.cpp +++ b/deploy/cpp/demo/segmenter.cpp @@ -13,15 +13,20 @@ // limitations under the License. 
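detector.cpp above and segmenter.cpp below share the same batched-read pattern: gather every path from `--image_list`, then decode each batch in parallel with OpenMP before handing the `cv::Mat` vector to the batched `predict()`. A self-contained sketch of that loop, assuming compilation with OpenMP enabled (e.g. `-fopenmp`; function and variable names are illustrative):

```cpp
#include <algorithm>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>

// Decode images [i, i + batch_size) of `paths` into `out`, one image per
// OpenMP thread; the demos rely on cv::imread being safe to call
// concurrently on distinct files.
void read_batch(const std::vector<std::string>& paths, int i, int batch_size,
                int requested_threads, std::vector<cv::Mat>* out) {
  int im_vec_size = std::min(static_cast<int>(paths.size()), i + batch_size);
  out->resize(im_vec_size - i);
  int thread_num = std::min(requested_threads, im_vec_size - i);
  #pragma omp parallel for num_threads(thread_num)
  for (int j = i; j < im_vec_size; ++j) {
    (*out)[j - i] = cv::imread(paths[j], 1);  // 1 = load as 3-channel BGR
  }
}
```

Clamping `thread_num` to the remaining batch size avoids spawning idle threads on the final, possibly smaller, batch.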
#include +#include +#include +#include // NOLINT #include #include #include #include - +#include #include "include/paddlex/paddlex.h" #include "include/paddlex/visualize.h" +using namespace std::chrono; // NOLINT + DEFINE_string(model_dir, "", "Path of inference model"); DEFINE_bool(use_gpu, false, "Infering with GPU or CPU"); DEFINE_bool(use_trt, false, "Infering with TensorRT"); @@ -30,6 +35,10 @@ DEFINE_string(key, "", "key of encryption"); DEFINE_string(image, "", "Path of test image file"); DEFINE_string(image_list, "", "Path of test image list file"); DEFINE_string(save_dir, "output", "Path to save visualized image"); +DEFINE_int32(batch_size, 1, "Batch size of infering"); +DEFINE_int32(thread_num, + omp_get_num_procs(), + "Number of preprocessing threads"); int main(int argc, char** argv) { // 解析命令行参数 @@ -46,8 +55,16 @@ int main(int argc, char** argv) { // 加载模型 PaddleX::Model model; - model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key); + model.Init(FLAGS_model_dir, + FLAGS_use_gpu, + FLAGS_use_trt, + FLAGS_gpu_id, + FLAGS_key, + FLAGS_batch_size); + double total_running_time_s = 0.0; + double total_imread_time_s = 0.0; + int imgs = 1; auto colormap = PaddleX::GenerateColorMap(model.labels.size()); // 进行预测 if (FLAGS_image_list != "") { @@ -57,23 +74,54 @@ int main(int argc, char** argv) { return -1; } std::string image_path; + std::vector image_paths; while (getline(inf, image_path)) { - PaddleX::SegResult result; - cv::Mat im = cv::imread(image_path, 1); - model.predict(im, &result); + image_paths.push_back(image_path); + } + imgs = image_paths.size(); + for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) { + auto start = system_clock::now(); + int im_vec_size = + std::min(static_cast(image_paths.size()), i + FLAGS_batch_size); + std::vector im_vec(im_vec_size - i); + std::vector results(im_vec_size - i, + PaddleX::SegResult()); + int thread_num = std::min(FLAGS_thread_num, im_vec_size - i); + #pragma omp parallel for num_threads(thread_num) + for (int j = i; j < im_vec_size; ++j) { + im_vec[j - i] = std::move(cv::imread(image_paths[j], 1)); + } + auto imread_end = system_clock::now(); + model.predict(im_vec, &results, thread_num); + auto imread_duration = duration_cast(imread_end - start); + total_imread_time_s += static_cast(imread_duration.count()) * + microseconds::period::num / + microseconds::period::den; + auto end = system_clock::now(); + auto duration = duration_cast(end - start); + total_running_time_s += static_cast(duration.count()) * + microseconds::period::num / + microseconds::period::den; // 可视化 - cv::Mat vis_img = - PaddleX::Visualize(im, result, model.labels, colormap); - std::string save_path = - PaddleX::generate_save_path(FLAGS_save_dir, image_path); - cv::imwrite(save_path, vis_img); - result.clear(); - std::cout << "Visualized output saved as " << save_path << std::endl; + for (int j = 0; j < im_vec_size - i; ++j) { + cv::Mat vis_img = + PaddleX::Visualize(im_vec[j], results[j], model.labels, colormap); + std::string save_path = + PaddleX::generate_save_path(FLAGS_save_dir, image_paths[i + j]); + cv::imwrite(save_path, vis_img); + std::cout << "Visualized output saved as " << save_path << std::endl; + } } } else { + auto start = system_clock::now(); PaddleX::SegResult result; cv::Mat im = cv::imread(FLAGS_image, 1); model.predict(im, &result); + auto end = system_clock::now(); + auto duration = duration_cast(end - start); + total_running_time_s += static_cast(duration.count()) * + microseconds::period::num / + 
microseconds::period::den; // 可视化 cv::Mat vis_img = PaddleX::Visualize(im, result, model.labels, colormap); std::string save_path = @@ -82,6 +130,11 @@ int main(int argc, char** argv) { result.clear(); std::cout << "Visualized output saved as " << save_path << std::endl; } + std::cout << "Total running time: " << total_running_time_s + << " s, average running time: " << total_running_time_s / imgs + << " s/img, total read img time: " << total_imread_time_s + << " s, average read img time: " << total_imread_time_s / imgs + << " s, batch_size = " << FLAGS_batch_size << std::endl; return 0; } diff --git a/deploy/cpp/include/paddlex/config_parser.h b/deploy/cpp/include/paddlex/config_parser.h index 5303e4da7ac0eb3de73bc57059617d361065f136..850e46656d9efdb5374e3086757cb5350f0457b2 100644 --- a/deploy/cpp/include/paddlex/config_parser.h +++ b/deploy/cpp/include/paddlex/config_parser.h @@ -54,4 +54,4 @@ class ConfigPaser { YAML::Node Transforms_; }; -} // namespace PaddleDetection +} // namespace PaddleX diff --git a/deploy/cpp/include/paddlex/paddlex.h b/deploy/cpp/include/paddlex/paddlex.h index d000728c763666e46271d4602b0e42c41dc130f1..74b478c0fedae13c1f25eb125f27b85e1755cd45 100644 --- a/deploy/cpp/include/paddlex/paddlex.h +++ b/deploy/cpp/include/paddlex/paddlex.h @@ -16,8 +16,11 @@ #include #include +#include +#include #include - +#include +#include #include "yaml-cpp/yaml.h" #ifdef _WIN32 @@ -28,53 +31,193 @@ #include "paddle_inference_api.h" // NOLINT -#include "config_parser.h" -#include "results.h" -#include "transforms.h" +#include "config_parser.h" // NOLINT +#include "results.h" // NOLINT +#include "transforms.h" // NOLINT #ifdef WITH_ENCRYPTION -#include "paddle_model_decrypt.h" -#include "model_code.h" +#include "paddle_model_decrypt.h" // NOLINT +#include "model_code.h" // NOLINT #endif namespace PaddleX { +/* + * @brief + * This class encapsulates all necessary proccess steps of model infering, which + * include image matrix preprocessing, model predicting and results postprocessing. + * The entire process of model infering can be simplified as below: + * 1. preprocess image matrix (resize, padding, ......) + * 2. model infer + * 3. 
postprocess the results which generated from model infering + * + * @example + * PaddleX::Model cls_model; + * // initialize model configuration + * cls_model.Init(cls_model_dir, use_gpu, use_trt, gpu_id, encryption_key); + * // define a Classification result object + * PaddleX::ClsResult cls_result; + * // get image matrix from image file + * cv::Mat im = cv::imread(image_file_path, 1); + * cls_model.predict(im, &cls_result); + * */ class Model { public: + /* + * @brief + * This method aims to initialize the model configuration + * + * @param model_dir: the directory which contains model.yml + * @param use_gpu: use gpu or not when infering + * @param use_trt: use Tensor RT or not when infering + * @param gpu_id: the id of gpu when infering with using gpu + * @param key: the key of encryption when using encrypted model + * @param batch_size: batch size of infering + * */ void Init(const std::string& model_dir, bool use_gpu = false, bool use_trt = false, int gpu_id = 0, - std::string key = "") { - create_predictor(model_dir, use_gpu, use_trt, gpu_id, key); + std::string key = "", + int batch_size = 1) { + create_predictor(model_dir, use_gpu, use_trt, gpu_id, key, batch_size); } void create_predictor(const std::string& model_dir, bool use_gpu = false, bool use_trt = false, int gpu_id = 0, - std::string key = ""); - - bool load_config(const std::string& model_dir); - + std::string key = "", + int batch_size = 1); + + /* + * @brief + * This method aims to load model configurations which include + * transform steps and label list + * + * @param yaml_input: model configuration string + * @return true if load configuration successfully + * */ + bool load_config(const std::string& yaml_input); + + /* + * @brief + * This method aims to transform single image matrix, the result will be + * returned at second parameter. + * + * @param input_im: single image matrix to be transformed + * @param blob: the raw data of single image matrix after transformed + * @return true if preprocess image matrix successfully + * */ bool preprocess(const cv::Mat& input_im, ImageBlob* blob); + /* + * @brief + * This method aims to transform mutiple image matrixs, the result will be + * returned at second parameter. + * + * @param input_im_batch: a batch of image matrixs to be transformed + * @param blob_blob: raw data of a batch of image matrixs after transformed + * @param thread_num: the number of preprocessing threads, + * each thread run preprocess on single image matrix + * @return true if preprocess a batch of image matrixs successfully + * */ + bool preprocess(const std::vector &input_im_batch, + std::vector *blob_batch, + int thread_num = 1); + + /* + * @brief + * This method aims to execute classification model prediction on single image matrix, + * the result will be returned at second parameter. + * + * @param im: single image matrix to be predicted + * @param result: classification prediction result data after postprocessed + * @return true if predict successfully + * */ bool predict(const cv::Mat& im, ClsResult* result); + /* + * @brief + * This method aims to execute classification model prediction on a batch of image matrixs, + * the result will be returned at second parameter. 
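+ *
+ * (Illustrative usage, assuming a loaded classification model:
+ *    std::vector<cv::Mat> im_batch = {im0, im1};
+ *    std::vector<PaddleX::ClsResult> results(im_batch.size());
+ *    cls_model.predict(im_batch, &results, 2);)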
+ * + * @param im: a batch of image matrixs to be predicted + * @param results: a batch of classification prediction result data after postprocessed + * @param thread_num: the number of predicting threads, each thread run prediction + * on single image matrix + * @return true if predict successfully + * */ + bool predict(const std::vector &im_batch, + std::vector *results, + int thread_num = 1); + + /* + * @brief + * This method aims to execute detection or instance segmentation model prediction + * on single image matrix, the result will be returned at second parameter. + * + * @param im: single image matrix to be predicted + * @param result: detection or instance segmentation prediction result data after postprocessed + * @return true if predict successfully + * */ bool predict(const cv::Mat& im, DetResult* result); + /* + * @brief + * This method aims to execute detection or instance segmentation model prediction + * on a batch of image matrixs, the result will be returned at second parameter. + * + * @param im: a batch of image matrix to be predicted + * @param result: detection or instance segmentation prediction result data after postprocessed + * @param thread_num: the number of predicting threads, each thread run prediction + * on single image matrix + * @return true if predict successfully + * */ + bool predict(const std::vector &im_batch, + std::vector *result, + int thread_num = 1); + + /* + * @brief + * This method aims to execute segmentation model prediction on single image matrix, + * the result will be returned at second parameter. + * + * @param im: single image matrix to be predicted + * @param result: segmentation prediction result data after postprocessed + * @return true if predict successfully + * */ bool predict(const cv::Mat& im, SegResult* result); - bool postprocess(SegResult* result); - - bool postprocess(DetResult* result); - + /* + * @brief + * This method aims to execute segmentation model prediction on a batch of image matrix, + * the result will be returned at second parameter. + * + * @param im: a batch of image matrix to be predicted + * @param result: segmentation prediction result data after postprocessed + * @param thread_num: the number of predicting threads, each thread run prediction + * on single image matrix + * @return true if predict successfully + * */ + bool predict(const std::vector &im_batch, + std::vector *result, + int thread_num = 1); + + // model type, include 3 type: classifier, detector, segmenter std::string type; + // model name, such as FasterRCNN, YOLOV3 and so on. std::string name; std::map labels; + // transform(preprocessing) pipeline manager Transforms transforms_; + // single input preprocessed data ImageBlob inputs_; + // batch input preprocessed data + std::vector inputs_batch_; + // raw data of predicting results std::vector outputs_; + // a predictor which run the model predicting std::unique_ptr predictor_; }; -} // namespce of PaddleX +} // namespace PaddleX diff --git a/deploy/cpp/include/paddlex/results.h b/deploy/cpp/include/paddlex/results.h index 1643c9249e8e8e993017c7702d1d490352c2d9a8..72caa1f5d4f78275ca9c4de55aa89bc22edd02e5 100644 --- a/deploy/cpp/include/paddlex/results.h +++ b/deploy/cpp/include/paddlex/results.h @@ -20,9 +20,15 @@ namespace PaddleX { +/* + * @brief + * This class represents mask in instance segmentation tasks. 
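+ * The template parameter is the element type of the raw mask data
+ * (an integer type for label maps, float for score maps and box masks).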
+ * */ template struct Mask { + // raw data of mask std::vector data; + // the shape of mask std::vector shape; void clear() { data.clear(); @@ -30,19 +36,34 @@ struct Mask { } }; +/* + * @brief + * This class represents target box in detection or instance segmentation tasks. + * */ struct Box { int category_id; + // category label this box belongs to std::string category; + // confidence score float score; std::vector coordinate; Mask mask; }; +/* + * @brief + * This class is prediction result based class. + * */ class BaseResult { public: + // model type std::string type = "base"; }; +/* + * @brief + * This class represent classification result. + * */ class ClsResult : public BaseResult { public: int category_id; @@ -51,17 +72,28 @@ class ClsResult : public BaseResult { std::string type = "cls"; }; +/* + * @brief + * This class represent detection or instance segmentation result. + * */ class DetResult : public BaseResult { public: + // target boxes std::vector boxes; int mask_resolution; std::string type = "det"; void clear() { boxes.clear(); } }; +/* + * @brief + * This class represent segmentation result. + * */ class SegResult : public BaseResult { public: + // represent label of each pixel on image matrix Mask label_map; + // represent score of each pixel on image matrix Mask score_map; std::string type = "seg"; void clear() { diff --git a/deploy/cpp/include/paddlex/transforms.h b/deploy/cpp/include/paddlex/transforms.h index f8265db447f693d084c5a789504bc4b0ccc14d28..23608d4c333a8ee44b5386d515b9d89c25ccdf7f 100644 --- a/deploy/cpp/include/paddlex/transforms.h +++ b/deploy/cpp/include/paddlex/transforms.h @@ -28,7 +28,10 @@ namespace PaddleX { -// Object for storing all preprocessed data +/* + * @brief + * This class represents object for storing all preprocessed data + * */ class ImageBlob { public: // Original image height and width @@ -45,21 +48,34 @@ class ImageBlob { std::vector im_data_; void clear() { - ori_im_size_.clear(); - new_im_size_.clear(); im_size_before_resize_.clear(); reshape_order_.clear(); im_data_.clear(); } }; -// Abstraction of preprocessing opration class +/* + * @brief + * Abstraction of preprocessing operation class + * */ class Transform { public: virtual void Init(const YAML::Node& item) = 0; + /* + * @brief + * This method executes preprocessing operation on image matrix, + * result will be returned at second parameter. + * @param im: single image matrix to be preprocessed + * @param data: the raw data of single image matrix after preprocessed + * @return true if transform successfully + * */ virtual bool Run(cv::Mat* im, ImageBlob* data) = 0; }; +/* + * @brief + * This class execute normalization operation on image matrix + * */ class Normalize : public Transform { public: virtual void Init(const YAML::Node& item) { @@ -74,6 +90,14 @@ class Normalize : public Transform { std::vector std_; }; +/* + * @brief + * This class execute resize by short operation on image matrix. At first, it resizes + * the short side of image matrix to specified length. Accordingly, the long side + * will be resized in the same proportion. If new length of long side exceeds max + * size, the long size will be resized to max size, and the short size will be + * resized in the same proportion + * */ class ResizeByShort : public Transform { public: virtual void Init(const YAML::Node& item) { @@ -92,6 +116,12 @@ class ResizeByShort : public Transform { int max_size_; }; +/* + * @brief + * This class execute resize by long operation on image matrix. 
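+ * (Unlike ResizeByShort, no max size cap applies here.)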
At first, it resizes + * the long side of image matrix to specified length. Accordingly, the short side + * will be resized in the same proportion. + * */ class ResizeByLong : public Transform { public: virtual void Init(const YAML::Node& item) { @@ -103,6 +133,11 @@ class ResizeByLong : public Transform { int long_size_; }; +/* + * @brief + * This class execute resize operation on image matrix. It resizes width and height + * to specified length. + * */ class Resize : public Transform { public: virtual void Init(const YAML::Node& item) { @@ -128,6 +163,11 @@ class Resize : public Transform { std::string interp_; }; +/* + * @brief + * This class execute center crop operation on image matrix. It crops the center + * of image matrix accroding to specified size. + * */ class CenterCrop : public Transform { public: virtual void Init(const YAML::Node& item) { @@ -147,6 +187,11 @@ class CenterCrop : public Transform { int width_; }; +/* + * @brief + * This class execute padding operation on image matrix. It makes border on edge + * of image matrix. + * */ class Padding : public Transform { public: virtual void Init(const YAML::Node& item) { @@ -175,7 +220,11 @@ class Padding : public Transform { int width_ = 0; int height_ = 0; }; - +/* + * @brief + * This class is transform operations manager. It stores all neccessary + * transform operations and run them in correct order. + * */ class Transforms { public: void Init(const YAML::Node& node, bool to_rgb = true); diff --git a/deploy/cpp/include/paddlex/visualize.h b/deploy/cpp/include/paddlex/visualize.h index 7a71f474d028795aa1dec3cd993f5480c0906ced..5a8e39a762ff5bbde966aff213f1751f520789e2 100644 --- a/deploy/cpp/include/paddlex/visualize.h +++ b/deploy/cpp/include/paddlex/visualize.h @@ -43,20 +43,55 @@ namespace PaddleX { -// Generate visualization colormap for each class +/* + * @brief + * Generate visualization colormap for each class + * + * @param number of class + * @return color map, the size of vector is 3 * num_class + * */ std::vector GenerateColorMap(int num_class); + +/* + * @brief + * Visualize the detection result + * + * @param img: initial image matrix + * @param results: the detection result + * @param labels: label map + * @param colormap: visualization color map + * @return visualized image matrix + * */ cv::Mat Visualize(const cv::Mat& img, const DetResult& results, const std::map& labels, const std::vector& colormap, float threshold = 0.5); +/* + * @brief + * Visualize the segmentation result + * + * @param img: initial image matrix + * @param results: the detection result + * @param labels: label map + * @param colormap: visualization color map + * @return visualized image matrix + * */ cv::Mat Visualize(const cv::Mat& img, const SegResult& result, const std::map& labels, const std::vector& colormap); +/* + * @brief + * generate save path for visualized image matrix + * + * @param save_dir: directory for saving visualized image matrix + * @param file_path: sourcen image file path + * @return path of saving visualized result + * */ std::string generate_save_path(const std::string& save_dir, const std::string& file_path); -} // namespce of PaddleX +} // namespace PaddleX diff --git a/deploy/cpp/src/paddlex.cpp b/deploy/cpp/src/paddlex.cpp index 90a4a4452b9e5f3eba1c0b4c7ab88f5b91e03971..e7fd9402b8ec6daa87dbba701699659a36416cad 100644 --- a/deploy/cpp/src/paddlex.cpp +++ b/deploy/cpp/src/paddlex.cpp @@ -11,32 +11,50 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
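In the implementation that follows, a whole batch is copied into one input tensor of shape {batch_size, 3, h, w}, and the flat output buffer is sliced back per image. For classification, that postprocessing reduces to an argmax over each slice; a sketch with illustrative names, not the exact PaddleX code:

```cpp
#include <algorithm>
#include <iterator>
#include <vector>

// Split a flat score buffer holding `batch_size` contiguous per-image score
// vectors and take the argmax of each slice.
void argmax_per_image(const std::vector<float>& outputs, int batch_size,
                      std::vector<int>* category_ids,
                      std::vector<float>* scores) {
  int single = static_cast<int>(outputs.size()) / batch_size;  // scores per image
  for (int i = 0; i < batch_size; ++i) {
    auto begin = outputs.begin() + i * single;
    auto it = std::max_element(begin, begin + single);  // argmax of this slice
    category_ids->push_back(static_cast<int>(std::distance(begin, it)));
    scores->push_back(*it);
  }
}
```

The same `size / batch_size` slicing reappears further down for the segmentation label and score maps.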
// See the License for the specific language governing permissions and // limitations under the License. - +#include +#include +#include +#include #include "include/paddlex/paddlex.h" - namespace PaddleX { void Model::create_predictor(const std::string& model_dir, bool use_gpu, bool use_trt, int gpu_id, - std::string key) { - // 读取配置文件 - if (!load_config(model_dir)) { - std::cerr << "Parse file 'model.yml' failed!" << std::endl; - exit(-1); - } + std::string key, + int batch_size) { paddle::AnalysisConfig config; std::string model_file = model_dir + OS_PATH_SEP + "__model__"; std::string params_file = model_dir + OS_PATH_SEP + "__params__"; + std::string yaml_file = model_dir + OS_PATH_SEP + "model.yml"; + std::string yaml_input = ""; #ifdef WITH_ENCRYPTION - if (key != ""){ + if (key != "") { model_file = model_dir + OS_PATH_SEP + "__model__.encrypted"; params_file = model_dir + OS_PATH_SEP + "__params__.encrypted"; - paddle_security_load_model(&config, key.c_str(), model_file.c_str(), params_file.c_str()); + yaml_file = model_dir + OS_PATH_SEP + "model.yml.encrypted"; + paddle_security_load_model( + &config, key.c_str(), model_file.c_str(), params_file.c_str()); + yaml_input = decrypt_file(yaml_file.c_str(), key.c_str()); } #endif - if (key == ""){ + if (yaml_input == "") { + // 读取配置文件 + std::ifstream yaml_fin(yaml_file); + yaml_fin.seekg(0, std::ios::end); + size_t yaml_file_size = yaml_fin.tellg(); + yaml_input.assign(yaml_file_size, ' '); + yaml_fin.seekg(0); + yaml_fin.read(&yaml_input[0], yaml_file_size); + } + // 读取配置文件内容 + if (!load_config(yaml_input)) { + std::cerr << "Parse file 'model.yml' failed!" << std::endl; + exit(-1); + } + + if (key == "") { config.SetModel(model_file, params_file); } if (use_gpu) { @@ -58,20 +76,20 @@ void Model::create_predictor(const std::string& model_dir, false /* use_calib_mode*/); } predictor_ = std::move(CreatePaddlePredictor(config)); + inputs_batch_.assign(batch_size, ImageBlob()); } -bool Model::load_config(const std::string& model_dir) { - std::string yaml_file = model_dir + OS_PATH_SEP + "model.yml"; - YAML::Node config = YAML::LoadFile(yaml_file); +bool Model::load_config(const std::string& yaml_input) { + YAML::Node config = YAML::Load(yaml_input); type = config["_Attributes"]["model_type"].as(); name = config["Model"].as(); std::string version = config["version"].as(); if (version[0] == '0') { - std::cerr << "[Init] Version of the loaded model is lower than 1.0.0, deployment " - << "cannot be done, please refer to " - << "https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/tutorials/deploy/upgrade_version.md " - << "to transfer version." - << std::endl; + std::cerr << "[Init] Version of the loaded model is lower than 1.0.0, " + << "deployment cannot be done, please refer to " + << "https://github.com/PaddlePaddle/PaddleX/blob/develop/docs" + << "/tutorials/deploy/upgrade_version.md " + << "to transfer version." 
<< std::endl; return false; } bool to_rgb = true; @@ -104,17 +122,29 @@ bool Model::preprocess(const cv::Mat& input_im, ImageBlob* blob) { return true; } +// use openmp +bool Model::preprocess(const std::vector& input_im_batch, + std::vector* blob_batch, + int thread_num) { + int batch_size = input_im_batch.size(); + bool success = true; + thread_num = std::min(thread_num, batch_size); + #pragma omp parallel for num_threads(thread_num) + for (int i = 0; i < input_im_batch.size(); ++i) { + cv::Mat im = input_im_batch[i].clone(); + if (!transforms_.Run(&im, &(*blob_batch)[i])) { + success = false; + } + } + return success; +} + bool Model::predict(const cv::Mat& im, ClsResult* result) { inputs_.clear(); if (type == "detector") { std::cerr << "Loading model is a 'detector', DetResult should be passed to " "function predict()!" - << std::endl; - return false; - } else if (type == "segmenter") { - std::cerr << "Loading model is a 'segmenter', SegResult should be passed " - "to function predict()!" - << std::endl; + "to function predict()!" << std::endl; return false; } // 处理输入图像 @@ -144,20 +174,79 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) { result->category_id = std::distance(std::begin(outputs_), ptr); result->score = *ptr; result->category = labels[result->category_id]; + return true; +} + +bool Model::predict(const std::vector& im_batch, + std::vector* results, + int thread_num) { + for (auto& inputs : inputs_batch_) { + inputs.clear(); + } + if (type == "detector") { + std::cerr << "Loading model is a 'detector', DetResult should be passed to " + "function predict()!" << std::endl; + return false; + } else if (type == "segmenter") { + std::cerr << "Loading model is a 'segmenter', SegResult should be passed " + "to function predict()!" << std::endl; + return false; + } + // 处理输入图像 + if (!preprocess(im_batch, &inputs_batch_, thread_num)) { + std::cerr << "Preprocess failed!" 
<< std::endl; + return false; + } + // 使用加载的模型进行预测 + int batch_size = im_batch.size(); + auto in_tensor = predictor_->GetInputTensor("image"); + int h = inputs_batch_[0].new_im_size_[0]; + int w = inputs_batch_[0].new_im_size_[1]; + in_tensor->Reshape({batch_size, 3, h, w}); + std::vector inputs_data(batch_size * 3 * h * w); + for (int i = 0; i < batch_size; ++i) { + std::copy(inputs_batch_[i].im_data_.begin(), + inputs_batch_[i].im_data_.end(), + inputs_data.begin() + i * 3 * h * w); + } + in_tensor->copy_from_cpu(inputs_data.data()); + // in_tensor->copy_from_cpu(inputs_.im_data_.data()); + predictor_->ZeroCopyRun(); + // 取出模型的输出结果 + auto output_names = predictor_->GetOutputNames(); + auto output_tensor = predictor_->GetOutputTensor(output_names[0]); + std::vector output_shape = output_tensor->shape(); + int size = 1; + for (const auto& i : output_shape) { + size *= i; + } + outputs_.resize(size); + output_tensor->copy_to_cpu(outputs_.data()); + // 对模型输出结果进行后处理 + int single_batch_size = size / batch_size; + for (int i = 0; i < batch_size; ++i) { + auto start_ptr = std::begin(outputs_); + auto end_ptr = std::begin(outputs_); + std::advance(start_ptr, i * single_batch_size); + std::advance(end_ptr, (i + 1) * single_batch_size); + auto ptr = std::max_element(start_ptr, end_ptr); + (*results)[i].category_id = std::distance(start_ptr, ptr); + (*results)[i].score = *ptr; + (*results)[i].category = labels[(*results)[i].category_id]; + } + return true; } bool Model::predict(const cv::Mat& im, DetResult* result) { - result->clear(); inputs_.clear(); + result->clear(); if (type == "classifier") { std::cerr << "Loading model is a 'classifier', ClsResult should be passed " - "to function predict()!" - << std::endl; + "to function predict()!" << std::endl; return false; } else if (type == "segmenter") { std::cerr << "Loading model is a 'segmenter', SegResult should be passed " - "to function predict()!" - << std::endl; + "to function predict()!" << std::endl; return false; } @@ -172,6 +261,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) { auto im_tensor = predictor_->GetInputTensor("image"); im_tensor->Reshape({1, 3, h, w}); im_tensor->copy_from_cpu(inputs_.im_data_.data()); + if (name == "YOLOv3") { auto im_size_tensor = predictor_->GetInputTensor("im_size"); im_size_tensor->Reshape({1, 2}); @@ -247,6 +337,180 @@ bool Model::predict(const cv::Mat& im, DetResult* result) { static_cast(box->coordinate[3])}; } } + return true; +} + +bool Model::predict(const std::vector& im_batch, + std::vector* result, + int thread_num) { + for (auto& inputs : inputs_batch_) { + inputs.clear(); + } + if (type == "classifier") { + std::cerr << "Loading model is a 'classifier', ClsResult should be passed " + "to function predict()!" << std::endl; + return false; + } else if (type == "segmenter") { + std::cerr << "Loading model is a 'segmenter', SegResult should be passed " + "to function predict()!" << std::endl; + return false; + } + + int batch_size = im_batch.size(); + // 处理输入图像 + if (!preprocess(im_batch, &inputs_batch_, thread_num)) { + std::cerr << "Preprocess failed!" 
<< std::endl; + return false; + } + // 对RCNN类模型做批量padding + if (batch_size > 1) { + if (name == "FasterRCNN" || name == "MaskRCNN") { + int max_h = -1; + int max_w = -1; + for (int i = 0; i < batch_size; ++i) { + max_h = std::max(max_h, inputs_batch_[i].new_im_size_[0]); + max_w = std::max(max_w, inputs_batch_[i].new_im_size_[1]); + // std::cout << "(" << inputs_batch_[i].new_im_size_[0] + // << ", " << inputs_batch_[i].new_im_size_[1] + // << ")" << std::endl; + } + thread_num = std::min(thread_num, batch_size); + #pragma omp parallel for num_threads(thread_num) + for (int i = 0; i < batch_size; ++i) { + int h = inputs_batch_[i].new_im_size_[0]; + int w = inputs_batch_[i].new_im_size_[1]; + int c = im_batch[i].channels(); + if (max_h != h || max_w != w) { + std::vector temp_buffer(c * max_h * max_w); + float* temp_ptr = temp_buffer.data(); + float* ptr = inputs_batch_[i].im_data_.data(); + for (int cur_channel = c - 1; cur_channel >= 0; --cur_channel) { + int ori_pos = cur_channel * h * w + (h - 1) * w; + int des_pos = cur_channel * max_h * max_w + (h - 1) * max_w; + int last_pos = cur_channel * h * w; + for (; ori_pos >= last_pos; ori_pos -= w, des_pos -= max_w) { + memcpy(temp_ptr + des_pos, ptr + ori_pos, w * sizeof(float)); + } + } + inputs_batch_[i].im_data_.swap(temp_buffer); + inputs_batch_[i].new_im_size_[0] = max_h; + inputs_batch_[i].new_im_size_[1] = max_w; + } + } + } + } + int h = inputs_batch_[0].new_im_size_[0]; + int w = inputs_batch_[0].new_im_size_[1]; + auto im_tensor = predictor_->GetInputTensor("image"); + im_tensor->Reshape({batch_size, 3, h, w}); + std::vector inputs_data(batch_size * 3 * h * w); + for (int i = 0; i < batch_size; ++i) { + std::copy(inputs_batch_[i].im_data_.begin(), + inputs_batch_[i].im_data_.end(), + inputs_data.begin() + i * 3 * h * w); + } + im_tensor->copy_from_cpu(inputs_data.data()); + if (name == "YOLOv3") { + auto im_size_tensor = predictor_->GetInputTensor("im_size"); + im_size_tensor->Reshape({batch_size, 2}); + std::vector inputs_data_size(batch_size * 2); + for (int i = 0; i < batch_size; ++i) { + std::copy(inputs_batch_[i].ori_im_size_.begin(), + inputs_batch_[i].ori_im_size_.end(), + inputs_data_size.begin() + 2 * i); + } + im_size_tensor->copy_from_cpu(inputs_data_size.data()); + } else if (name == "FasterRCNN" || name == "MaskRCNN") { + auto im_info_tensor = predictor_->GetInputTensor("im_info"); + auto im_shape_tensor = predictor_->GetInputTensor("im_shape"); + im_info_tensor->Reshape({batch_size, 3}); + im_shape_tensor->Reshape({batch_size, 3}); + + std::vector im_info(3 * batch_size); + std::vector im_shape(3 * batch_size); + for (int i = 0; i < batch_size; ++i) { + float ori_h = static_cast(inputs_batch_[i].ori_im_size_[0]); + float ori_w = static_cast(inputs_batch_[i].ori_im_size_[1]); + float new_h = static_cast(inputs_batch_[i].new_im_size_[0]); + float new_w = static_cast(inputs_batch_[i].new_im_size_[1]); + im_info[i * 3] = new_h; + im_info[i * 3 + 1] = new_w; + im_info[i * 3 + 2] = inputs_batch_[i].scale; + im_shape[i * 3] = ori_h; + im_shape[i * 3 + 1] = ori_w; + im_shape[i * 3 + 2] = 1.0; + } + im_info_tensor->copy_from_cpu(im_info.data()); + im_shape_tensor->copy_from_cpu(im_shape.data()); + } + // 使用加载的模型进行预测 + predictor_->ZeroCopyRun(); + + // 读取所有box + std::vector output_box; + auto output_names = predictor_->GetOutputNames(); + auto output_box_tensor = predictor_->GetOutputTensor(output_names[0]); + std::vector output_box_shape = output_box_tensor->shape(); + int size = 1; + for (const auto& i : output_box_shape) { 
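+      // multiply each dim into size: the product is the box tensor's element count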
+ size *= i; + } + output_box.resize(size); + output_box_tensor->copy_to_cpu(output_box.data()); + if (size < 6) { + std::cerr << "[WARNING] There's no object detected." << std::endl; + return true; + } + auto lod_vector = output_box_tensor->lod(); + int num_boxes = size / 6; + // 解析预测框box + for (int i = 0; i < lod_vector[0].size() - 1; ++i) { + for (int j = lod_vector[0][i]; j < lod_vector[0][i + 1]; ++j) { + Box box; + box.category_id = static_cast(round(output_box[j * 6])); + box.category = labels[box.category_id]; + box.score = output_box[j * 6 + 1]; + float xmin = output_box[j * 6 + 2]; + float ymin = output_box[j * 6 + 3]; + float xmax = output_box[j * 6 + 4]; + float ymax = output_box[j * 6 + 5]; + float w = xmax - xmin + 1; + float h = ymax - ymin + 1; + box.coordinate = {xmin, ymin, w, h}; + (*result)[i].boxes.push_back(std::move(box)); + } + } + + // 实例分割需解析mask + if (name == "MaskRCNN") { + std::vector output_mask; + auto output_mask_tensor = predictor_->GetOutputTensor(output_names[1]); + std::vector output_mask_shape = output_mask_tensor->shape(); + int masks_size = 1; + for (const auto& i : output_mask_shape) { + masks_size *= i; + } + int mask_pixels = output_mask_shape[2] * output_mask_shape[3]; + int classes = output_mask_shape[1]; + output_mask.resize(masks_size); + output_mask_tensor->copy_to_cpu(output_mask.data()); + int mask_idx = 0; + for (int i = 0; i < lod_vector[0].size() - 1; ++i) { + (*result)[i].mask_resolution = output_mask_shape[2]; + for (int j = 0; j < (*result)[i].boxes.size(); ++j) { + Box* box = &(*result)[i].boxes[j]; + int category_id = box->category_id; + auto begin_mask = output_mask.begin() + + (mask_idx * classes + category_id) * mask_pixels; + auto end_mask = begin_mask + mask_pixels; + box->mask.data.assign(begin_mask, end_mask); + box->mask.shape = {static_cast(box->coordinate[2]), + static_cast(box->coordinate[3])}; + mask_idx++; + } + } + } + return true; } bool Model::predict(const cv::Mat& im, SegResult* result) { @@ -254,13 +518,11 @@ bool Model::predict(const cv::Mat& im, SegResult* result) { inputs_.clear(); if (type == "classifier") { std::cerr << "Loading model is a 'classifier', ClsResult should be passed " - "to function predict()!" - << std::endl; + "to function predict()!" << std::endl; return false; } else if (type == "detector") { std::cerr << "Loading model is a 'detector', DetResult should be passed to " - "function predict()!" - << std::endl; + "function predict()!" 
<< std::endl; return false; } @@ -288,6 +550,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) { size *= i; result->label_map.shape.push_back(i); } + result->label_map.data.resize(size); output_label_tensor->copy_to_cpu(result->label_map.data.data()); @@ -299,6 +562,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) { size *= i; result->score_map.shape.push_back(i); } + result->score_map.data.resize(size); output_score_tensor->copy_to_cpu(result->score_map.data.data()); @@ -325,8 +589,8 @@ bool Model::predict(const cv::Mat& im, SegResult* result) { inputs_.im_size_before_resize_.pop_back(); auto padding_w = before_shape[0]; auto padding_h = before_shape[1]; - mask_label = mask_label(cv::Rect(0, 0, padding_w, padding_h)); - mask_score = mask_score(cv::Rect(0, 0, padding_w, padding_h)); + mask_label = mask_label(cv::Rect(0, 0, padding_h, padding_w)); + mask_score = mask_score(cv::Rect(0, 0, padding_h, padding_w)); } else if (*iter == "resize") { auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx]; inputs_.im_size_before_resize_.pop_back(); @@ -343,7 +607,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) { cv::Size(resize_h, resize_w), 0, 0, - cv::INTER_NEAREST); + cv::INTER_LINEAR); } ++idx; } @@ -353,6 +617,155 @@ bool Model::predict(const cv::Mat& im, SegResult* result) { result->score_map.data.assign(mask_score.begin(), mask_score.end()); result->score_map.shape = {mask_score.rows, mask_score.cols}; + return true; +} + +bool Model::predict(const std::vector& im_batch, + std::vector* result, + int thread_num) { + for (auto& inputs : inputs_batch_) { + inputs.clear(); + } + if (type == "classifier") { + std::cerr << "Loading model is a 'classifier', ClsResult should be passed " + "to function predict()!" << std::endl; + return false; + } else if (type == "detector") { + std::cerr << "Loading model is a 'detector', DetResult should be passed to " + "function predict()!" << std::endl; + return false; + } + + // 处理输入图像 + if (!preprocess(im_batch, &inputs_batch_, thread_num)) { + std::cerr << "Preprocess failed!" 
<< std::endl; + return false; + } + + int batch_size = im_batch.size(); + (*result).clear(); + (*result).resize(batch_size); + int h = inputs_batch_[0].new_im_size_[0]; + int w = inputs_batch_[0].new_im_size_[1]; + auto im_tensor = predictor_->GetInputTensor("image"); + im_tensor->Reshape({batch_size, 3, h, w}); + std::vector inputs_data(batch_size * 3 * h * w); + for (int i = 0; i < batch_size; ++i) { + std::copy(inputs_batch_[i].im_data_.begin(), + inputs_batch_[i].im_data_.end(), + inputs_data.begin() + i * 3 * h * w); + } + im_tensor->copy_from_cpu(inputs_data.data()); + // im_tensor->copy_from_cpu(inputs_.im_data_.data()); + + // 使用加载的模型进行预测 + predictor_->ZeroCopyRun(); + + // 获取预测置信度,经过argmax后的labelmap + auto output_names = predictor_->GetOutputNames(); + auto output_label_tensor = predictor_->GetOutputTensor(output_names[0]); + std::vector output_label_shape = output_label_tensor->shape(); + int size = 1; + for (const auto& i : output_label_shape) { + size *= i; + } + + std::vector output_labels(size, 0); + output_label_tensor->copy_to_cpu(output_labels.data()); + auto output_labels_iter = output_labels.begin(); + + int single_batch_size = size / batch_size; + for (int i = 0; i < batch_size; ++i) { + (*result)[i].label_map.data.resize(single_batch_size); + (*result)[i].label_map.shape.push_back(1); + for (int j = 1; j < output_label_shape.size(); ++j) { + (*result)[i].label_map.shape.push_back(output_label_shape[j]); + } + std::copy(output_labels_iter + i * single_batch_size, + output_labels_iter + (i + 1) * single_batch_size, + (*result)[i].label_map.data.data()); + } + + // 获取预测置信度scoremap + auto output_score_tensor = predictor_->GetOutputTensor(output_names[1]); + std::vector output_score_shape = output_score_tensor->shape(); + size = 1; + for (const auto& i : output_score_shape) { + size *= i; + } + + std::vector output_scores(size, 0); + output_score_tensor->copy_to_cpu(output_scores.data()); + auto output_scores_iter = output_scores.begin(); + + int single_batch_score_size = size / batch_size; + for (int i = 0; i < batch_size; ++i) { + (*result)[i].score_map.data.resize(single_batch_score_size); + (*result)[i].score_map.shape.push_back(1); + for (int j = 1; j < output_score_shape.size(); ++j) { + (*result)[i].score_map.shape.push_back(output_score_shape[j]); + } + std::copy(output_scores_iter + i * single_batch_score_size, + output_scores_iter + (i + 1) * single_batch_score_size, + (*result)[i].score_map.data.data()); + } + + // 解析输出结果到原图大小 + for (int i = 0; i < batch_size; ++i) { + std::vector label_map((*result)[i].label_map.data.begin(), + (*result)[i].label_map.data.end()); + cv::Mat mask_label((*result)[i].label_map.shape[1], + (*result)[i].label_map.shape[2], + CV_8UC1, + label_map.data()); + + cv::Mat mask_score((*result)[i].score_map.shape[2], + (*result)[i].score_map.shape[3], + CV_32FC1, + (*result)[i].score_map.data.data()); + int idx = 1; + int len_postprocess = inputs_batch_[i].im_size_before_resize_.size(); + for (std::vector::reverse_iterator iter = + inputs_batch_[i].reshape_order_.rbegin(); + iter != inputs_batch_[i].reshape_order_.rend(); + ++iter) { + if (*iter == "padding") { + auto before_shape = + inputs_batch_[i].im_size_before_resize_[len_postprocess - idx]; + inputs_batch_[i].im_size_before_resize_.pop_back(); + auto padding_w = before_shape[0]; + auto padding_h = before_shape[1]; + mask_label = mask_label(cv::Rect(0, 0, padding_h, padding_w)); + mask_score = mask_score(cv::Rect(0, 0, padding_h, padding_w)); + } else if (*iter == "resize") { + auto 
before_shape = + inputs_batch_[i].im_size_before_resize_[len_postprocess - idx]; + inputs_batch_[i].im_size_before_resize_.pop_back(); + auto resize_w = before_shape[0]; + auto resize_h = before_shape[1]; + cv::resize(mask_label, + mask_label, + cv::Size(resize_h, resize_w), + 0, + 0, + cv::INTER_NEAREST); + cv::resize(mask_score, + mask_score, + cv::Size(resize_h, resize_w), + 0, + 0, + cv::INTER_LINEAR); + } + ++idx; + } + (*result)[i].label_map.data.assign(mask_label.begin(), + mask_label.end()); + (*result)[i].label_map.shape = {mask_label.rows, mask_label.cols}; + (*result)[i].score_map.data.assign(mask_score.begin(), + mask_score.end()); + (*result)[i].score_map.shape = {mask_score.rows, mask_score.cols}; + } + return true; } -} // namespce of PaddleX +} // namespace PaddleX diff --git a/deploy/cpp/src/transforms.cpp b/deploy/cpp/src/transforms.cpp index 9224367d3522ebe4e323a40a1af92be7cfeae9d3..99a73ee7345bbc8cc672d1c42627a9326ded0cf7 100644 --- a/deploy/cpp/src/transforms.cpp +++ b/deploy/cpp/src/transforms.cpp @@ -95,11 +95,13 @@ bool Padding::Run(cv::Mat* im, ImageBlob* data) { if (width_ > 1 & height_ > 1) { padding_w = width_ - im->cols; padding_h = height_ - im->rows; - } else if (coarsest_stride_ > 1) { + } else if (coarsest_stride_ >= 1) { + int h = im->rows; + int w = im->cols; padding_h = - ceil(im->rows * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows; + ceil(h * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows; padding_w = - ceil(im->cols * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols; + ceil(w * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols; } if (padding_h < 0 || padding_w < 0) { @@ -219,4 +221,5 @@ bool Transforms::Run(cv::Mat* im, ImageBlob* data) { } return true; } + } // namespace PaddleX diff --git a/deploy/cpp/src/visualize.cpp b/deploy/cpp/src/visualize.cpp index 6ec09fd1c2b7a342ea3d31e784a80033d80f1014..1511887f097e20826f13c8c1f098ceea4efc0b5b 100644 --- a/deploy/cpp/src/visualize.cpp +++ b/deploy/cpp/src/visualize.cpp @@ -145,4 +145,4 @@ std::string generate_save_path(const std::string& save_dir, std::string image_name(file_path.substr(pos + 1)); return save_dir + OS_PATH_SEP + image_name; } -} // namespace of PaddleX +} // namespace PaddleX diff --git a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md index ab33cb924f72d3e386c3391b6423a067254d94cf..dada892cc0ea706941d0a9966bd52e657fff0d56 100755 --- a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md +++ b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md @@ -19,16 +19,16 @@ ### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference -PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持TensorRT,提供了不同的预编译版本,目前PaddleX依赖于Paddle1.7版本,以下提供了多个不同版本的Paddle预测库: +PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持TensorRT,提供了不同的预编译版本,目前PaddleX依赖于Paddle1.8版本,以下提供了多个不同版本的Paddle预测库: -| 版本说明 | 预测库(1.7.2版本) | +| 版本说明 | 预测库(1.8.2版本) | | ---- | ---- | -| ubuntu14.04_cpu_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-cpu-avx-mkl/fluid_inference.tgz) | -| ubuntu14.04_cpu_avx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-cpu-avx-openblas/fluid_inference.tgz) | -| ubuntu14.04_cpu_noavx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-cpu-noavx-openblas/fluid_inference.tgz) | -| ubuntu14.04_cuda9.0_cudnn7_avx_mkl | 
diff --git a/deploy/cpp/src/visualize.cpp b/deploy/cpp/src/visualize.cpp
index 6ec09fd1c2b7a342ea3d31e784a80033d80f1014..1511887f097e20826f13c8c1f098ceea4efc0b5b 100644
--- a/deploy/cpp/src/visualize.cpp
+++ b/deploy/cpp/src/visualize.cpp
@@ -145,4 +145,4 @@ std::string generate_save_path(const std::string& save_dir,
   std::string image_name(file_path.substr(pos + 1));
   return save_dir + OS_PATH_SEP + image_name;
 }
-}  // namespace of PaddleX
+}  // namespace PaddleX
diff --git a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md
index ab33cb924f72d3e386c3391b6423a067254d94cf..dada892cc0ea706941d0a9966bd52e657fff0d56 100755
--- a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md
+++ b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md
@@ -19,16 +19,16 @@

### Step2: Download the PaddlePaddle C++ inference library fluid_inference

-The PaddlePaddle C++ inference library ships prebuilt packages for different `CPU`/`CUDA` configurations, with and without TensorRT support. PaddleX currently depends on Paddle 1.7; several inference library builds are listed below:
+The PaddlePaddle C++ inference library ships prebuilt packages for different `CPU`/`CUDA` configurations, with and without TensorRT support. PaddleX currently depends on Paddle 1.8; several inference library builds are listed below:

-| Variant | Inference library (1.7.2) |
+| Variant | Inference library (1.8.2) |
| ---- | ---- |
-| ubuntu14.04_cpu_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-cpu-avx-mkl/fluid_inference.tgz) |
-| ubuntu14.04_cpu_avx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-cpu-avx-openblas/fluid_inference.tgz) |
-| ubuntu14.04_cpu_noavx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-cpu-noavx-openblas/fluid_inference.tgz) |
-| ubuntu14.04_cuda9.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz) |
-| ubuntu14.04_cuda10.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-gpu-cuda10-cudnn7-avx-mkl/fluid_inference.tgz) |
-| ubuntu14.04_cuda10.1_cudnn7.6_avx_mkl_trt6 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6%2Ffluid_inference.tgz) |
+| ubuntu14.04_cpu_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-mkl/fluid_inference.tgz) |
+| ubuntu14.04_cpu_avx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-openblas/fluid_inference.tgz) |
+| ubuntu14.04_cpu_noavx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-noavx-openblas/fluid_inference.tgz) |
+| ubuntu14.04_cuda9.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz) |
+| ubuntu14.04_cuda10.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10-cudnn7-avx-mkl/fluid_inference.tgz) |
+| ubuntu14.04_cuda10.1_cudnn7.6_avx_mkl_trt6 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6%2Ffluid_inference.tgz) |

For more and newer versions, download as appropriate from the [C++ inference library download list](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)
diff --git a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md
index a8e9d015865d1108c70ca876c98e83b79c531269..4cf0a7f55b43e4cda7a583c790e86e2935092325 100755
--- a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md
+++ b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md
@@ -27,18 +27,18 @@ git clone https://github.com/PaddlePaddle/PaddleX.git

### Step2: Download the PaddlePaddle C++ inference library fluid_inference

-The PaddlePaddle C++ inference library ships prebuilt packages for different `CPU`/`CUDA` configurations, with and without TensorRT support. PaddleX currently depends on Paddle 1.7; several inference library builds are listed below:
+The PaddlePaddle C++ inference library ships prebuilt packages for different `CPU`/`CUDA` configurations, with and without TensorRT support. PaddleX currently depends on Paddle 1.8; several inference library builds are listed below:

-| Variant | Inference library (1.7.2) | Compiler | Build tool | cuDNN | CUDA
+| Variant | Inference library (1.8.2) | Compiler | Build tool | cuDNN | CUDA |
| ---- | ---- | ---- | ---- | ---- | ---- |
-| cpu_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
-| cpu_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/open/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
-| cuda9.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/mkl/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
-| cuda9.0_cudnn7_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/open/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
-| cuda10.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/mkl/post107/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.5.0 | 10.0 |
+| cpu_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
+| cpu_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
+| cuda9.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
+| cuda9.0_cudnn7_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
+| cuda10.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post107/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.5.0 | 10.0 |

-For more and newer versions, download as appropriate from the [C++ inference library download list](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id1)
+For more and newer versions, download as appropriate from the [C++ inference library download list](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/windows_cpp_inference.html)

After extraction, the `D:\projects\fluid_inference*\` directory mainly contains:
```
@@ -72,12 +72,14 @@

![step2.2](../../images/vs2019_step3.png)

-3. Click `Project` -> `CMake Settings for PADDLEX_INFERENCE`
+3. Click `Project` -> `CMake Settings`

![step3](../../images/vs2019_step4.png)

4. Click `Browse` and set the build options pointing to the `CUDA`, `OpenCV`, and `Paddle inference library` paths

+![step3](../../images/vs2019_step5.png)
+
The dependency paths are described below (entries marked * are only needed with the **GPU version** of the inference library; keep the CUDA library versions aligned: **use 9.0 or 10.0, not 9.2, 10.1, or similar CUDA libraries**):

| Parameter | Meaning |
@@ -95,13 +97,17 @@

There is no need to extract the downloaded yaml-cpp.zip; in cmake/yaml.cmake, replace the address in `URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` with the path of the downloaded file.

-![step4](../../images/vs2019_step5.png)
+5. To use model encryption, manually download the [Windows model encryption tool](https://bj.bcebos.com/paddlex/tools/win/paddlex-encryption.zip) and extract it to a directory such as \\path\\to\\paddlex-encryption. When configuring the build, check WITH_ENCRYPTION and set ENCRYPTION_DIR to \\path\\to\\paddlex-encryption.
+
+![step_encryption](../../images/vs2019_step_encryption.png)
+
+![step4](../../images/vs2019_step6.png)

**Once configured**, click `Save and generate CMake cache to load variables` as shown above.

6. Click `Build` -> `Build All`

-![step6](../../images/vs2019_step6.png)
+![step6](../../images/vs2019_step7.png)

### Step5: Inference and visualization
diff --git a/docs/tutorials/deploy/deploy_server/encryption.md b/docs/tutorials/deploy/deploy_server/encryption.md
index 7090421823bb3bbe2017818a3fc2f7e96608dda9..cf39bbbd6c4925b6536b8991e2800dd16ec46c97 100644
--- a/docs/tutorials/deploy/deploy_server/encryption.md
+++ b/docs/tutorials/deploy/deploy_server/encryption.md
@@ -2,7 +2,7 @@

PaddleX provides a lightweight model-encryption deployment scheme: the built-in encryption tool encrypts the inference model, and the deployment SDK can load the encrypted model directly and run inference, improving the security of deployed AI models.

-**Note: the encryption scheme currently supports Linux only**
+**The encryption scheme now supports both Windows and Linux**

## 1. Overview
@@ -40,9 +40,11 @@ PaddleX provides a lightweight model-encryption deployment scheme

### 1.2 Encryption tool

-[PaddleX model encryption tool](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip). The build script downloads the encryption tool automatically when compiling the deployment code; you can also download it manually.
+[Linux PaddleX model encryption tool](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip): the build script downloads this version automatically; you can also download it manually.

-The encryption tool contains:
+[Windows PaddleX model encryption tool](https://bj.bcebos.com/paddlex/tools/win/paddlex-encryption.zip): this version must be downloaded manually.
+
+The Linux encryption tool contains:
```
paddlex-encryption
├── include # header files: paddle_model_decrypt.h (decryption) and paddle_model_encrypt.h (encryption)
|
├── lib # libpmodel-encrypt.so and libpmodel-decrypt.so shared libraries
|
└── tool # paddlex_encrypt_tool
```
@@ -52,22 +54,38 @@ paddlex-encryption
+The Windows encryption tool contains:
+```
+paddlex-encryption
+├── include # header files: paddle_model_decrypt.h (decryption) and paddle_model_encrypt.h (encryption)
+|
+├── lib # pmodel-encrypt.dll and pmodel-decrypt.dll dynamic libraries; pmodel-encrypt.lib and pmodel-decrypt.lib static libraries
+|
+└── tool # paddlex_encrypt_tool.exe, the model encryption tool
+```

### 1.3 Encrypting a PaddleX model

After encrypting a model, the tool generates random key material (used for AES encryption and decryption), which must be passed in at deployment time to decrypt the model.
> The key consists of a 32-byte key plus a 16-byte IV; note that the generated key is base64-encoded, which widens the range of usable keys.

+Linux:
```
./paddlex-encryption/tool/paddlex_encrypt_tool -model_dir /path/to/paddlex_inference_model -save_dir /path/to/paddlex_encrypted_model
```

+Windows:
+```
+./paddlex-encryption/tool/paddlex_encrypt_tool.exe -model_dir /path/to/paddlex_inference_model -save_dir /path/to/paddlex_encrypted_model
+```
+
`-model_dir` specifies the inference-model path (see [Export inference model](deploy_python.html#inference) for exporting a model in inference format); you can use the `inference_model` exported in [Export the Xiaoduxiong detection model](deploy_python.html#inference) (**note**: due to ongoing PaddleX updates, models below version 1.0.0 cannot be used directly for deployment; see [Model version upgrade](../upgrade_version.md) to upgrade them). After encryption, the model is saved under the specified `-save_dir`, containing `__model__.encrypted`, `__params__.encrypted`, and `model.yml`, together with the generated key. The command output is shown below; the key here is `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`

![](../images/encrypt.png)

## 2. PaddleX C++ encrypted deployment

-Build the C++ deployment code following the [Linux build guide](deploy_cpp/deploy_cpp_linux.html#linux). After a successful build, the demo executables are `build/demo/detector`, `build/demo/classifier`, and `build/demo/segmenter`; choose the one matching your model type. The main command-line parameters are:
+### 2.1 Linux
+Build the C++ deployment code following the [Linux build guide](deploy_cpp/deploy_cpp_linux.md). After a successful build, the demo executables are `build/demo/detector`, `build/demo/classifier`, and `build/demo/segmenter`; choose the one matching your model type. The main command-line parameters are:

| Parameter | Description |
| ---- | ---- |
@@ -83,7 +101,7 @@

## Examples

-You can run predictions on the test images from [Export the Xiaoduxiong detection model](deploy_python.html#inference).
+You can run predictions on the test images from [Export the Xiaoduxiong detection model](deploy_python.md#inference).

`Example 1`:
@@ -108,3 +126,34 @@
./build/demo/detector --model_dir=/path/to/models/inference_model --image_list=/root/projects/images_list.txt --use_gpu=1 --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
```
`--key` takes the key emitted by the encryption tool, e.g. `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`; the visualized prediction results are saved under the directory set by `save_dir`.
+
+### 2.2 Windows
+See the [Windows build guide](deploy_cpp/deploy_cpp_win_vs2019.md). The parameters are identical to the Linux deployment. The demo entry points are paddlex_inference\detector.exe, paddlex_inference\classifier.exe, and paddlex_inference\segmenter.exe.
+
+## Examples
+
+You can run predictions on the test images from [Export the Xiaoduxiong detection model](deploy_python.md#inference).
+
+`Example 1`:
+
+Predict the image `/path/to/xiaoduxiong.jpeg` without `GPU`:
+
+```shell
+.\\paddlex_inference\\detector.exe --model_dir=\\path\\to\\inference_model --image=\\path\\to\\xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
+```
+`--key` takes the key emitted by the encryption tool, e.g. `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`; the visualized prediction results are saved under the directory set by `save_dir`.
+
+`Example 2`:
+
+Predict multiple images listed in `\\path\\to\\image_list.txt` with `GPU`; the image_list.txt format is:
+```
+\\path\\to\\images\\xiaoduxiong1.jpeg
+\\path\\to\\images\\xiaoduxiong2.jpeg
+...
+\\path\\to\\images\\xiaoduxiongn.jpeg
+```
+```shell
+.\\paddlex_inference\\detector.exe --model_dir=\\path\\to\\models\\inference_model --image_list=\\path\\to\\images_list.txt --use_gpu=1 --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
+```
+`--key` takes the key emitted by the encryption tool, e.g. `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`; the visualized prediction results are saved under the directory set by `save_dir`.
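At inference time the demos receive this key via `--key` and decrypt the model while loading it. A rough C++ sketch of that flow follows; the five-argument `Init` call mirrors the demo programs, but its exact signature is an assumption here, so verify it against `include/paddlex/paddlex.h`:

```cpp
#include "paddlex/paddlex.h"  // PaddleX C++ deployment SDK

int main() {
  PaddleX::Model model;
  // Assumed parameter order: model_dir, use_gpu, use_trt, gpu_id, key.
  // With a non-empty key, the SDK decrypts __model__.encrypted and
  // __params__.encrypted in memory before building the predictor.
  model.Init("/path/to/paddlex_encrypted_model",
             false,  // use_gpu
             false,  // use_trt
             0,      // gpu_id
             "kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=");
  // After Init, prediction proceeds exactly as with an unencrypted model.
  return 0;
}
```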
diff --git a/docs/tutorials/deploy/images/vs2019_step4.png b/docs/tutorials/deploy/images/vs2019_step4.png
index 74fe7b224dc9b3533066111ab2a9cd877a5bbc68..9df86c9824f5fde45bc6df440ddc813742ce460f 100644
Binary files a/docs/tutorials/deploy/images/vs2019_step4.png and b/docs/tutorials/deploy/images/vs2019_step4.png differ
diff --git a/docs/tutorials/deploy/images/vs2019_step5.png b/docs/tutorials/deploy/images/vs2019_step5.png
old mode 100755
new mode 100644
index 0986e823812e2316c4fd0f2e6cb260a1204fda40..e5349cf08b209561b5cd11e3f5b68e4aa6c6e295
Binary files a/docs/tutorials/deploy/images/vs2019_step5.png and b/docs/tutorials/deploy/images/vs2019_step5.png differ
diff --git a/docs/tutorials/deploy/images/vs2019_step6.png b/docs/tutorials/deploy/images/vs2019_step6.png
index 86a8039cbd2a9f8fb499ed72d386b5c02b30c86c..0986e823812e2316c4fd0f2e6cb260a1204fda40 100755
Binary files a/docs/tutorials/deploy/images/vs2019_step6.png and b/docs/tutorials/deploy/images/vs2019_step6.png differ
diff --git a/docs/tutorials/deploy/images/vs2019_step7.png b/docs/tutorials/deploy/images/vs2019_step7.png
new file mode 100755
index 0000000000000000000000000000000000000000..86a8039cbd2a9f8fb499ed72d386b5c02b30c86c
Binary files /dev/null and b/docs/tutorials/deploy/images/vs2019_step7.png differ
diff --git a/docs/tutorials/deploy/images/vs2019_step_encryption.png b/docs/tutorials/deploy/images/vs2019_step_encryption.png
new file mode 100644
index 0000000000000000000000000000000000000000..27a606799363b8b0f383ebd06f86a9a20e133ce9
Binary files /dev/null and b/docs/tutorials/deploy/images/vs2019_step_encryption.png differ
diff --git a/tools/codestyle/clang_format.hook b/tools/codestyle/clang_format.hook
index 1d928216867c0ba3897d71542fea44debf8d72a0..14300746ac343fa56c690bc43fc02659d690f73c 100755
--- a/tools/codestyle/clang_format.hook
+++ b/tools/codestyle/clang_format.hook
@@ -1,15 +1,15 @@
#!/bin/bash
-set -e
-
-readonly VERSION="3.8"
-
-version=$(clang-format -version)
-
-if ! [[ $version == *"$VERSION"* ]]; then
-    echo "clang-format version check failed."
-    echo "a version contains '$VERSION' is needed, but get '$version'"
-    echo "you can install the right version, and make an soft-link to '\$PATH' env"
-    exit -1
-fi
-
-clang-format $@
+# set -e
+#
+# readonly VERSION="3.8"
+#
+# version=$(clang-format -version)
+#
+# if ! [[ $version == *"$VERSION"* ]]; then
+#     echo "clang-format version check failed."
+#     echo "a version contains '$VERSION' is needed, but get '$version'"
+#     echo "you can install the right version, and make an soft-link to '\$PATH' env"
+#     exit -1
+# fi
+#
+# clang-format $@