Unverified · Commit d66c1ffc · Authored by: Zeyu Chen · Committed by: GitHub

Add Batch Predict and Fix Windows Secure Deployment

Add batch prediction and comments in every header file
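The batch entry points added by this commit can be exercised roughly as below. This is a minimal sketch, assuming the repository's include layout and a model exported under ./inference_model (both illustrative; only Init's new batch_size parameter and the vector overload of predict are taken from the diff):

#include <vector>
#include <opencv2/opencv.hpp>
#include "include/paddlex/paddlex.h"

int main() {
  PaddleX::Model model;
  // batch_size is the new sixth parameter of Init()/create_predictor()
  model.Init("./inference_model", false, false, 0, "", 2);
  std::vector<cv::Mat> im_batch = {cv::imread("1.jpg", 1),
                                   cv::imread("2.jpg", 1)};
  std::vector<PaddleX::ClsResult> results(im_batch.size());
  // thread_num bounds the OpenMP preprocessing threads
  model.predict(im_batch, &results, 2);
  return 0;
}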
@@ -73,7 +73,11 @@ endif()
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include")
  include_directories("${PADDLE_DIR}/third_party/install/snappystream/include")
endif()
-include_directories("${PADDLE_DIR}/third_party/install/zlib/include")
# zlib does not exist in 1.8.1
if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/include")
  include_directories("${PADDLE_DIR}/third_party/install/zlib/include")
endif()
include_directories("${PADDLE_DIR}/third_party/boost")
include_directories("${PADDLE_DIR}/third_party/eigen3")

@@ -84,7 +88,10 @@ if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
  link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib")
endif()
-link_directories("${PADDLE_DIR}/third_party/install/zlib/lib")
if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/lib")
  link_directories("${PADDLE_DIR}/third_party/install/zlib/lib")
endif()
link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib")
link_directories("${PADDLE_DIR}/third_party/install/glog/lib")
link_directories("${PADDLE_DIR}/third_party/install/gflags/lib")

@@ -186,8 +193,13 @@ if(WITH_STATIC_LIB)
  set(DEPS
      ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
-  set(DEPS
-      ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
  if (NOT WIN32)
    set(DEPS
        ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
  else()
    set(DEPS
        ${PADDLE_DIR}/paddle/lib/paddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
  endif()
endif()

@@ -204,13 +216,16 @@ if (NOT WIN32)
else()
  set(DEPS ${DEPS}
      ${MATH_LIB} ${MKLDNN_LIB}
-      glog gflags_static libprotobuf zlibstatic xxhash libyaml-cppmt)
      glog gflags_static libprotobuf xxhash libyaml-cppmt)
  if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/lib")
    set(DEPS ${DEPS} zlibstatic)
  endif()
  set(DEPS ${DEPS} libcmt shlwapi)
  if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
    set(DEPS ${DEPS} snappy)
  endif()
  if (EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
    set(DEPS ${DEPS} snappystream)
  endif()
endif(NOT WIN32)

@@ -236,7 +251,9 @@ if(WITH_ENCRYPTION)
    link_directories("${ENCRYPTION_DIR}/lib")
    set(DEPS ${DEPS} ${ENCRYPTION_DIR}/lib/libpmodel-decrypt${CMAKE_SHARED_LIBRARY_SUFFIX})
  else()
-    message(FATAL_ERROR "Encryption Tool don't support WINDOWS")
    include_directories("${ENCRYPTION_DIR}/include")
    link_directories("${ENCRYPTION_DIR}/lib")
    set(DEPS ${DEPS} ${ENCRYPTION_DIR}/lib/pmodel-decrypt${CMAKE_STATIC_LIBRARY_SUFFIX})
  endif()
endif()

@@ -284,10 +301,23 @@ if (WIN32 AND WITH_MKL)
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll
  )
  # for encryption
  if (EXISTS "${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll")
    add_custom_command(TARGET classifier POST_BUILD
      COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll
      COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll
    )
    add_custom_command(TARGET detector POST_BUILD
      COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll
      COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll
    )
    add_custom_command(TARGET segmenter POST_BUILD
      COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll
      COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll
    )
  endif()
endif()

file(COPY "${CMAKE_SOURCE_DIR}/include/paddlex/visualize.h"
......
@@ -21,6 +21,11 @@
          "value": "C:/projects/fluid_install_dir_win_cpu_1.6/fluid_install_dir_win_cpu_1.6",
          "type": "PATH"
        },
        {
          "name": "CUDA_LIB",
          "value": "",
          "type": "PATH"
        },
        {
          "name": "CMAKE_BUILD_TYPE",
          "value": "Release",

@@ -40,8 +45,18 @@
          "name": "WITH_GPU",
          "value": "False",
          "type": "BOOL"
        },
        {
          "name": "WITH_ENCRYPTION",
          "value": "False",
          "type": "BOOL"
        },
        {
          "name": "ENCRYPTION_DIR",
          "value": "",
          "type": "PATH"
        }
      ]
    }
  ]
}
\ No newline at end of file
@@ -13,14 +13,19 @@
// limitations under the License.

#include <glog/logging.h>
#include <omp.h>

#include <algorithm>
#include <chrono>  // NOLINT
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>

#include "include/paddlex/paddlex.h"

using namespace std::chrono;  // NOLINT

DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_bool(use_gpu, false, "Inferring with GPU or CPU");
DEFINE_bool(use_trt, false, "Inferring with TensorRT");

@@ -28,6 +33,10 @@ DEFINE_int32(gpu_id, 0, "GPU card id");
DEFINE_string(key, "", "key of encryption");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_int32(batch_size, 1, "Batch size of inferring");
DEFINE_int32(thread_num,
             omp_get_num_procs(),
             "Number of preprocessing threads");

int main(int argc, char** argv) {
  // Parse the command line
@@ -44,32 +53,81 @@ int main(int argc, char** argv) {
  // Load the model
  PaddleX::Model model;
-  model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key);
  model.Init(FLAGS_model_dir,
             FLAGS_use_gpu,
             FLAGS_use_trt,
             FLAGS_gpu_id,
             FLAGS_key,
             FLAGS_batch_size);

  // Run prediction
  double total_running_time_s = 0.0;
  double total_imread_time_s = 0.0;
  int imgs = 1;
  if (FLAGS_image_list != "") {
    std::ifstream inf(FLAGS_image_list);
    if (!inf) {
      std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
      return -1;
    }
    // Multi-batch prediction
    std::string image_path;
    std::vector<std::string> image_paths;
    while (getline(inf, image_path)) {
-      PaddleX::ClsResult result;
-      cv::Mat im = cv::imread(image_path, 1);
-      model.predict(im, &result);
-      std::cout << "Predict label: " << result.category
-                << ", label_id:" << result.category_id
-                << ", score: " << result.score << std::endl;
      image_paths.push_back(image_path);
    }
    imgs = image_paths.size();
    for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
      auto start = system_clock::now();
      // Read a batch of images
      int im_vec_size =
          std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
      std::vector<cv::Mat> im_vec(im_vec_size - i);
      std::vector<PaddleX::ClsResult> results(im_vec_size - i,
                                              PaddleX::ClsResult());
      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
      #pragma omp parallel for num_threads(thread_num)
      for (int j = i; j < im_vec_size; ++j) {
        im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
      }
      auto imread_end = system_clock::now();
      model.predict(im_vec, &results, thread_num);
      auto imread_duration = duration_cast<microseconds>(imread_end - start);
      total_imread_time_s += static_cast<double>(imread_duration.count()) *
                             microseconds::period::num /
                             microseconds::period::den;
      auto end = system_clock::now();
      auto duration = duration_cast<microseconds>(end - start);
      total_running_time_s += static_cast<double>(duration.count()) *
                              microseconds::period::num /
                              microseconds::period::den;
      for (int j = i; j < im_vec_size; ++j) {
        std::cout << "Path:" << image_paths[j]
                  << ", predict label: " << results[j - i].category
                  << ", label_id:" << results[j - i].category_id
                  << ", score: " << results[j - i].score << std::endl;
      }
    }
  } else {
    auto start = system_clock::now();
    PaddleX::ClsResult result;
    cv::Mat im = cv::imread(FLAGS_image, 1);
    model.predict(im, &result);
    auto end = system_clock::now();
    auto duration = duration_cast<microseconds>(end - start);
    total_running_time_s += static_cast<double>(duration.count()) *
                            microseconds::period::num /
                            microseconds::period::den;
    std::cout << "Predict label: " << result.category
              << ", label_id:" << result.category_id
              << ", score: " << result.score << std::endl;
  }
  std::cout << "Total running time: " << total_running_time_s
            << " s, average running time: " << total_running_time_s / imgs
            << " s/img, total read img time: " << total_imread_time_s
            << " s, average read time: " << total_imread_time_s / imgs
            << " s/img, batch_size = " << FLAGS_batch_size << std::endl;
  return 0;
}
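With the new flags, the classifier demo can be invoked along these lines (the binary name is taken from the CMake targets above; the paths are placeholders):

./classifier --model_dir=./inference_model --image_list=./list.txt --batch_size=4 --thread_num=4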
@@ -13,15 +13,21 @@
// limitations under the License.

#include <glog/logging.h>
#include <omp.h>

#include <algorithm>
#include <chrono>  // NOLINT
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>

#include "include/paddlex/paddlex.h"
#include "include/paddlex/visualize.h"

using namespace std::chrono;  // NOLINT

DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_bool(use_gpu, false, "Inferring with GPU or CPU");
DEFINE_bool(use_trt, false, "Inferring with TensorRT");

@@ -30,6 +36,13 @@ DEFINE_string(key, "", "key of encryption");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_string(save_dir, "output", "Path to save visualized image");
DEFINE_int32(batch_size, 1, "Batch size of inferring");
DEFINE_double(threshold,
              0.5,
              "The minimum scores of target boxes which are shown");
DEFINE_int32(thread_num,
             omp_get_num_procs(),
             "Number of preprocessing threads");

int main(int argc, char** argv) {
  // Parse command-line arguments
@@ -43,11 +56,19 @@ int main(int argc, char** argv) {
    std::cerr << "--image or --image_list need to be defined" << std::endl;
    return -1;
  }
  std::cout << "Thread num: " << FLAGS_thread_num << std::endl;

  // Load the model
  PaddleX::Model model;
-  model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key);
  model.Init(FLAGS_model_dir,
             FLAGS_use_gpu,
             FLAGS_use_trt,
             FLAGS_gpu_id,
             FLAGS_key,
             FLAGS_batch_size);
  double total_running_time_s = 0.0;
  double total_imread_time_s = 0.0;
  int imgs = 1;
  auto colormap = PaddleX::GenerateColorMap(model.labels.size());
  std::string save_dir = "output";
  // Run prediction
@@ -58,47 +79,76 @@ int main(int argc, char** argv) {
      return -1;
    }
    std::string image_path;
    std::vector<std::string> image_paths;
    while (getline(inf, image_path)) {
-      PaddleX::DetResult result;
-      cv::Mat im = cv::imread(image_path, 1);
-      model.predict(im, &result);
-      for (int i = 0; i < result.boxes.size(); ++i) {
-        std::cout << "image file: " << image_path
-                  << ", predict label: " << result.boxes[i].category
-                  << ", label_id:" << result.boxes[i].category_id
-                  << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
-                  << result.boxes[i].coordinate[0] << ", "
-                  << result.boxes[i].coordinate[1] << ", "
-                  << result.boxes[i].coordinate[2] << ", "
-                  << result.boxes[i].coordinate[3] << ")" << std::endl;
-      }
-      cv::Mat vis_img =
-          PaddleX::Visualize(im, result, model.labels, colormap, 0.5);
-      std::string save_path =
-          PaddleX::generate_save_path(FLAGS_save_dir, image_path);
-      cv::imwrite(save_path, vis_img);
-      result.clear();
-      std::cout << "Visualized output saved as " << save_path << std::endl;
      image_paths.push_back(image_path);
    }
    imgs = image_paths.size();
    for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
      auto start = system_clock::now();
      int im_vec_size =
          std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
      std::vector<cv::Mat> im_vec(im_vec_size - i);
      std::vector<PaddleX::DetResult> results(im_vec_size - i,
                                              PaddleX::DetResult());
      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
      #pragma omp parallel for num_threads(thread_num)
      for (int j = i; j < im_vec_size; ++j) {
        im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
      }
      auto imread_end = system_clock::now();
      model.predict(im_vec, &results, thread_num);
      auto imread_duration = duration_cast<microseconds>(imread_end - start);
      total_imread_time_s += static_cast<double>(imread_duration.count()) *
                             microseconds::period::num /
                             microseconds::period::den;
      auto end = system_clock::now();
      auto duration = duration_cast<microseconds>(end - start);
      total_running_time_s += static_cast<double>(duration.count()) *
                              microseconds::period::num /
                              microseconds::period::den;
      // Print the detected boxes
      for (int j = 0; j < im_vec_size - i; ++j) {
        for (int k = 0; k < results[j].boxes.size(); ++k) {
          std::cout << "image file: " << image_paths[i + j] << ", ";
          std::cout << "predict label: " << results[j].boxes[k].category
                    << ", label_id:" << results[j].boxes[k].category_id
                    << ", score: " << results[j].boxes[k].score
                    << ", box(xmin, ymin, w, h):("
                    << results[j].boxes[k].coordinate[0] << ", "
                    << results[j].boxes[k].coordinate[1] << ", "
                    << results[j].boxes[k].coordinate[2] << ", "
                    << results[j].boxes[k].coordinate[3] << ")" << std::endl;
        }
      }
      // Visualization
      for (int j = 0; j < im_vec_size - i; ++j) {
        cv::Mat vis_img = PaddleX::Visualize(
            im_vec[j], results[j], model.labels, colormap, FLAGS_threshold);
        std::string save_path =
            PaddleX::generate_save_path(FLAGS_save_dir, image_paths[i + j]);
        cv::imwrite(save_path, vis_img);
        std::cout << "Visualized output saved as " << save_path << std::endl;
      }
    }
  } else {
    PaddleX::DetResult result;
    cv::Mat im = cv::imread(FLAGS_image, 1);
    model.predict(im, &result);
    for (int i = 0; i < result.boxes.size(); ++i) {
      std::cout << "image file: " << FLAGS_image << std::endl;
      std::cout << ", predict label: " << result.boxes[i].category
                << ", label_id:" << result.boxes[i].category_id
                << ", score: " << result.boxes[i].score
                << ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0]
                << ", " << result.boxes[i].coordinate[1] << ", "
                << result.boxes[i].coordinate[2] << ", "
                << result.boxes[i].coordinate[3] << ")" << std::endl;
    }
    // Visualization
    cv::Mat vis_img =
        PaddleX::Visualize(im, result, model.labels, colormap, FLAGS_threshold);
    std::string save_path =
        PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
    cv::imwrite(save_path, vis_img);
@@ -106,5 +156,11 @@ int main(int argc, char** argv) {
    std::cout << "Visualized output saved as " << save_path << std::endl;
  }
  std::cout << "Total running time: " << total_running_time_s
            << " s, average running time: " << total_running_time_s / imgs
            << " s/img, total read img time: " << total_imread_time_s
            << " s, average read img time: " << total_imread_time_s / imgs
            << " s, batch_size = " << FLAGS_batch_size << std::endl;
  return 0;
}
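The detector demo gains a --threshold flag in addition to the batch flags; a sample invocation (binary name from the CMake targets, paths are placeholders):

./detector --model_dir=./inference_model --image_list=./list.txt --batch_size=2 --threshold=0.5 --save_dir=output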
@@ -13,15 +13,20 @@
// limitations under the License.

#include <glog/logging.h>
#include <omp.h>

#include <algorithm>
#include <chrono>  // NOLINT
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>

#include "include/paddlex/paddlex.h"
#include "include/paddlex/visualize.h"

using namespace std::chrono;  // NOLINT

DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_bool(use_gpu, false, "Inferring with GPU or CPU");
DEFINE_bool(use_trt, false, "Inferring with TensorRT");

@@ -30,6 +35,10 @@ DEFINE_string(key, "", "key of encryption");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_string(save_dir, "output", "Path to save visualized image");
DEFINE_int32(batch_size, 1, "Batch size of inferring");
DEFINE_int32(thread_num,
             omp_get_num_procs(),
             "Number of preprocessing threads");

int main(int argc, char** argv) {
  // Parse command-line arguments
@@ -46,8 +55,16 @@ int main(int argc, char** argv) {
  // Load the model
  PaddleX::Model model;
-  model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key);
  model.Init(FLAGS_model_dir,
             FLAGS_use_gpu,
             FLAGS_use_trt,
             FLAGS_gpu_id,
             FLAGS_key,
             FLAGS_batch_size);
  double total_running_time_s = 0.0;
  double total_imread_time_s = 0.0;
  int imgs = 1;
  auto colormap = PaddleX::GenerateColorMap(model.labels.size());
  // Run prediction
  if (FLAGS_image_list != "") {
@@ -57,23 +74,54 @@ int main(int argc, char** argv) {
      return -1;
    }
    std::string image_path;
    std::vector<std::string> image_paths;
    while (getline(inf, image_path)) {
-      PaddleX::SegResult result;
-      cv::Mat im = cv::imread(image_path, 1);
-      model.predict(im, &result);
      image_paths.push_back(image_path);
    }
    imgs = image_paths.size();
    for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
      auto start = system_clock::now();
      int im_vec_size =
          std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
      std::vector<cv::Mat> im_vec(im_vec_size - i);
      std::vector<PaddleX::SegResult> results(im_vec_size - i,
                                              PaddleX::SegResult());
      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
      #pragma omp parallel for num_threads(thread_num)
      for (int j = i; j < im_vec_size; ++j) {
        im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
      }
      auto imread_end = system_clock::now();
      model.predict(im_vec, &results, thread_num);
      auto imread_duration = duration_cast<microseconds>(imread_end - start);
      total_imread_time_s += static_cast<double>(imread_duration.count()) *
                             microseconds::period::num /
                             microseconds::period::den;
      auto end = system_clock::now();
      auto duration = duration_cast<microseconds>(end - start);
      total_running_time_s += static_cast<double>(duration.count()) *
                              microseconds::period::num /
                              microseconds::period::den;
      // Visualization
-      cv::Mat vis_img =
-          PaddleX::Visualize(im, result, model.labels, colormap);
-      std::string save_path =
-          PaddleX::generate_save_path(FLAGS_save_dir, image_path);
-      cv::imwrite(save_path, vis_img);
-      result.clear();
-      std::cout << "Visualized output saved as " << save_path << std::endl;
      for (int j = 0; j < im_vec_size - i; ++j) {
        cv::Mat vis_img =
            PaddleX::Visualize(im_vec[j], results[j], model.labels, colormap);
        std::string save_path =
            PaddleX::generate_save_path(FLAGS_save_dir, image_paths[i + j]);
        cv::imwrite(save_path, vis_img);
        std::cout << "Visualized output saved as " << save_path << std::endl;
      }
    }
  } else {
    auto start = system_clock::now();
    PaddleX::SegResult result;
    cv::Mat im = cv::imread(FLAGS_image, 1);
    model.predict(im, &result);
    auto end = system_clock::now();
    auto duration = duration_cast<microseconds>(end - start);
    total_running_time_s += static_cast<double>(duration.count()) *
                            microseconds::period::num /
                            microseconds::period::den;
    // Visualization
    cv::Mat vis_img = PaddleX::Visualize(im, result, model.labels, colormap);
    std::string save_path =
@@ -82,6 +130,11 @@ int main(int argc, char** argv) {
    result.clear();
    std::cout << "Visualized output saved as " << save_path << std::endl;
  }
  std::cout << "Total running time: " << total_running_time_s
            << " s, average running time: " << total_running_time_s / imgs
            << " s/img, total read img time: " << total_imread_time_s
            << " s, average read img time: " << total_imread_time_s / imgs
            << " s, batch_size = " << FLAGS_batch_size << std::endl;
  return 0;
}
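A note on the timing code shared by all three demos: count() * microseconds::period::num / microseconds::period::den converts microsecond tick counts to seconds, since microseconds::period is std::ratio<1, 1000000>. An equivalent, arguably clearer form (not used in this commit) would be:

  auto seconds = std::chrono::duration<double>(end - start).count();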
@@ -54,4 +54,4 @@ class ConfigPaser {
  YAML::Node Transforms_;
};
-}  // namespace PaddleDetection
}  // namespace PaddleX
@@ -16,8 +16,11 @@
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <numeric>
#include <string>
#include <vector>

#include "yaml-cpp/yaml.h"

#ifdef _WIN32
@@ -28,53 +31,193 @@
#include "paddle_inference_api.h"  // NOLINT

#include "config_parser.h"  // NOLINT
#include "results.h"  // NOLINT
#include "transforms.h"  // NOLINT

#ifdef WITH_ENCRYPTION
#include "paddle_model_decrypt.h"  // NOLINT
#include "model_code.h"  // NOLINT
#endif

namespace PaddleX {

/*
 * @brief
 * This class encapsulates all the necessary steps of model inference, which
 * include image matrix preprocessing, model prediction and result
 * postprocessing. The entire process of model inference can be simplified
 * as below:
 * 1. preprocess the image matrix (resize, padding, ......)
 * 2. run model inference
 * 3. postprocess the results generated by model inference
 *
 * @example
 * PaddleX::Model cls_model;
 * // initialize model configuration
 * cls_model.Init(cls_model_dir, use_gpu, use_trt, gpu_id, encryption_key);
 * // define a Classification result object
 * PaddleX::ClsResult cls_result;
 * // get image matrix from image file
 * cv::Mat im = cv::imread(image_file_path, 1);
 * cls_model.predict(im, &cls_result);
 * */
class Model {
 public:
  /*
   * @brief
   * This method aims to initialize the model configuration
   *
   * @param model_dir: the directory which contains model.yml
   * @param use_gpu: use gpu or not when inferring
   * @param use_trt: use TensorRT or not when inferring
   * @param gpu_id: the id of the gpu when inferring with gpu
   * @param key: the key of encryption when using an encrypted model
   * @param batch_size: batch size of inference
   * */
  void Init(const std::string& model_dir,
            bool use_gpu = false,
            bool use_trt = false,
            int gpu_id = 0,
            std::string key = "",
            int batch_size = 1) {
    create_predictor(model_dir, use_gpu, use_trt, gpu_id, key, batch_size);
  }

  void create_predictor(const std::string& model_dir,
                        bool use_gpu = false,
                        bool use_trt = false,
                        int gpu_id = 0,
                        std::string key = "",
                        int batch_size = 1);

-  bool load_config(const std::string& model_dir);
  /*
   * @brief
   * This method aims to load model configurations, which include the
   * transform steps and the label list
   *
   * @param yaml_input: model configuration string
   * @return true if the configuration is loaded successfully
   * */
  bool load_config(const std::string& yaml_input);

  /*
   * @brief
   * This method aims to transform a single image matrix; the result will be
   * returned through the second parameter.
   *
   * @param input_im: single image matrix to be transformed
   * @param blob: the raw data of the single image matrix after transformation
   * @return true if the image matrix is preprocessed successfully
   * */
  bool preprocess(const cv::Mat& input_im, ImageBlob* blob);

  /*
   * @brief
   * This method aims to transform multiple image matrices; the results will
   * be returned through the second parameter.
   *
   * @param input_im_batch: a batch of image matrices to be transformed
   * @param blob_batch: raw data of the batch of image matrices after transformation
   * @param thread_num: the number of preprocessing threads,
   *                    each thread runs preprocessing on a single image matrix
   * @return true if the batch of image matrices is preprocessed successfully
   * */
  bool preprocess(const std::vector<cv::Mat> &input_im_batch,
                  std::vector<ImageBlob> *blob_batch,
                  int thread_num = 1);

  /*
   * @brief
   * This method aims to execute classification model prediction on a single
   * image matrix; the result will be returned through the second parameter.
   *
   * @param im: single image matrix to be predicted
   * @param result: classification prediction result after postprocessing
   * @return true if prediction succeeds
   * */
  bool predict(const cv::Mat& im, ClsResult* result);

  /*
   * @brief
   * This method aims to execute classification model prediction on a batch
   * of image matrices; the results will be returned through the second parameter.
   *
   * @param im_batch: a batch of image matrices to be predicted
   * @param results: a batch of classification prediction results after postprocessing
   * @param thread_num: the number of predicting threads, each thread runs
   *                    prediction on a single image matrix
   * @return true if prediction succeeds
   * */
  bool predict(const std::vector<cv::Mat> &im_batch,
               std::vector<ClsResult> *results,
               int thread_num = 1);

  /*
   * @brief
   * This method aims to execute detection or instance segmentation model
   * prediction on a single image matrix; the result will be returned through
   * the second parameter.
   *
   * @param im: single image matrix to be predicted
   * @param result: detection or instance segmentation prediction result after postprocessing
   * @return true if prediction succeeds
   * */
  bool predict(const cv::Mat& im, DetResult* result);

  /*
   * @brief
   * This method aims to execute detection or instance segmentation model
   * prediction on a batch of image matrices; the results will be returned
   * through the second parameter.
   *
   * @param im_batch: a batch of image matrices to be predicted
   * @param result: detection or instance segmentation prediction results after postprocessing
   * @param thread_num: the number of predicting threads, each thread runs
   *                    prediction on a single image matrix
   * @return true if prediction succeeds
   * */
  bool predict(const std::vector<cv::Mat> &im_batch,
               std::vector<DetResult> *result,
               int thread_num = 1);

  /*
   * @brief
   * This method aims to execute segmentation model prediction on a single
   * image matrix; the result will be returned through the second parameter.
   *
   * @param im: single image matrix to be predicted
   * @param result: segmentation prediction result after postprocessing
   * @return true if prediction succeeds
   * */
  bool predict(const cv::Mat& im, SegResult* result);

-  bool postprocess(SegResult* result);
-  bool postprocess(DetResult* result);
  /*
   * @brief
   * This method aims to execute segmentation model prediction on a batch of
   * image matrices; the results will be returned through the second parameter.
   *
   * @param im_batch: a batch of image matrices to be predicted
   * @param result: segmentation prediction results after postprocessing
   * @param thread_num: the number of predicting threads, each thread runs
   *                    prediction on a single image matrix
   * @return true if prediction succeeds
   * */
  bool predict(const std::vector<cv::Mat> &im_batch,
               std::vector<SegResult> *result,
               int thread_num = 1);

  // model type, one of three types: classifier, detector, segmenter
  std::string type;
  // model name, such as FasterRCNN, YOLOv3, and so on
  std::string name;
  std::map<int, std::string> labels;
  // transform (preprocessing) pipeline manager
  Transforms transforms_;
  // single input preprocessed data
  ImageBlob inputs_;
  // batch input preprocessed data
  std::vector<ImageBlob> inputs_batch_;
  // raw data of the prediction results
  std::vector<float> outputs_;
  // the predictor that runs model prediction
  std::unique_ptr<paddle::PaddlePredictor> predictor_;
};
}  // namespace PaddleX
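One observation on the API above, inferred from the implementation rather than a documented contract: the batch predict() overloads write into inputs_batch_, which create_predictor() sizes with batch_size, so each call's batch should be no larger than the batch_size passed to Init(). A defensive caller (hypothetical guard, with <cassert>; names as in the demos) might sketch it as:

  assert(im_batch.size() <= static_cast<size_t>(FLAGS_batch_size));
  model.predict(im_batch, &results, thread_num);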
@@ -20,9 +20,15 @@
namespace PaddleX {

/*
 * @brief
 * This class represents the mask in instance segmentation tasks.
 * */
template <class T>
struct Mask {
  // raw data of the mask
  std::vector<T> data;
  // the shape of the mask
  std::vector<int> shape;
  void clear() {
    data.clear();
@@ -30,19 +36,34 @@ struct Mask {
  }
};

/*
 * @brief
 * This class represents a target box in detection or instance segmentation tasks.
 * */
struct Box {
  int category_id;
  // the category label this box belongs to
  std::string category;
  // confidence score
  float score;
  std::vector<float> coordinate;
  Mask<float> mask;
};

/*
 * @brief
 * This is the base class of the prediction results.
 * */
class BaseResult {
 public:
  // model type
  std::string type = "base";
};

/*
 * @brief
 * This class represents a classification result.
 * */
class ClsResult : public BaseResult {
 public:
  int category_id;
@@ -51,17 +72,28 @@ class ClsResult : public BaseResult {
  std::string type = "cls";
};

/*
 * @brief
 * This class represents a detection or instance segmentation result.
 * */
class DetResult : public BaseResult {
 public:
  // target boxes
  std::vector<Box> boxes;
  int mask_resolution;
  std::string type = "det";
  void clear() { boxes.clear(); }
};

/*
 * @brief
 * This class represents a segmentation result.
 * */
class SegResult : public BaseResult {
 public:
  // the label of each pixel of the image matrix
  Mask<int64_t> label_map;
  // the score of each pixel of the image matrix
  Mask<float> score_map;
  std::string type = "seg";
  void clear() {
......
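To make the Box layout concrete: coordinate holds {xmin, ymin, w, h}, matching the format the demos print. A detection consumer might iterate a DetResult like this sketch (det_result is a hypothetical variable):

for (const auto& box : det_result.boxes) {
  // coordinate = {xmin, ymin, w, h}
  std::cout << box.category << " score=" << box.score
            << " xmin=" << box.coordinate[0] << " ymin=" << box.coordinate[1]
            << " w=" << box.coordinate[2] << " h=" << box.coordinate[3]
            << std::endl;
}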
@@ -28,7 +28,10 @@
namespace PaddleX {

-// Object for storing all preprocessed data
/*
 * @brief
 * This class represents the object for storing all preprocessed data
 * */
class ImageBlob {
 public:
  // Original image height and width
@@ -45,21 +48,34 @@ class ImageBlob {
  std::vector<float> im_data_;

  void clear() {
-    ori_im_size_.clear();
-    new_im_size_.clear();
    im_size_before_resize_.clear();
    reshape_order_.clear();
    im_data_.clear();
  }
};

-// Abstraction of preprocessing opration class
/*
 * @brief
 * Abstraction of the preprocessing operation class
 * */
class Transform {
 public:
  virtual void Init(const YAML::Node& item) = 0;
  /*
   * @brief
   * This method executes the preprocessing operation on an image matrix;
   * the result will be returned through the second parameter.
   * @param im: single image matrix to be preprocessed
   * @param data: the raw data of the single image matrix after preprocessing
   * @return true if the transform runs successfully
   * */
  virtual bool Run(cv::Mat* im, ImageBlob* data) = 0;
};

/*
 * @brief
 * This class executes the normalization operation on an image matrix
 * */
class Normalize : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -74,6 +90,14 @@ class Normalize : public Transform {
  std::vector<float> std_;
};

/*
 * @brief
 * This class executes the resize-by-short operation on an image matrix.
 * First, it resizes the short side of the image matrix to a specified length,
 * and the long side is resized in the same proportion. If the new length of
 * the long side exceeds max_size, the long side is resized to max_size
 * instead, and the short side is resized in the same proportion
 * */
class ResizeByShort : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -92,6 +116,12 @@ class ResizeByShort : public Transform {
  int max_size_;
};

/*
 * @brief
 * This class executes the resize-by-long operation on an image matrix.
 * First, it resizes the long side of the image matrix to a specified length,
 * and the short side is resized in the same proportion.
 * */
class ResizeByLong : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -103,6 +133,11 @@ class ResizeByLong : public Transform {
  int long_size_;
};

/*
 * @brief
 * This class executes the resize operation on an image matrix. It resizes
 * the width and height to specified lengths.
 * */
class Resize : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -128,6 +163,11 @@ class Resize : public Transform {
  std::string interp_;
};

/*
 * @brief
 * This class executes the center-crop operation on an image matrix. It crops
 * the center of the image matrix according to a specified size.
 * */
class CenterCrop : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -147,6 +187,11 @@ class CenterCrop : public Transform {
  int width_;
};

/*
 * @brief
 * This class executes the padding operation on an image matrix. It makes a
 * border on the edges of the image matrix.
 * */
class Padding : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -175,7 +220,11 @@ class Padding : public Transform {
  int width_ = 0;
  int height_ = 0;
};

/*
 * @brief
 * This class is the transform operations manager. It stores all the necessary
 * transform operations and runs them in the correct order.
 * */
class Transforms {
 public:
  void Init(const YAML::Node& node, bool to_rgb = true);
......
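A worked example of the ResizeByShort rule described above. The target short side of 800 and max_size of 1333 are assumed values (common detection defaults, not taken from this diff), and the function name is hypothetical:

#include <algorithm>

float ResizeByShortScale(int w, int h) {
  float scale = 800.0f / std::min(w, h);     // for 1200x600: 800/600 = 1.33
  if (scale * std::max(w, h) > 1333.0f) {    // 1.33 * 1200 = 1600 > 1333
    scale = 1333.0f / std::max(w, h);        // clamp: 1333/1200 = 1.11
  }
  return scale;  // 1200x600 resizes to 1333x666; long side capped at max_size
}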
@@ -43,20 +43,55 @@
namespace PaddleX {

-// Generate visualization colormap for each class
/*
 * @brief
 * Generate the visualization colormap for each class
 *
 * @param num_class: the number of classes
 * @return color map; the size of the vector is 3 * num_class
 * */
std::vector<int> GenerateColorMap(int num_class);

/*
 * @brief
 * Visualize the detection result
 *
 * @param img: initial image matrix
 * @param results: the detection result
 * @param labels: label map
 * @param colormap: visualization color map
 * @return the visualized image matrix
 * */
cv::Mat Visualize(const cv::Mat& img,
                  const DetResult& results,
                  const std::map<int, std::string>& labels,
                  const std::vector<int>& colormap,
                  float threshold = 0.5);

/*
 * @brief
 * Visualize the segmentation result
 *
 * @param img: initial image matrix
 * @param result: the segmentation result
 * @param labels: label map
 * @param colormap: visualization color map
 * @return the visualized image matrix
 * */
cv::Mat Visualize(const cv::Mat& img,
                  const SegResult& result,
                  const std::map<int, std::string>& labels,
                  const std::vector<int>& colormap);

/*
 * @brief
 * Generate the save path for a visualized image matrix
 *
 * @param save_dir: directory for saving the visualized image matrix
 * @param file_path: source image file path
 * @return the path for saving the visualized result
 * */
std::string generate_save_path(const std::string& save_dir,
                               const std::string& file_path);
}  // namespace PaddleX
@@ -11,32 +11,50 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <omp.h>

#include <algorithm>
#include <fstream>
#include <cstring>

#include "include/paddlex/paddlex.h"

namespace PaddleX {

void Model::create_predictor(const std::string& model_dir,
                             bool use_gpu,
                             bool use_trt,
                             int gpu_id,
                             std::string key,
                             int batch_size) {
-  // Read the config file
-  if (!load_config(model_dir)) {
-    std::cerr << "Parse file 'model.yml' failed!" << std::endl;
-    exit(-1);
-  }
  paddle::AnalysisConfig config;
  std::string model_file = model_dir + OS_PATH_SEP + "__model__";
  std::string params_file = model_dir + OS_PATH_SEP + "__params__";
  std::string yaml_file = model_dir + OS_PATH_SEP + "model.yml";
  std::string yaml_input = "";
#ifdef WITH_ENCRYPTION
  if (key != "") {
    model_file = model_dir + OS_PATH_SEP + "__model__.encrypted";
    params_file = model_dir + OS_PATH_SEP + "__params__.encrypted";
    yaml_file = model_dir + OS_PATH_SEP + "model.yml.encrypted";
    paddle_security_load_model(
        &config, key.c_str(), model_file.c_str(), params_file.c_str());
    yaml_input = decrypt_file(yaml_file.c_str(), key.c_str());
  }
#endif
  if (yaml_input == "") {
    // Read the config file
    std::ifstream yaml_fin(yaml_file);
    yaml_fin.seekg(0, std::ios::end);
    size_t yaml_file_size = yaml_fin.tellg();
    yaml_input.assign(yaml_file_size, ' ');
    yaml_fin.seekg(0);
    yaml_fin.read(&yaml_input[0], yaml_file_size);
  }
  // Parse the config contents
  if (!load_config(yaml_input)) {
    std::cerr << "Parse file 'model.yml' failed!" << std::endl;
    exit(-1);
  }
  if (key == "") {
    config.SetModel(model_file, params_file);
  }
  if (use_gpu) {
@@ -58,20 +76,20 @@ void Model::create_predictor(const std::string& model_dir,
                             false /* use_calib_mode*/);
  }
  predictor_ = std::move(CreatePaddlePredictor(config));
  inputs_batch_.assign(batch_size, ImageBlob());
}

-bool Model::load_config(const std::string& model_dir) {
-  std::string yaml_file = model_dir + OS_PATH_SEP + "model.yml";
-  YAML::Node config = YAML::LoadFile(yaml_file);
bool Model::load_config(const std::string& yaml_input) {
  YAML::Node config = YAML::Load(yaml_input);
  type = config["_Attributes"]["model_type"].as<std::string>();
  name = config["Model"].as<std::string>();
  std::string version = config["version"].as<std::string>();
  if (version[0] == '0') {
    std::cerr << "[Init] Version of the loaded model is lower than 1.0.0, "
              << "deployment cannot be done, please refer to "
              << "https://github.com/PaddlePaddle/PaddleX/blob/develop/docs"
              << "/tutorials/deploy/upgrade_version.md "
              << "to transfer version." << std::endl;
    return false;
  }
  bool to_rgb = true;
@@ -104,17 +122,29 @@ bool Model::preprocess(const cv::Mat& input_im, ImageBlob* blob) {
  return true;
}

// Batch preprocessing with OpenMP
bool Model::preprocess(const std::vector<cv::Mat>& input_im_batch,
                       std::vector<ImageBlob>* blob_batch,
                       int thread_num) {
  int batch_size = input_im_batch.size();
  bool success = true;
  thread_num = std::min(thread_num, batch_size);
  #pragma omp parallel for num_threads(thread_num)
  for (int i = 0; i < input_im_batch.size(); ++i) {
    cv::Mat im = input_im_batch[i].clone();
    if (!transforms_.Run(&im, &(*blob_batch)[i])) {
      success = false;
    }
  }
  return success;
}

bool Model::predict(const cv::Mat& im, ClsResult* result) {
  inputs_.clear();
  if (type == "detector") {
    std::cerr << "Loading model is a 'detector', DetResult should be passed to "
                 "function predict()!" << std::endl;
    return false;
  } else if (type == "segmenter") {
    std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
                 "to function predict()!" << std::endl;
    return false;
  }
  // Preprocess the input image
@@ -144,20 +174,79 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
  result->category_id = std::distance(std::begin(outputs_), ptr);
  result->score = *ptr;
  result->category = labels[result->category_id];
  return true;
}

bool Model::predict(const std::vector<cv::Mat>& im_batch,
                    std::vector<ClsResult>* results,
                    int thread_num) {
  for (auto& inputs : inputs_batch_) {
    inputs.clear();
  }
  if (type == "detector") {
    std::cerr << "Loading model is a 'detector', DetResult should be passed to "
                 "function predict()!" << std::endl;
    return false;
  } else if (type == "segmenter") {
    std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
                 "to function predict()!" << std::endl;
    return false;
  }
  // Preprocess the input images
  if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
    std::cerr << "Preprocess failed!" << std::endl;
    return false;
  }
  // Run prediction with the loaded model
  int batch_size = im_batch.size();
  auto in_tensor = predictor_->GetInputTensor("image");
  int h = inputs_batch_[0].new_im_size_[0];
  int w = inputs_batch_[0].new_im_size_[1];
  in_tensor->Reshape({batch_size, 3, h, w});
  std::vector<float> inputs_data(batch_size * 3 * h * w);
  for (int i = 0; i < batch_size; ++i) {
    std::copy(inputs_batch_[i].im_data_.begin(),
              inputs_batch_[i].im_data_.end(),
              inputs_data.begin() + i * 3 * h * w);
  }
  in_tensor->copy_from_cpu(inputs_data.data());
  predictor_->ZeroCopyRun();
  // Fetch the model outputs
  auto output_names = predictor_->GetOutputNames();
  auto output_tensor = predictor_->GetOutputTensor(output_names[0]);
  std::vector<int> output_shape = output_tensor->shape();
  int size = 1;
  for (const auto& i : output_shape) {
    size *= i;
  }
  outputs_.resize(size);
  output_tensor->copy_to_cpu(outputs_.data());
  // Postprocess the model outputs
  int single_batch_size = size / batch_size;
  for (int i = 0; i < batch_size; ++i) {
    auto start_ptr = std::begin(outputs_);
    auto end_ptr = std::begin(outputs_);
    std::advance(start_ptr, i * single_batch_size);
    std::advance(end_ptr, (i + 1) * single_batch_size);
    auto ptr = std::max_element(start_ptr, end_ptr);
    (*results)[i].category_id = std::distance(start_ptr, ptr);
    (*results)[i].score = *ptr;
    (*results)[i].category = labels[(*results)[i].category_id];
  }
  return true;
}
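// Note on the slicing above: with batch_size images and a C-class model, the
// flat output has size = batch_size * C and single_batch_size = C, so image
// i's scores occupy [i * C, (i + 1) * C), over which max_element picks the
// predicted label.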
bool Model::predict(const cv::Mat& im, DetResult* result) {
  inputs_.clear();
  result->clear();
  if (type == "classifier") {
    std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
                 "to function predict()!" << std::endl;
    return false;
  } else if (type == "segmenter") {
    std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
                 "to function predict()!" << std::endl;
    return false;
  }
@@ -172,6 +261,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
  auto im_tensor = predictor_->GetInputTensor("image");
  im_tensor->Reshape({1, 3, h, w});
  im_tensor->copy_from_cpu(inputs_.im_data_.data());

  if (name == "YOLOv3") {
    auto im_size_tensor = predictor_->GetInputTensor("im_size");
    im_size_tensor->Reshape({1, 2});
@@ -247,6 +337,180 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
                       static_cast<int>(box->coordinate[3])};
    }
  }
  return true;
}
bool Model::predict(const std::vector<cv::Mat>& im_batch,
                    std::vector<DetResult>* result,
                    int thread_num) {
  for (auto& inputs : inputs_batch_) {
    inputs.clear();
  }
  if (type == "classifier") {
    std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
                 "to function predict()!" << std::endl;
    return false;
  } else if (type == "segmenter") {
    std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
                 "to function predict()!" << std::endl;
    return false;
  }
  int batch_size = im_batch.size();
  // Preprocess the input images
  if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
    std::cerr << "Preprocess failed!" << std::endl;
    return false;
  }
  // Batch padding for RCNN-style models
  if (batch_size > 1) {
    if (name == "FasterRCNN" || name == "MaskRCNN") {
      int max_h = -1;
      int max_w = -1;
      for (int i = 0; i < batch_size; ++i) {
        max_h = std::max(max_h, inputs_batch_[i].new_im_size_[0]);
        max_w = std::max(max_w, inputs_batch_[i].new_im_size_[1]);
      }
      thread_num = std::min(thread_num, batch_size);
      #pragma omp parallel for num_threads(thread_num)
      for (int i = 0; i < batch_size; ++i) {
        int h = inputs_batch_[i].new_im_size_[0];
        int w = inputs_batch_[i].new_im_size_[1];
        int c = im_batch[i].channels();
        if (max_h != h || max_w != w) {
          std::vector<float> temp_buffer(c * max_h * max_w);
          float* temp_ptr = temp_buffer.data();
          float* ptr = inputs_batch_[i].im_data_.data();
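          // Copy the original h x w rows into the top-left corner of a
          // zero-initialized max_h x max_w buffer, channel by channel and
          // row by row, so every image in the batch ends up with one shape.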
for (int cur_channel = c - 1; cur_channel >= 0; --cur_channel) {
int ori_pos = cur_channel * h * w + (h - 1) * w;
int des_pos = cur_channel * max_h * max_w + (h - 1) * max_w;
int last_pos = cur_channel * h * w;
for (; ori_pos >= last_pos; ori_pos -= w, des_pos -= max_w) {
memcpy(temp_ptr + des_pos, ptr + ori_pos, w * sizeof(float));
}
}
inputs_batch_[i].im_data_.swap(temp_buffer);
inputs_batch_[i].new_im_size_[0] = max_h;
inputs_batch_[i].new_im_size_[1] = max_w;
}
}
}
}
int h = inputs_batch_[0].new_im_size_[0];
int w = inputs_batch_[0].new_im_size_[1];
auto im_tensor = predictor_->GetInputTensor("image");
im_tensor->Reshape({batch_size, 3, h, w});
std::vector<float> inputs_data(batch_size * 3 * h * w);
for (int i = 0; i < batch_size; ++i) {
std::copy(inputs_batch_[i].im_data_.begin(),
inputs_batch_[i].im_data_.end(),
inputs_data.begin() + i * 3 * h * w);
}
im_tensor->copy_from_cpu(inputs_data.data());
if (name == "YOLOv3") {
auto im_size_tensor = predictor_->GetInputTensor("im_size");
im_size_tensor->Reshape({batch_size, 2});
std::vector<int> inputs_data_size(batch_size * 2);
for (int i = 0; i < batch_size; ++i) {
std::copy(inputs_batch_[i].ori_im_size_.begin(),
inputs_batch_[i].ori_im_size_.end(),
inputs_data_size.begin() + 2 * i);
}
im_size_tensor->copy_from_cpu(inputs_data_size.data());
} else if (name == "FasterRCNN" || name == "MaskRCNN") {
auto im_info_tensor = predictor_->GetInputTensor("im_info");
auto im_shape_tensor = predictor_->GetInputTensor("im_shape");
im_info_tensor->Reshape({batch_size, 3});
im_shape_tensor->Reshape({batch_size, 3});
std::vector<float> im_info(3 * batch_size);
std::vector<float> im_shape(3 * batch_size);
for (int i = 0; i < batch_size; ++i) {
float ori_h = static_cast<float>(inputs_batch_[i].ori_im_size_[0]);
float ori_w = static_cast<float>(inputs_batch_[i].ori_im_size_[1]);
float new_h = static_cast<float>(inputs_batch_[i].new_im_size_[0]);
float new_w = static_cast<float>(inputs_batch_[i].new_im_size_[1]);
im_info[i * 3] = new_h;
im_info[i * 3 + 1] = new_w;
im_info[i * 3 + 2] = inputs_batch_[i].scale;
im_shape[i * 3] = ori_h;
im_shape[i * 3 + 1] = ori_w;
im_shape[i * 3 + 2] = 1.0;
}
im_info_tensor->copy_from_cpu(im_info.data());
im_shape_tensor->copy_from_cpu(im_shape.data());
}
// 使用加载的模型进行预测
predictor_->ZeroCopyRun();
// 读取所有box
std::vector<float> output_box;
auto output_names = predictor_->GetOutputNames();
auto output_box_tensor = predictor_->GetOutputTensor(output_names[0]);
std::vector<int> output_box_shape = output_box_tensor->shape();
int size = 1;
for (const auto& i : output_box_shape) {
size *= i;
}
output_box.resize(size);
output_box_tensor->copy_to_cpu(output_box.data());
if (size < 6) {
std::cerr << "[WARNING] There's no object detected." << std::endl;
return true;
}
auto lod_vector = output_box_tensor->lod();
int num_boxes = size / 6;
// 解析预测框box
for (int i = 0; i < lod_vector[0].size() - 1; ++i) {
for (int j = lod_vector[0][i]; j < lod_vector[0][i + 1]; ++j) {
Box box;
box.category_id = static_cast<int>(round(output_box[j * 6]));
box.category = labels[box.category_id];
box.score = output_box[j * 6 + 1];
float xmin = output_box[j * 6 + 2];
float ymin = output_box[j * 6 + 3];
float xmax = output_box[j * 6 + 4];
float ymax = output_box[j * 6 + 5];
float w = xmax - xmin + 1;
float h = ymax - ymin + 1;
box.coordinate = {xmin, ymin, w, h};
(*result)[i].boxes.push_back(std::move(box));
}
}
// For instance segmentation, also parse the masks
if (name == "MaskRCNN") {
std::vector<float> output_mask;
auto output_mask_tensor = predictor_->GetOutputTensor(output_names[1]);
std::vector<int> output_mask_shape = output_mask_tensor->shape();
int masks_size = 1;
for (const auto& i : output_mask_shape) {
masks_size *= i;
}
int mask_pixels = output_mask_shape[2] * output_mask_shape[3];
int classes = output_mask_shape[1];
output_mask.resize(masks_size);
output_mask_tensor->copy_to_cpu(output_mask.data());
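// The mask tensor is laid out as [num_boxes, classes, res, res]; each box
// keeps only the channel belonging to its predicted category.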
int mask_idx = 0;
for (int i = 0; i < lod_vector[0].size() - 1; ++i) {
(*result)[i].mask_resolution = output_mask_shape[2];
for (int j = 0; j < (*result)[i].boxes.size(); ++j) {
Box* box = &(*result)[i].boxes[j];
int category_id = box->category_id;
auto begin_mask = output_mask.begin() +
(mask_idx * classes + category_id) * mask_pixels;
auto end_mask = begin_mask + mask_pixels;
box->mask.data.assign(begin_mask, end_mask);
box->mask.shape = {static_cast<int>(box->coordinate[2]),
static_cast<int>(box->coordinate[3])};
mask_idx++;
}
}
}
return true;
}
bool Model::predict(const cv::Mat& im, SegResult* result) {
...@@ -254,13 +518,11 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
inputs_.clear();
if (type == "classifier") {
std::cerr << "The loaded model is a 'classifier'; ClsResult should be "
"passed to predict()!" << std::endl;
return false;
} else if (type == "detector") {
std::cerr << "The loaded model is a 'detector'; DetResult should be "
"passed to predict()!" << std::endl;
return false;
}
...@@ -288,6 +550,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
size *= i;
result->label_map.shape.push_back(i);
}
result->label_map.data.resize(size);
output_label_tensor->copy_to_cpu(result->label_map.data.data());
...@@ -299,6 +562,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
size *= i;
result->score_map.shape.push_back(i);
}
result->score_map.data.resize(size);
output_score_tensor->copy_to_cpu(result->score_map.data.data());
...@@ -325,8 +589,8 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
inputs_.im_size_before_resize_.pop_back();
auto padding_w = before_shape[0];
auto padding_h = before_shape[1];
mask_label = mask_label(cv::Rect(0, 0, padding_h, padding_w));
mask_score = mask_score(cv::Rect(0, 0, padding_h, padding_w));
} else if (*iter == "resize") {
auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx];
inputs_.im_size_before_resize_.pop_back();
...@@ -343,7 +607,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
cv::Size(resize_h, resize_w),
0,
0,
cv::INTER_LINEAR);
}
++idx;
}
...@@ -353,6 +617,155 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
result->score_map.data.assign(mask_score.begin<float>(),
mask_score.end<float>());
result->score_map.shape = {mask_score.rows, mask_score.cols};
return true;
}
bool Model::predict(const std::vector<cv::Mat>& im_batch,
std::vector<SegResult>* result,
int thread_num) {
for (auto& inputs : inputs_batch_) {
inputs.clear();
}
if (type == "classifier") {
std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
"to function predict()!" << std::endl;
return false;
} else if (type == "detector") {
std::cerr << "Loading model is a 'detector', DetResult should be passed to "
"function predict()!" << std::endl;
return false;
}
// Preprocess the input images
if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
int batch_size = im_batch.size();
(*result).clear();
(*result).resize(batch_size);
int h = inputs_batch_[0].new_im_size_[0];
int w = inputs_batch_[0].new_im_size_[1];
auto im_tensor = predictor_->GetInputTensor("image");
im_tensor->Reshape({batch_size, 3, h, w});
std::vector<float> inputs_data(batch_size * 3 * h * w);
for (int i = 0; i < batch_size; ++i) {
std::copy(inputs_batch_[i].im_data_.begin(),
inputs_batch_[i].im_data_.end(),
inputs_data.begin() + i * 3 * h * w);
}
im_tensor->copy_from_cpu(inputs_data.data());
// Run inference with the loaded model
predictor_->ZeroCopyRun();
// Fetch the label map produced by argmax over the class scores
auto output_names = predictor_->GetOutputNames();
auto output_label_tensor = predictor_->GetOutputTensor(output_names[0]);
std::vector<int> output_label_shape = output_label_tensor->shape();
int size = 1;
for (const auto& i : output_label_shape) {
size *= i;
}
std::vector<int64_t> output_labels(size, 0);
output_label_tensor->copy_to_cpu(output_labels.data());
auto output_labels_iter = output_labels.begin();
int single_batch_size = size / batch_size;
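// The output is evenly split across the batch: image i owns the slice
// [i * single_batch_size, (i + 1) * single_batch_size) of the label map.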
for (int i = 0; i < batch_size; ++i) {
(*result)[i].label_map.data.resize(single_batch_size);
(*result)[i].label_map.shape.push_back(1);
for (int j = 1; j < output_label_shape.size(); ++j) {
(*result)[i].label_map.shape.push_back(output_label_shape[j]);
}
std::copy(output_labels_iter + i * single_batch_size,
output_labels_iter + (i + 1) * single_batch_size,
(*result)[i].label_map.data.data());
}
// Fetch the prediction confidence score map
auto output_score_tensor = predictor_->GetOutputTensor(output_names[1]);
std::vector<int> output_score_shape = output_score_tensor->shape();
size = 1;
for (const auto& i : output_score_shape) {
size *= i;
}
std::vector<float> output_scores(size, 0);
output_score_tensor->copy_to_cpu(output_scores.data());
auto output_scores_iter = output_scores.begin();
int single_batch_score_size = size / batch_size;
for (int i = 0; i < batch_size; ++i) {
(*result)[i].score_map.data.resize(single_batch_score_size);
(*result)[i].score_map.shape.push_back(1);
for (int j = 1; j < output_score_shape.size(); ++j) {
(*result)[i].score_map.shape.push_back(output_score_shape[j]);
}
std::copy(output_scores_iter + i * single_batch_score_size,
output_scores_iter + (i + 1) * single_batch_score_size,
(*result)[i].score_map.data.data());
}
// Map the outputs back to the original image size
for (int i = 0; i < batch_size; ++i) {
std::vector<uint8_t> label_map((*result)[i].label_map.data.begin(),
(*result)[i].label_map.data.end());
cv::Mat mask_label((*result)[i].label_map.shape[1],
(*result)[i].label_map.shape[2],
CV_8UC1,
label_map.data());
cv::Mat mask_score((*result)[i].score_map.shape[2],
(*result)[i].score_map.shape[3],
CV_32FC1,
(*result)[i].score_map.data.data());
int idx = 1;
int len_postprocess = inputs_batch_[i].im_size_before_resize_.size();
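// Replay the recorded preprocessing steps in reverse: first crop away any
// padding, then resize the maps back toward the original image size.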
for (std::vector<std::string>::reverse_iterator iter =
inputs_batch_[i].reshape_order_.rbegin();
iter != inputs_batch_[i].reshape_order_.rend();
++iter) {
if (*iter == "padding") {
auto before_shape =
inputs_batch_[i].im_size_before_resize_[len_postprocess - idx];
inputs_batch_[i].im_size_before_resize_.pop_back();
auto padding_w = before_shape[0];
auto padding_h = before_shape[1];
mask_label = mask_label(cv::Rect(0, 0, padding_h, padding_w));
mask_score = mask_score(cv::Rect(0, 0, padding_h, padding_w));
} else if (*iter == "resize") {
auto before_shape =
inputs_batch_[i].im_size_before_resize_[len_postprocess - idx];
inputs_batch_[i].im_size_before_resize_.pop_back();
auto resize_w = before_shape[0];
auto resize_h = before_shape[1];
cv::resize(mask_label,
mask_label,
cv::Size(resize_h, resize_w),
0,
0,
cv::INTER_NEAREST);
cv::resize(mask_score,
mask_score,
cv::Size(resize_h, resize_w),
0,
0,
cv::INTER_LINEAR);
}
++idx;
}
(*result)[i].label_map.data.assign(mask_label.begin<uint8_t>(),
mask_label.end<uint8_t>());
(*result)[i].label_map.shape = {mask_label.rows, mask_label.cols};
(*result)[i].score_map.data.assign(mask_score.begin<float>(),
mask_score.end<float>());
(*result)[i].score_map.shape = {mask_score.rows, mask_score.cols};
}
return true;
}
} // namespace PaddleX
...@@ -95,11 +95,13 @@ bool Padding::Run(cv::Mat* im, ImageBlob* data) {
if (width_ > 1 && height_ > 1) {
padding_w = width_ - im->cols;
padding_h = height_ - im->rows;
} else if (coarsest_stride_ >= 1) {
int h = im->rows;
int w = im->cols;
// Round each side up to the nearest multiple of coarsest_stride_,
// e.g. a 500x375 image is padded to 512x384 with a stride of 32.
padding_h = ceil(h * 1.0 / coarsest_stride_) * coarsest_stride_ - h;
padding_w = ceil(w * 1.0 / coarsest_stride_) * coarsest_stride_ - w;
}
if (padding_h < 0 || padding_w < 0) {
...@@ -219,4 +221,5 @@ bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
}
return true;
}
} // namespace PaddleX
...@@ -145,4 +145,4 @@ std::string generate_save_path(const std::string& save_dir,
std::string image_name(file_path.substr(pos + 1));
return save_dir + OS_PATH_SEP + image_name;
}
} // namespace PaddleX
...@@ -19,16 +19,16 @@
### Step2: Download the PaddlePaddle C++ inference library fluid_inference

PaddlePaddle provides different prebuilt C++ inference libraries for different `CPU` and `CUDA` configurations, with and without TensorRT support. PaddleX currently depends on Paddle 1.8; several prebuilt libraries are listed below:

| Version | Inference library (1.8.2) |
| ---- | ---- |
| ubuntu14.04_cpu_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-mkl/fluid_inference.tgz) |
| ubuntu14.04_cpu_avx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-openblas/fluid_inference.tgz) |
| ubuntu14.04_cpu_noavx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-noavx-openblas/fluid_inference.tgz) |
| ubuntu14.04_cuda9.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz) |
| ubuntu14.04_cuda10.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10-cudnn7-avx-mkl/fluid_inference.tgz) |
| ubuntu14.04_cuda10.1_cudnn7.6_avx_mkl_trt6 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6%2Ffluid_inference.tgz) |

For more and newer versions, download the one matching your environment from the [C++ inference library download list](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html).
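For example, a library can be downloaded and extracted as follows (shown for `ubuntu14.04_cpu_avx_mkl`; substitute the link for the version you chose):

```shell
# Illustrative commands; replace the URL with the build matching your environment
wget https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-mkl/fluid_inference.tgz
tar -xzf fluid_inference.tgz
```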
......
...@@ -27,18 +27,18 @@ git clone https://github.com/PaddlePaddle/PaddleX.git

### Step2: Download the PaddlePaddle C++ inference library fluid_inference

PaddlePaddle provides different prebuilt C++ inference libraries for different `CPU` and `CUDA` configurations, with and without TensorRT support. PaddleX currently depends on Paddle 1.8; several prebuilt libraries are listed below:

| Version | Inference library (1.8.2) | Compiler | Build tool | cuDNN | CUDA |
| ---- | ---- | ---- | ---- | ---- | ---- |
| cpu_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | - | - |
| cpu_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | - | - |
| cuda9.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
| cuda9.0_cudnn7_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
| cuda10.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post107/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.5.0 | 10.0 |

For more and newer versions, download the one matching your environment from the [C++ inference library download list](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/windows_cpp_inference.html).

After extraction, the `D:\projects\fluid_inference*\` directory mainly contains:
```
...@@ -72,12 +72,14 @@ PaddlePaddle provides different prebuilt C++ inference libraries for different CPU/CUDA...

![step2.2](../../images/vs2019_step3.png)

3. Click `项目` -> `CMake设置` (Project -> CMake Settings)

![step3](../../images/vs2019_step4.png)

4. Click `浏览` (Browse) to set the compile-option paths for `CUDA`, `OpenCV`, and the `Paddle inference library`

![step3](../../images/vs2019_step5.png)

The dependency paths are described below (entries marked with * are only needed when using the **GPU version** of the inference library; keep the CUDA library versions aligned: **use CUDA 9.0 or 10.0, not 9.2, 10.1, or similar versions**):

| Parameter | Meaning |
...@@ -95,13 +97,17 @@ PaddlePaddle provides different prebuilt C++ inference libraries for different CPU/CUDA...

After downloading yaml-cpp.zip, there is no need to extract it; in cmake/yaml.cmake, change the URL `https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` to the local path of the downloaded file.
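For example, the edited line in cmake/yaml.cmake might look as follows (the local path is illustrative):

```
# in cmake/yaml.cmake, replace the remote URL with the local file path
URL D:/projects/deps/yaml-cpp.zip
```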
5. To use the model encryption feature, manually download the [Windows model encryption tool](https://bj.bcebos.com/paddlex/tools/win/paddlex-encryption.zip) and extract it to a directory \\path\\to\\paddlex-encryption. When configuring, check WITH_ENCRYPTION and set ENCRYPTION_DIR to \\path\\to\\paddlex-encryption.
![step_encryption](../../images/vs2019_step_encryption.png)
![step4](../../images/vs2019_step6.png)
**After completing the settings**, click `保存并生成CMake缓存以加载变量` (save and generate the CMake cache to load variables) shown in the figure above.

6. Click `生成` -> `全部生成` (Build -> Build All)

![step6](../../images/vs2019_step7.png)

### Step5: Prediction and visualization
......
...@@ -2,7 +2,7 @@

PaddleX provides a lightweight model encryption deployment solution: the encryption tool built into PaddleX encrypts the inference model, and the deployment SDK can load the encrypted model directly for inference, improving the security of deployed AI models.

**The encryption solution now supports both Windows and Linux.**

## 1. Overview

...@@ -40,9 +40,11 @@ PaddleX provides a lightweight model encryption deployment solution...

### 1.2 Encryption tool

[Linux PaddleX model encryption tool](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip): the build script downloads this tool automatically at compile time, or you can download it manually.

[Windows PaddleX model encryption tool](https://bj.bcebos.com/paddlex/tools/win/paddlex-encryption.zip): this version must be downloaded manually.

The Linux encryption tool contains:
```
paddlex-encryption
├── include # headers: paddle_model_decrypt.h (decryption) and paddle_model_encrypt.h (encryption)
...@@ -52,22 +54,38 @@ paddlex-encryption
└── tool # paddlex_encrypt_tool
```
The Windows encryption tool contains:
```
paddlex-encryption
├── include # headers: paddle_model_decrypt.h (decryption) and paddle_model_encrypt.h (encryption)
|
├── lib # dynamic libraries pmodel-encrypt.dll and pmodel-decrypt.dll; import libraries pmodel-encrypt.lib and pmodel-decrypt.lib
|
└── tool # paddlex_encrypt_tool.exe, the model encryption tool
```
### 1.3 Encrypting a PaddleX model

After encrypting a model, the encryption tool generates random key information (used for AES encryption and decryption), which must be passed in at deployment time to decrypt the model.
> The key consists of a 32-byte key plus a 16-byte IV. Note that the generated key is base64-encoded, which widens the range of usable keys.

Linux:
```
./paddlex-encryption/tool/paddlex_encrypt_tool -model_dir /path/to/paddlex_inference_model -save_dir /path/to/paddlex_encrypted_model
```
Windows:
```
./paddlex-encryption/tool/paddlex_encrypt_tool.exe -model_dir /path/to/paddlex_inference_model -save_dir /path/to/paddlex_encrypted_model
```
`-model_dir` specifies the path of the inference model (see [exporting an inference model](deploy_python.html#inference) for exporting a model in inference format; you can use the `inference_model` exported in [exporting the Xiaoduxiong detection model](deploy_python.html#inference). **Note**: because PaddleX is continuously updated, models below version 1.0.0 cannot be used directly for deployment; see [model version upgrade](../upgrade_version.md) to upgrade them). After encryption, the encrypted model is saved to the directory given by `-save_dir`, containing the three files `__model__.encrypted`, `__params__.encrypted`, and `model.yml`; key information is generated at the same time. The command output looks like the image below, where the key is `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`

![](../images/encrypt.png)
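For reference, the `-save_dir` directory should then contain the three files listed above (illustrative layout):

```
/path/to/paddlex_encrypted_model
├── __model__.encrypted
├── __params__.encrypted
└── model.yml
```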
## 2. PaddleX C++ encrypted deployment
### 2.1 Using on Linux

See the [Linux build guide](deploy_cpp/deploy_cpp_linux.md) to compile the C++ deployment code. After a successful build, the prediction demo executables are `build/demo/detector`, `build/demo/classifer`, and `build/demo/segmenter`; choose the one matching your model type. The main command-line parameters are:

| Parameter | Description |
| ---- | ---- |
...@@ -83,7 +101,7 @@ paddlex-encryption
## Example

You can use the test images from [exporting the Xiaoduxiong detection model](deploy_python.md#inference) for prediction.

`Example 1`
...@@ -108,3 +126,34 @@ paddlex-encryption
./build/demo/detector --model_dir=/path/to/models/inference_model --image_list=/root/projects/images_list.txt --use_gpu=1 --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
```

`--key` takes the key produced by the encryption tool, e.g. `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`; the visualized prediction results are saved under the directory given by the `save_dir` parameter.
### 2.2 Using on Windows

See the [Windows build guide](deploy_cpp/deploy_cpp_win_vs2019.md). The parameters are the same as for the Linux deployment. The prediction demo entry programs are paddlex_inference\detector.exe, paddlex_inference\classifer.exe, and paddlex_inference\segmenter.exe.

## Example

You can use the test images from [exporting the Xiaoduxiong detection model](deploy_python.md#inference) for prediction.

`Example 1`

Predict the image `/path/to/xiaoduxiong.jpeg` without `GPU`:
```shell
.\\paddlex_inference\\detector.exe --model_dir=\\path\\to\\inference_model --image=\\path\\to\\xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
```
`--key` takes the key produced by the encryption tool, e.g. `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`; the visualized prediction results are saved under the directory given by the `save_dir` parameter.

`Example 2`:

Predict multiple images `\\path\\to\\image_list.txt` with `GPU`; the contents of image_list.txt are formatted as:
```
\\path\\to\\images\\xiaoduxiong1.jpeg
\\path\\to\\images\\xiaoduxiong2.jpeg
...
\\path\\to\\images\\xiaoduxiongn.jpeg
```
```shell
.\\paddlex_inference\\detector.exe --model_dir=\\path\\to\\models\\inference_model --image_list=\\path\\to\\images_list.txt --use_gpu=1 --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
```
`--key` takes the key produced by the encryption tool, e.g. `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`; the visualized prediction results are saved under the directory given by the `save_dir` parameter.
docs/tutorials/deploy/images/vs2019_step5.png (image replaced: 427.5 KB -> 215.3 KB)
#!/bin/bash
# set -e
#
# readonly VERSION="3.8"
#
# version=$(clang-format -version)
#
# if ! [[ $version == *"$VERSION"* ]]; then
# echo "clang-format version check failed."
# echo "a version containing '$VERSION' is needed, but got '$version'"
# echo "you can install the right version, and make a soft-link to '\$PATH' env"
# exit -1
# fi
#
# clang-format $@