Commit 5b145d45 authored by FlyingQianMM

rewrite postprocess for YOLO predict

@@ -73,7 +73,11 @@ endif()
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include")
    include_directories("${PADDLE_DIR}/third_party/install/snappystream/include")
endif()
# zlib does not exist in 1.8.1
if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/include")
    include_directories("${PADDLE_DIR}/third_party/install/zlib/include")
endif()
include_directories("${PADDLE_DIR}/third_party/boost")
include_directories("${PADDLE_DIR}/third_party/eigen3")

@@ -84,7 +88,10 @@ if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
    link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib")
endif()
if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/lib")
    link_directories("${PADDLE_DIR}/third_party/install/zlib/lib")
endif()
link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib")
link_directories("${PADDLE_DIR}/third_party/install/glog/lib")
link_directories("${PADDLE_DIR}/third_party/install/gflags/lib")
@@ -107,6 +114,14 @@ include_directories(${OpenCV_INCLUDE_DIRS})
if (WIN32)
    add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
    find_package(OpenMP REQUIRED)
    if (OPENMP_FOUND)
        message("OPENMP FOUND")
        set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${OpenMP_C_FLAGS}")
        set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${OpenMP_C_FLAGS}")
        set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${OpenMP_CXX_FLAGS}")
        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${OpenMP_CXX_FLAGS}")
    endif()
    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
@@ -186,8 +201,13 @@ if(WITH_STATIC_LIB)
    set(DEPS
        ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
    if (NOT WIN32)
        set(DEPS
            ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
    else()
        set(DEPS
            ${PADDLE_DIR}/paddle/lib/paddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
    endif()
endif()

if (NOT WIN32)
@@ -204,13 +224,16 @@ if (NOT WIN32)
else()
    set(DEPS ${DEPS}
        ${MATH_LIB} ${MKLDNN_LIB}
        glog gflags_static libprotobuf xxhash libyaml-cppmt)
    if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/lib")
        set(DEPS ${DEPS} zlibstatic)
    endif()
    set(DEPS ${DEPS} libcmt shlwapi)
    if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
        set(DEPS ${DEPS} snappy)
    endif()
    if (EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
        set(DEPS ${DEPS} snappystream)
    endif()
endif(NOT WIN32)
@@ -236,7 +259,9 @@ if(WITH_ENCRYPTION)
    link_directories("${ENCRYPTION_DIR}/lib")
    set(DEPS ${DEPS} ${ENCRYPTION_DIR}/lib/libpmodel-decrypt${CMAKE_SHARED_LIBRARY_SUFFIX})
  else()
    include_directories("${ENCRYPTION_DIR}/include")
    link_directories("${ENCRYPTION_DIR}/lib")
    set(DEPS ${DEPS} ${ENCRYPTION_DIR}/lib/pmodel-decrypt${CMAKE_STATIC_LIBRARY_SUFFIX})
  endif()
endif()
@@ -284,10 +309,23 @@ if (WIN32 AND WITH_MKL)
        COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll
        COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
        COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
        COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll
    )
    # for encryption
    if (EXISTS "${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll")
        add_custom_command(TARGET classifier POST_BUILD
            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll
            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll
        )
        add_custom_command(TARGET detector POST_BUILD
            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll
            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll
        )
        add_custom_command(TARGET segmenter POST_BUILD
            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll
            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll
        )
    endif()
endif()

file(COPY "${CMAKE_SOURCE_DIR}/include/paddlex/visualize.h"
......
@@ -22,9 +22,9 @@
      "type": "PATH"
    },
    {
      "name": "CUDA_LIB",
      "value": "",
      "type": "PATH"
    },
    {
      "name": "WITH_STATIC_LIB",
@@ -40,6 +40,16 @@
      "name": "WITH_GPU",
      "value": "False",
      "type": "BOOL"
    },
    {
      "name": "WITH_ENCRYPTION",
      "value": "False",
      "type": "BOOL"
    },
    {
      "name": "ENCRYPTION_DIR",
      "value": "",
      "type": "PATH"
    }
  ]
}
......
find_package(Git REQUIRED)
include(ExternalProject)
message("${CMAKE_BUILD_TYPE}")
......
@@ -13,14 +13,19 @@
// limitations under the License.

#include <glog/logging.h>
#include <omp.h>

#include <algorithm>
#include <chrono>  // NOLINT
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>

#include "include/paddlex/paddlex.h"

using namespace std::chrono;  // NOLINT

DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
DEFINE_bool(use_trt, false, "Infering with TensorRT");
@@ -28,6 +33,11 @@ DEFINE_int32(gpu_id, 0, "GPU card id");
DEFINE_string(key, "", "key of encryption");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_int32(batch_size, 1, "Batch size of infering");
DEFINE_int32(thread_num,
             omp_get_num_procs(),
             "Number of preprocessing threads");
DEFINE_bool(use_ir_optim, true, "use ir optimization");

int main(int argc, char** argv) {
  // Parse the command line
@@ -44,32 +54,81 @@ int main(int argc, char** argv) {
  // Load the model
  PaddleX::Model model;
  model.Init(FLAGS_model_dir,
             FLAGS_use_gpu,
             FLAGS_use_trt,
             FLAGS_gpu_id,
             FLAGS_key,
             FLAGS_use_ir_optim);

  // Run prediction
  double total_running_time_s = 0.0;
  double total_imread_time_s = 0.0;
  int imgs = 1;
  if (FLAGS_image_list != "") {
    std::ifstream inf(FLAGS_image_list);
    if (!inf) {
      std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
      return -1;
    }
    // Batch prediction
    std::string image_path;
    std::vector<std::string> image_paths;
    while (getline(inf, image_path)) {
      image_paths.push_back(image_path);
    }
    imgs = image_paths.size();
    for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
      auto start = system_clock::now();
      // Read the images
      int im_vec_size =
          std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
      std::vector<cv::Mat> im_vec(im_vec_size - i);
      std::vector<PaddleX::ClsResult> results(im_vec_size - i,
                                              PaddleX::ClsResult());
      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
      #pragma omp parallel for num_threads(thread_num)
      for (int j = i; j < im_vec_size; ++j) {
        im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
      }
      auto imread_end = system_clock::now();
      model.predict(im_vec, &results, thread_num);
      auto imread_duration = duration_cast<microseconds>(imread_end - start);
      total_imread_time_s += static_cast<double>(imread_duration.count()) *
                             microseconds::period::num /
                             microseconds::period::den;
      auto end = system_clock::now();
      auto duration = duration_cast<microseconds>(end - start);
      total_running_time_s += static_cast<double>(duration.count()) *
                              microseconds::period::num /
                              microseconds::period::den;
      for (int j = i; j < im_vec_size; ++j) {
        std::cout << "Path:" << image_paths[j]
                  << ", predict label: " << results[j - i].category
                  << ", label_id:" << results[j - i].category_id
                  << ", score: " << results[j - i].score << std::endl;
      }
    }
  } else {
    auto start = system_clock::now();
    PaddleX::ClsResult result;
    cv::Mat im = cv::imread(FLAGS_image, 1);
    model.predict(im, &result);
    auto end = system_clock::now();
    auto duration = duration_cast<microseconds>(end - start);
    total_running_time_s += static_cast<double>(duration.count()) *
                            microseconds::period::num /
                            microseconds::period::den;
    std::cout << "Predict label: " << result.category
              << ", label_id:" << result.category_id
              << ", score: " << result.score << std::endl;
  }
  std::cout << "Total running time: " << total_running_time_s
            << " s, average running time: " << total_running_time_s / imgs
            << " s/img, total read img time: " << total_imread_time_s
            << " s, average read time: " << total_imread_time_s / imgs
            << " s/img, batch_size = " << FLAGS_batch_size << std::endl;
  return 0;
}
@@ -13,15 +13,21 @@
// limitations under the License.

#include <glog/logging.h>
#include <omp.h>

#include <algorithm>
#include <chrono>  // NOLINT
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>

#include "include/paddlex/paddlex.h"
#include "include/paddlex/visualize.h"

using namespace std::chrono;  // NOLINT

DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
DEFINE_bool(use_trt, false, "Infering with TensorRT");
@@ -30,6 +36,14 @@ DEFINE_string(key, "", "key of encryption");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_string(save_dir, "output", "Path to save visualized image");
DEFINE_int32(batch_size, 1, "Batch size of infering");
DEFINE_double(threshold,
              0.5,
              "The minimum scores of target boxes which are shown");
DEFINE_int32(thread_num,
             omp_get_num_procs(),
             "Number of preprocessing threads");
DEFINE_bool(use_ir_optim, true, "use ir optimization");

int main(int argc, char** argv) {
  // Parse the command-line arguments
@@ -43,11 +57,18 @@ int main(int argc, char** argv) {
    std::cerr << "--image or --image_list need to be defined" << std::endl;
    return -1;
  }
  // Load the model
  PaddleX::Model model;
  model.Init(FLAGS_model_dir,
             FLAGS_use_gpu,
             FLAGS_use_trt,
             FLAGS_gpu_id,
             FLAGS_key,
             FLAGS_use_ir_optim);
  double total_running_time_s = 0.0;
  double total_imread_time_s = 0.0;
  int imgs = 1;
  auto colormap = PaddleX::GenerateColorMap(model.labels.size());
  std::string save_dir = "output";
  // Run prediction
@@ -58,47 +79,83 @@ int main(int argc, char** argv) {
      return -1;
    }
    std::string image_path;
    std::vector<std::string> image_paths;
    while (getline(inf, image_path)) {
      image_paths.push_back(image_path);
    }
    imgs = image_paths.size();
    for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
      auto start = system_clock::now();
      int im_vec_size =
          std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
      std::vector<cv::Mat> im_vec(im_vec_size - i);
      std::vector<PaddleX::DetResult> results(im_vec_size - i,
                                              PaddleX::DetResult());
      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
      #pragma omp parallel for num_threads(thread_num)
      for (int j = i; j < im_vec_size; ++j) {
        im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
      }
      auto imread_end = system_clock::now();
      model.predict(im_vec, &results, thread_num);
      auto imread_duration = duration_cast<microseconds>(imread_end - start);
      total_imread_time_s += static_cast<double>(imread_duration.count()) *
                             microseconds::period::num /
                             microseconds::period::den;
      auto end = system_clock::now();
      auto duration = duration_cast<microseconds>(end - start);
      total_running_time_s += static_cast<double>(duration.count()) *
                              microseconds::period::num /
                              microseconds::period::den;
      // Print the predicted boxes
      for (int j = 0; j < im_vec_size - i; ++j) {
        for (int k = 0; k < results[j].boxes.size(); ++k) {
          std::cout << "image file: " << image_paths[i + j] << ", ";
          std::cout << "predict label: " << results[j].boxes[k].category
                    << ", label_id:" << results[j].boxes[k].category_id
                    << ", score: " << results[j].boxes[k].score
                    << ", box(xmin, ymin, w, h):("
                    << results[j].boxes[k].coordinate[0] << ", "
                    << results[j].boxes[k].coordinate[1] << ", "
                    << results[j].boxes[k].coordinate[2] << ", "
                    << results[j].boxes[k].coordinate[3] << ")" << std::endl;
        }
      }
      // Visualize
      for (int j = 0; j < im_vec_size - i; ++j) {
        cv::Mat vis_img = PaddleX::Visualize(
            im_vec[j], results[j], model.labels, colormap, FLAGS_threshold);
        std::string save_path =
            PaddleX::generate_save_path(FLAGS_save_dir, image_paths[i + j]);
        cv::imwrite(save_path, vis_img);
        std::cout << "Visualized output saved as " << save_path << std::endl;
      }
    }
  } else {
    auto start = system_clock::now();
    PaddleX::DetResult result;
    cv::Mat im = cv::imread(FLAGS_image, 1);
    model.predict(im, &result);
    auto end = system_clock::now();
    auto duration = duration_cast<microseconds>(end - start);
    total_running_time_s += static_cast<double>(duration.count()) *
                            microseconds::period::num /
                            microseconds::period::den;
    // Print the predicted boxes
    for (int i = 0; i < result.boxes.size(); ++i) {
      std::cout << "image file: " << FLAGS_image << std::endl;
      std::cout << ", predict label: " << result.boxes[i].category
                << ", label_id:" << result.boxes[i].category_id
                << ", score: " << result.boxes[i].score
                << ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0]
                << ", " << result.boxes[i].coordinate[1] << ", "
                << result.boxes[i].coordinate[2] << ", "
                << result.boxes[i].coordinate[3] << ")" << std::endl;
    }
    // Visualize
    cv::Mat vis_img =
        PaddleX::Visualize(im, result, model.labels, colormap, FLAGS_threshold);
    std::string save_path =
        PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
    cv::imwrite(save_path, vis_img);
@@ -106,5 +163,11 @@ int main(int argc, char** argv) {
    std::cout << "Visualized output saved as " << save_path << std::endl;
  }
  std::cout << "Total running time: " << total_running_time_s
            << " s, average running time: " << total_running_time_s / imgs
            << " s/img, total read img time: " << total_imread_time_s
            << " s, average read img time: " << total_imread_time_s / imgs
            << " s, batch_size = " << FLAGS_batch_size << std::endl;
  return 0;
}
@@ -13,15 +13,20 @@
// limitations under the License.

#include <glog/logging.h>
#include <omp.h>

#include <algorithm>
#include <chrono>  // NOLINT
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>

#include "include/paddlex/paddlex.h"
#include "include/paddlex/visualize.h"

using namespace std::chrono;  // NOLINT

DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
DEFINE_bool(use_trt, false, "Infering with TensorRT");
@@ -30,6 +35,11 @@ DEFINE_string(key, "", "key of encryption");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_string(save_dir, "output", "Path to save visualized image");
DEFINE_int32(batch_size, 1, "Batch size of infering");
DEFINE_int32(thread_num,
             omp_get_num_procs(),
             "Number of preprocessing threads");
DEFINE_bool(use_ir_optim, false, "use ir optimization");

int main(int argc, char** argv) {
  // Parse the command-line arguments
@@ -46,8 +56,16 @@ int main(int argc, char** argv) {
  // Load the model
  PaddleX::Model model;
  model.Init(FLAGS_model_dir,
             FLAGS_use_gpu,
             FLAGS_use_trt,
             FLAGS_gpu_id,
             FLAGS_key,
             FLAGS_use_ir_optim);
  double total_running_time_s = 0.0;
  double total_imread_time_s = 0.0;
  int imgs = 1;
  auto colormap = PaddleX::GenerateColorMap(model.labels.size());
  // Run prediction
  if (FLAGS_image_list != "") {
@@ -57,23 +75,54 @@ int main(int argc, char** argv) {
      return -1;
    }
    std::string image_path;
    std::vector<std::string> image_paths;
    while (getline(inf, image_path)) {
      image_paths.push_back(image_path);
    }
    imgs = image_paths.size();
    for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
      auto start = system_clock::now();
      int im_vec_size =
          std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
      std::vector<cv::Mat> im_vec(im_vec_size - i);
      std::vector<PaddleX::SegResult> results(im_vec_size - i,
                                              PaddleX::SegResult());
      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
      #pragma omp parallel for num_threads(thread_num)
      for (int j = i; j < im_vec_size; ++j) {
        im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
      }
      auto imread_end = system_clock::now();
      model.predict(im_vec, &results, thread_num);
      auto imread_duration = duration_cast<microseconds>(imread_end - start);
      total_imread_time_s += static_cast<double>(imread_duration.count()) *
                             microseconds::period::num /
                             microseconds::period::den;
      auto end = system_clock::now();
      auto duration = duration_cast<microseconds>(end - start);
      total_running_time_s += static_cast<double>(duration.count()) *
                              microseconds::period::num /
                              microseconds::period::den;
      // Visualize
      for (int j = 0; j < im_vec_size - i; ++j) {
        cv::Mat vis_img =
            PaddleX::Visualize(im_vec[j], results[j], model.labels, colormap);
        std::string save_path =
            PaddleX::generate_save_path(FLAGS_save_dir, image_paths[i + j]);
        cv::imwrite(save_path, vis_img);
        std::cout << "Visualized output saved as " << save_path << std::endl;
      }
    }
  } else {
    auto start = system_clock::now();
    PaddleX::SegResult result;
    cv::Mat im = cv::imread(FLAGS_image, 1);
    model.predict(im, &result);
    auto end = system_clock::now();
    auto duration = duration_cast<microseconds>(end - start);
    total_running_time_s += static_cast<double>(duration.count()) *
                            microseconds::period::num /
                            microseconds::period::den;
    // Visualize
    cv::Mat vis_img = PaddleX::Visualize(im, result, model.labels, colormap);
    std::string save_path =
@@ -82,6 +131,11 @@ int main(int argc, char** argv) {
    result.clear();
    std::cout << "Visualized output saved as " << save_path << std::endl;
  }
  std::cout << "Total running time: " << total_running_time_s
            << " s, average running time: " << total_running_time_s / imgs
            << " s/img, total read img time: " << total_imread_time_s
            << " s, average read img time: " << total_imread_time_s / imgs
            << " s, batch_size = " << FLAGS_batch_size << std::endl;
  return 0;
}
@@ -54,4 +54,4 @@ class ConfigPaser {
  YAML::Node Transforms_;
};

}  // namespace PaddleX
@@ -16,8 +16,11 @@
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <numeric>
#include <string>
#include <vector>

#include "yaml-cpp/yaml.h"

#ifdef _WIN32
@@ -28,53 +31,193 @@
#include "paddle_inference_api.h"  // NOLINT

#include "config_parser.h"  // NOLINT
#include "results.h"  // NOLINT
#include "transforms.h"  // NOLINT

#ifdef WITH_ENCRYPTION
#include "paddle_model_decrypt.h"  // NOLINT
#include "model_code.h"  // NOLINT
#endif

namespace PaddleX {

/*
 * @brief
 * This class encapsulates all the necessary steps of model inference:
 * image matrix preprocessing, model prediction, and postprocessing of the
 * results generated by the prediction. The entire inference process can be
 * simplified as below:
 * 1. preprocess the image matrix (resize, padding, ...)
 * 2. run model inference
 * 3. postprocess the results generated by the inference
 *
 * @example
 * PaddleX::Model cls_model;
 * // initialize model configuration
 * cls_model.Init(cls_model_dir, use_gpu, use_trt, gpu_id, encryption_key);
 * // define a classification result object
 * PaddleX::ClsResult cls_result;
 * // get image matrix from image file
 * cv::Mat im = cv::imread(image_file_path, 1);
 * cls_model.predict(im, &cls_result);
 * */
class Model {
 public:
  /*
   * @brief
   * This method initializes the model configuration.
   *
   * @param model_dir: the directory which contains model.yml
   * @param use_gpu: whether to use the gpu when inferring
   * @param use_trt: whether to use TensorRT when inferring
   * @param gpu_id: the id of the gpu when inferring with the gpu
   * @param key: the encryption key when using an encrypted model
   * @param use_ir_optim: whether to use ir optimization when inferring
   * */
  void Init(const std::string& model_dir,
            bool use_gpu = false,
            bool use_trt = false,
            int gpu_id = 0,
            std::string key = "",
            bool use_ir_optim = true) {
    create_predictor(model_dir, use_gpu, use_trt, gpu_id, key, use_ir_optim);
  }

  void create_predictor(const std::string& model_dir,
                        bool use_gpu = false,
                        bool use_trt = false,
                        int gpu_id = 0,
                        std::string key = "",
                        bool use_ir_optim = true);

  /*
   * @brief
   * This method loads the model configuration, which includes
   * the transform steps and the label list.
   *
   * @param yaml_input: model configuration string
   * @return true if the configuration is loaded successfully
   * */
  bool load_config(const std::string& yaml_input);

  /*
   * @brief
   * This method transforms a single image matrix; the result is
   * returned through the second parameter.
   *
   * @param input_im: single image matrix to be transformed
   * @param blob: the raw data of the single image matrix after the transform
   * @return true if the image matrix is preprocessed successfully
   * */
  bool preprocess(const cv::Mat& input_im, ImageBlob* blob);

  /*
   * @brief
   * This method transforms multiple image matrices; the result is
   * returned through the second parameter.
   *
   * @param input_im_batch: a batch of image matrices to be transformed
   * @param blob_batch: raw data of the batch of image matrices after the transform
   * @param thread_num: the number of preprocessing threads;
   *                    each thread preprocesses a single image matrix
   * @return true if the batch of image matrices is preprocessed successfully
   * */
  bool preprocess(const std::vector<cv::Mat> &input_im_batch,
                  std::vector<ImageBlob> *blob_batch,
                  int thread_num = 1);

  /*
   * @brief
   * This method runs classification model prediction on a single image matrix;
   * the result is returned through the second parameter.
   *
   * @param im: single image matrix to be predicted
   * @param result: classification prediction result after postprocessing
   * @return true if the prediction succeeds
   * */
  bool predict(const cv::Mat& im, ClsResult* result);

  /*
   * @brief
   * This method runs classification model prediction on a batch of image
   * matrices; the results are returned through the second parameter.
   *
   * @param im_batch: a batch of image matrices to be predicted
   * @param results: a batch of classification prediction results after postprocessing
   * @param thread_num: the number of predicting threads; each thread runs
   *                    prediction on a single image matrix
   * @return true if the prediction succeeds
   * */
  bool predict(const std::vector<cv::Mat> &im_batch,
               std::vector<ClsResult> *results,
               int thread_num = 1);

  /*
   * @brief
   * This method runs detection or instance segmentation model prediction on a
   * single image matrix; the result is returned through the second parameter.
   *
   * @param im: single image matrix to be predicted
   * @param result: detection or instance segmentation prediction result after postprocessing
   * @return true if the prediction succeeds
   * */
  bool predict(const cv::Mat& im, DetResult* result);

  /*
   * @brief
   * This method runs detection or instance segmentation model prediction on a
   * batch of image matrices; the results are returned through the second parameter.
   *
   * @param im_batch: a batch of image matrices to be predicted
   * @param result: detection or instance segmentation prediction results after postprocessing
   * @param thread_num: the number of predicting threads; each thread runs
   *                    prediction on a single image matrix
   * @return true if the prediction succeeds
   * */
  bool predict(const std::vector<cv::Mat> &im_batch,
               std::vector<DetResult> *result,
               int thread_num = 1);

  /*
   * @brief
   * This method runs segmentation model prediction on a single image matrix;
   * the result is returned through the second parameter.
   *
   * @param im: single image matrix to be predicted
   * @param result: segmentation prediction result after postprocessing
   * @return true if the prediction succeeds
   * */
  bool predict(const cv::Mat& im, SegResult* result);

  /*
   * @brief
   * This method runs segmentation model prediction on a batch of image
   * matrices; the results are returned through the second parameter.
   *
   * @param im_batch: a batch of image matrices to be predicted
   * @param result: segmentation prediction results after postprocessing
   * @param thread_num: the number of predicting threads; each thread runs
   *                    prediction on a single image matrix
   * @return true if the prediction succeeds
   * */
  bool predict(const std::vector<cv::Mat> &im_batch,
               std::vector<SegResult> *result,
               int thread_num = 1);

  // model type, one of 3 types: classifier, detector, segmenter
  std::string type;
  // model name, such as FasterRCNN, YOLOv3 and so on
  std::string name;
  std::map<int, std::string> labels;
  // transform (preprocessing) pipeline manager
  Transforms transforms_;
  // single-input preprocessed data
  ImageBlob inputs_;
  // batch-input preprocessed data
  std::vector<ImageBlob> inputs_batch_;
  // raw data of the prediction results
  std::vector<float> outputs_;
  // the predictor that runs the model prediction
  std::unique_ptr<paddle::PaddlePredictor> predictor_;
};
}  // namespace PaddleX
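For reference, a minimal sketch of calling the batch API documented above (the model path, image names, and thread count are illustrative assumptions; error handling omitted):

```cpp
#include <opencv2/opencv.hpp>
#include <vector>
#include "include/paddlex/paddlex.h"

int batch_demo() {
  PaddleX::Model model;
  // defaults: CPU, no TensorRT, gpu_id 0, unencrypted model, IR optimization on
  model.Init("/path/to/inference_model");
  std::vector<cv::Mat> batch = {cv::imread("a.jpg", 1),
                                cv::imread("b.jpg", 1)};
  // pre-sized results, one per input image, as the demo programs do
  std::vector<PaddleX::ClsResult> results(batch.size(), PaddleX::ClsResult());
  model.predict(batch, &results, /*thread_num=*/2);  // classification overload
  return results.empty() ? -1 : 0;
}
```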
@@ -20,9 +20,15 @@
namespace PaddleX {

/*
 * @brief
 * This struct represents a mask in instance segmentation tasks.
 * */
template <class T>
struct Mask {
  // raw data of the mask
  std::vector<T> data;
  // the shape of the mask
  std::vector<int> shape;
  void clear() {
    data.clear();
@@ -30,19 +36,34 @@ struct Mask {
  }
};

/*
 * @brief
 * This struct represents a target box in detection or instance segmentation tasks.
 * */
struct Box {
  int category_id;
  // the category label this box belongs to
  std::string category;
  // confidence score
  float score;
  std::vector<float> coordinate;
  Mask<float> mask;
};

/*
 * @brief
 * This class is the base class of prediction results.
 * */
class BaseResult {
 public:
  // model type
  std::string type = "base";
};

/*
 * @brief
 * This class represents a classification result.
 * */
class ClsResult : public BaseResult {
 public:
  int category_id;
@@ -51,17 +72,28 @@ class ClsResult : public BaseResult {
  std::string type = "cls";
};

/*
 * @brief
 * This class represents a detection or instance segmentation result.
 * */
class DetResult : public BaseResult {
 public:
  // target boxes
  std::vector<Box> boxes;
  int mask_resolution;
  std::string type = "det";
  void clear() { boxes.clear(); }
};

/*
 * @brief
 * This class represents a segmentation result.
 * */
class SegResult : public BaseResult {
 public:
  // the label of each pixel of the image matrix
  Mask<int64_t> label_map;
  // the score of each pixel of the image matrix
  Mask<float> score_map;
  std::string type = "seg";
  void clear() {
......
@@ -28,7 +28,10 @@
namespace PaddleX {

/*
 * @brief
 * This class represents the object that stores all preprocessed data
 * */
class ImageBlob {
 public:
  // Original image height and width
@@ -45,21 +48,34 @@ class ImageBlob {
  std::vector<float> im_data_;

  void clear() {
    im_size_before_resize_.clear();
    reshape_order_.clear();
    im_data_.clear();
  }
};

/*
 * @brief
 * Abstraction of the preprocessing operation class
 * */
class Transform {
 public:
  virtual void Init(const YAML::Node& item) = 0;
  /*
   * @brief
   * This method executes the preprocessing operation on the image matrix;
   * the result is returned through the second parameter.
   * @param im: single image matrix to be preprocessed
   * @param data: the raw data of the single image matrix after preprocessing
   * @return true if the transform succeeds
   * */
  virtual bool Run(cv::Mat* im, ImageBlob* data) = 0;
};

/*
 * @brief
 * This class executes the normalization operation on the image matrix
 * */
class Normalize : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -74,6 +90,14 @@ class Normalize : public Transform {
  std::vector<float> std_;
};

/*
 * @brief
 * This class executes the resize-by-short operation on the image matrix. It
 * first resizes the short side of the image matrix to the specified length,
 * scaling the long side in the same proportion. If the new length of the long
 * side exceeds the max size, the long side is resized to the max size instead,
 * and the short side is scaled in the same proportion.
 * */
class ResizeByShort : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -92,6 +116,12 @@ class ResizeByShort : public Transform {
  int max_size_;
};
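(Worked example for the comment above, assuming typical values short_size_ = 800 and max_size_ = 1333: a 1200×600 input would first scale by 800/600 ≈ 1.33 to 1600×800; because 1600 exceeds 1333, the scale is recomputed as 1333/1200 ≈ 1.11, giving a final size of about 1333×667.)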
/*
 * @brief
 * This class executes the resize-by-long operation on the image matrix. It
 * first resizes the long side of the image matrix to the specified length,
 * scaling the short side in the same proportion.
 * */
class ResizeByLong : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -103,13 +133,20 @@ class ResizeByLong : public Transform {
  int long_size_;
};

/*
 * @brief
 * This class executes the resize operation on the image matrix. It resizes
 * the width and the height to the specified lengths.
 * */
class Resize : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
    if (item["interp"].IsDefined()) {
      interp_ = item["interp"].as<std::string>();
    }
    if (item["target_size"].IsScalar()) {
      height_ = item["target_size"].as<int>();
      width_ = item["target_size"].as<int>();
    } else if (item["target_size"].IsSequence()) {
      std::vector<int> target_size = item["target_size"].as<std::vector<int>>();
      width_ = target_size[0];
@@ -128,6 +165,11 @@ class Resize : public Transform {
  std::string interp_;
};

/*
 * @brief
 * This class executes the center crop operation on the image matrix. It crops
 * the center of the image matrix according to the specified size.
 * */
class CenterCrop : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -147,6 +189,11 @@ class CenterCrop : public Transform {
  int width_;
};

/*
 * @brief
 * This class executes the padding operation on the image matrix. It adds a
 * border to the edges of the image matrix.
 * */
class Padding : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -175,7 +222,11 @@ class Padding : public Transform {
  int width_ = 0;
  int height_ = 0;
};

/*
 * @brief
 * This class is the transform operations manager. It stores all the necessary
 * transform operations and runs them in the correct order.
 * */
class Transforms {
 public:
  void Init(const YAML::Node& node, bool to_rgb = true);
......
@@ -22,9 +22,14 @@
#include <io.h>
#else  // Linux/Unix
#include <dirent.h>
// #include <sys/io.h>
#ifdef __arm__  // for arm
#include <aarch64-linux-gnu/sys/stat.h>
#include <aarch64-linux-gnu/sys/types.h>
#else
#include <sys/stat.h>
#include <sys/types.h>
#endif
#include <unistd.h>
#endif
#include <string>
@@ -43,20 +48,55 @@
namespace PaddleX {

/*
 * @brief
 * Generate a visualization colormap for each class
 *
 * @param num_class: number of classes
 * @return color map; the size of the vector is 3 * num_class
 * */
std::vector<int> GenerateColorMap(int num_class);

/*
 * @brief
 * Visualize the detection result
 *
 * @param img: initial image matrix
 * @param results: the detection result
 * @param labels: label map
 * @param colormap: visualization color map
 * @return visualized image matrix
 * */
cv::Mat Visualize(const cv::Mat& img,
                  const DetResult& results,
                  const std::map<int, std::string>& labels,
                  const std::vector<int>& colormap,
                  float threshold = 0.5);

/*
 * @brief
 * Visualize the segmentation result
 *
 * @param img: initial image matrix
 * @param result: the segmentation result
 * @param labels: label map
 * @param colormap: visualization color map
 * @return visualized image matrix
 * */
cv::Mat Visualize(const cv::Mat& img,
                  const SegResult& result,
                  const std::map<int, std::string>& labels,
                  const std::vector<int>& colormap);

/*
 * @brief
 * Generate the save path for the visualized image matrix
 *
 * @param save_dir: directory for saving the visualized image matrix
 * @param file_path: source image file path
 * @return path for saving the visualized result
 * */
std::string generate_save_path(const std::string& save_dir,
                               const std::string& file_path);
}  // namespace PaddleX
@@ -4,10 +4,10 @@ WITH_GPU=OFF
WITH_MKL=ON
# Whether to integrate TensorRT (only valid when WITH_GPU=ON)
WITH_TENSORRT=OFF
# Path of TensorRT; if you need TensorRT, change this to your actual installation path
TENSORRT_DIR=/root/projects/TensorRT/
# Path of the Paddle inference library; change this to your actual installation path
PADDLE_DIR=/root/projects/fluid_inference
# Whether the Paddle inference library is built as a static library
# When TensorRT is used, the Paddle inference library is usually a dynamic library
WITH_STATIC_LIB=OFF
......
This diff is collapsed.
@@ -95,11 +95,13 @@ bool Padding::Run(cv::Mat* im, ImageBlob* data) {
  if (width_ > 1 & height_ > 1) {
    padding_w = width_ - im->cols;
    padding_h = height_ - im->rows;
  } else if (coarsest_stride_ >= 1) {
    int h = im->rows;
    int w = im->cols;
    padding_h =
        ceil(h * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows;
    padding_w =
        ceil(w * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols;
  }

  if (padding_h < 0 || padding_w < 0) {
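(Worked example for the stride branch above: with coarsest_stride_ = 32 and a 500×375 input, padding_w = ceil(500/32)·32 − 500 = 12 and padding_h = ceil(375/32)·32 − 375 = 9, i.e. the image is padded up to the next multiple of the stride.)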
@@ -221,4 +223,5 @@ bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
  }
  return true;
}

}  // namespace PaddleX
@@ -145,4 +145,4 @@ std::string generate_save_path(const std::string& save_dir,
  std::string image_name(file_path.substr(pos + 1));
  return save_dir + OS_PATH_SEP + image_name;
}
}  // namespace PaddleX
@@ -186,10 +186,10 @@ paddlex.seg.HRNet(num_classes=2, width=18, use_bce_loss=False, use_dice_loss=Fal
> **Parameters**
> > - **num_classes** (int): number of classes.
> > - **width** (int|str): number of channels of the feature layers in the high-resolution branch. Defaults to 18. Possible values are [18, 30, 32, 40, 44, 48, 60, 64, '18_small_v1']; '18_small_v1' is a lightweight version of 18.
> > - **use_bce_loss** (bool): whether to use bce loss as the loss function of the network. It can only be used for two-class segmentation and can be combined with dice loss. Defaults to False.
> > - **use_dice_loss** (bool): whether to use dice loss as the loss function of the network. It can only be used for two-class segmentation and can be combined with bce loss. When both use_bce_loss and use_dice_loss are False, the cross-entropy loss is used. Defaults to False.
> > - **class_weight** (list|str): per-class weights of the cross-entropy loss. When `class_weight` is a list, its length should be `num_classes`. When `class_weight` is a str, weight.lower() should be 'dynamic'; in that case the weights are recomputed each round from the per-class pixel ratio, with the weight of each class being: class ratio * num_classes. With the default value None every class has weight 1, i.e. the usual cross-entropy loss.
> > - **ignore_index** (int): label value to be ignored; pixels whose label equals `ignore_index` do not contribute to the loss. Defaults to 255.
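For reference, a minimal construction sketch based on the signature above (illustrative only; dataset setup and the training call are omitted):

```python
import paddlex as pdx

# lightweight HRNet for two-class segmentation; width follows the
# description above and may also be an int such as 18
model = pdx.seg.HRNet(num_classes=2, width='18_small_v1')
```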
### train (training API)
......
@@ -167,3 +167,20 @@ NormLIME uses a certain number of samples to produce a global explanation. Since NormLI
### Usage example
> For visualizing interpretability results, see the [code](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/normlime.py).
## Visualization of data preprocessing/augmentation
```
paddlex.transforms.visualize(dataset,
                             img_count=3,
                             save_dir='vdl_output')
```
Visualizes the intermediate results of data preprocessing/augmentation.
The intermediate results can be inspected with VisualDL:

1. Start VisualDL: visualdl --logdir vdl_output --port 8001
2. Open https://0.0.0.0:8001 in a browser, where 0.0.0.0 stands for local access; for a remote service, replace it with the corresponding machine IP.

### Parameters
>* **dataset** (paddlex.datasets): dataset reader.
>* **img_count** (int): number of images on which preprocessing/augmentation is run. Defaults to 3.
>* **save_dir** (str): path for saving logs. Defaults to 'vdl_output'.
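A minimal end-to-end sketch (the dataset directory, file lists, and transform pipeline below are illustrative assumptions):

```python
import paddlex as pdx
from paddlex.cls import transforms

# illustrative preprocessing/augmentation pipeline
train_transforms = transforms.Compose([
    transforms.RandomCrop(crop_size=224),
    transforms.Normalize()
])

# illustrative dataset layout; replace the paths with your own
train_dataset = pdx.datasets.ImageNet(
    data_dir='vegetables_cls',
    file_list='vegetables_cls/train_list.txt',
    label_list='vegetables_cls/labels.txt',
    transforms=train_transforms)

# dump intermediate results for the first 3 images to vdl_output
pdx.transforms.visualize(train_dataset, img_count=3, save_dir='vdl_output')
```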
@@ -25,4 +25,3 @@
* **Disk space**: 1 TB or more of free SSD space is recommended (not required)

***Note: PaddleX supports only single-GPU models on Windows and Mac OS. NCCL is not yet supported on Windows.***
@@ -19,16 +19,16 @@

### Step2: Download the PaddlePaddle C++ inference library fluid_inference

The PaddlePaddle C++ inference library provides different precompiled versions for different `CPU`/`CUDA` setups and for TensorRT support. PaddleX currently depends on Paddle 1.8; several versions of the Paddle inference library are provided below:

| Version | Inference library (1.8.2) |
| ---- | ---- |
| ubuntu14.04_cpu_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-mkl/fluid_inference.tgz) |
| ubuntu14.04_cpu_avx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-openblas/fluid_inference.tgz) |
| ubuntu14.04_cpu_noavx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-noavx-openblas/fluid_inference.tgz) |
| ubuntu14.04_cuda9.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz) |
| ubuntu14.04_cuda10.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10-cudnn7-avx-mkl/fluid_inference.tgz) |
| ubuntu14.04_cuda10.1_cudnn7.6_avx_mkl_trt6 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6%2Ffluid_inference.tgz) |

For more and newer versions, download as needed from the [C++ inference library download list](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)
@@ -55,17 +55,17 @@ WITH_GPU=OFF
WITH_MKL=ON
# Whether to integrate TensorRT (only valid when WITH_GPU=ON)
WITH_TENSORRT=OFF
# Path of TensorRT; if you need TensorRT, change this to your actual installation path
TENSORRT_DIR=/root/projects/TensorRT/
# Path of the Paddle inference library; change this to your actual installation path
PADDLE_DIR=/root/projects/fluid_inference
# Whether the Paddle inference library is built as a static library
# When TensorRT is used, the Paddle inference library is usually a dynamic library
WITH_STATIC_LIB=OFF
# Path of the CUDA lib
CUDA_LIB=/usr/local/cuda/lib64
# Path of the CUDNN lib
CUDNN_LIB=/usr/local/cuda/lib64
# Whether to load an encrypted model
WITH_ENCRYPTION=ON
@@ -74,8 +74,8 @@ sh $(pwd)/scripts/bootstrap.sh  # download the precompiled encryption tool
ENCRYPTION_DIR=$(pwd)/paddlex-encryption
# OpenCV path; no need to change it if the bundled precompiled version is used
sh $(pwd)/scripts/bootstrap.sh  # download the precompiled OpenCV
OPENCV_DIR=$(pwd)/deps/opencv3gcc4.8/

# No changes needed below
rm -rf build
@@ -94,7 +94,6 @@ cmake .. \
    -DENCRYPTION_DIR=${ENCRYPTION_DIR} \
    -DOPENCV_DIR=${OPENCV_DIR}
make
```
**Note:** On Linux the build automatically downloads OpenCV, PaddleX-Encryption and YAML. If the build environment cannot access the Internet, you can download them manually:

@@ -117,9 +116,7 @@ After downloading yaml-cpp.zip there is no need to unpack it; in cmake/yaml.cmake change `URL https://
**Before loading a model, check that your model directory contains the three files `model.yml`, `__model__` and `__params__`. If it does not, follow the [exporting a model to the inference format](../deploy_python.html#inference) document to export the model in deployment format.**

After a successful build, the demo executables are `build/demo/detector`, `build/demo/classifier` and `build/demo/segmenter`. Pick the one matching your model type; the main command-line parameters are described below:

| Parameter | Description |
| ---- | ---- |
@@ -127,34 +124,37 @@
| image | path of the image file to predict |
| image_list | .txt file that stores image paths line by line |
| use_gpu | whether to use the GPU for prediction, 0 or 1 (default 0) |
| use_trt | whether to use TensorRT for prediction, 0 or 1 (default 0) |
| gpu_id | GPU device ID, default 0 |
| save_dir | path for saving visualized results, default "output"; **classifier has no such parameter** |
| key | the key generated during encryption; the default value "" means an unencrypted model is loaded |
| batch_size | batch size for prediction, default 1 |
| thread_num | number of prediction threads, defaults to the number of CPU processors |
## Examples

You can run predictions with the `inference_model` exported from the [Xiaoduxiong detection model](../deploy_python.html#inference) and the test images. Suppose it is exported to /root/projects, so the model path is /root/projects/inference_model.

`Example 1`:

Predict the single image `/root/projects/images/xiaoduxiong.jpeg` without `GPU`:

```shell
./build/demo/detector --model_dir=/root/projects/inference_model --image=/root/projects/images/xiaoduxiong.jpeg --save_dir=output
```

The `visualized prediction result` of the image is saved in the directory set by the `save_dir` parameter.

`Example 2`:

Predict multiple images `/root/projects/image_list.txt` with `GPU`; the format of image_list.txt is as follows:
```
/root/projects/images/xiaoduxiong1.jpeg
/root/projects/images/xiaoduxiong2.jpeg
...
/root/projects/images/xiaoduxiongn.jpeg
```
```shell
./build/demo/detector --model_dir=/root/projects/inference_model --image_list=/root/projects/images_list.txt --use_gpu=1 --save_dir=output --batch_size=2 --thread_num=2
```

The `visualized prediction results` of the images are saved in the directory set by the `save_dir` parameter.
...@@ -10,11 +10,10 @@ Windows 平台下,我们使用`Visual Studio 2019 Community` 进行了测试
Make sure the basic software listed above is installed; we use the Community edition of `VS2019`.

**All examples below use `D:\projects` as the working directory.**

### Step1: Download the PaddleX prediction code

```shell
d:
mkdir projects
...@@ -22,25 +21,24 @@ cd projects
git clone https://github.com/PaddlePaddle/PaddleX.git
```
**Note**: The `C++` prediction code is in the `PaddleX\deploy\cpp` directory, which does not depend on any other directory under `PaddleX`.

### Step2: Download the PaddlePaddle C++ inference library fluid_inference

The PaddlePaddle C++ inference library provides prebuilt packages for CPU/GPU use, TensorRT support, and different CUDA versions. PaddleX currently depends on Paddle 1.8; download links for the inference libraries built on Paddle 1.8 are listed below:

| Version | Inference library (1.8.2) | Compiler | Build tool | cuDNN | CUDA |
| ---- | ---- | ---- | ---- | ---- | ---- |
| cpu_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | - | - |
| cpu_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | - | - |
| cuda9.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
| cuda9.0_cudnn7_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
| cuda10.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post107/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.5.0 | 10.0 |

Choose a download according to your setup. If none of the versions above meets your needs, pick a suitable one from the [C++ inference library download list](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/windows_cpp_inference.html).

After unpacking the inference library, its directory (e.g. `D:\projects\fluid_inference\`) mainly contains:
```
├── \paddle\ # paddle core libraries and headers
|
```
...@@ -51,8 +49,8 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens
### Step3: Install and configure OpenCV

1. Download the 3.4.6 Windows build of OpenCV, [download link](https://bj.bcebos.com/paddleseg/deploy/opencv-3.4.6-vc14_vc15.exe)
2. Run the downloaded executable and extract OpenCV to a directory of your choice, e.g. `D:\projects\opencv`
3. Configure environment variables as follows:
    - My Computer -> Properties -> Advanced system settings -> Environment Variables
    - In the system variables, find Path (create it if missing) and double-click to edit it
...@@ -63,22 +61,21 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens
1. Open Visual Studio 2019 Community and click `Continue without code`
![step2](../../images/vs2019_step1.png)
2. Click `File` -> `Open` -> `CMake`

![step2.1](../../images/vs2019_step2.png)

Select the path of the C++ prediction code (e.g. `D:\projects\PaddleX\deploy\cpp`) and open `CMakeList.txt`:

![step2.2](../../images/vs2019_step3.png)

3. Click `Project` -> `CMake Settings for PADDLEX_INFERENCE`

![step3](../../images/vs2019_step4.png)

4. Click `Browse` to set the build options for the `CUDA`, `OpenCV`, and `Paddle inference library` paths

![step3](../../images/vs2019_step5.png)

The dependency paths are described below (entries marked with * are required only when using the **GPU version** of the inference library; keep the CUDA version aligned with the one used to build the Paddle inference library, e.g. if the Paddle inference library was built **with CUDA 9.0 or 10.0**, do **not** build the PaddleX prediction code **with CUDA 9.2, 10.1, etc.**):

| Parameter | Description |
| ---- | ---- |
...@@ -87,38 +84,33 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens
| PADDLE_DIR | Path of the Paddle C++ inference library |

**Notes:**

1. If you use the `CPU` version of the inference library, untick the checkbox of `WITH_GPU`
2. If you use the `openblas` version, untick the checkbox of `WITH_MKL`
3. On Windows the build automatically downloads YAML. If the build environment cannot access the internet, download [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip) manually.
After downloading, yaml-cpp.zip does not need to be unpacked; in cmake/yaml.cmake, change the address in `URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` to the path of the downloaded file.
4. To use model encryption, manually download the [Windows model encryption tool](https://bj.bcebos.com/paddlex/tools/win/paddlex-encryption.zip). For example, extract it to D:/projects, giving D:/projects/paddlex-encryption. When building, check WITH_ENCRYPTION and set ENCRYPTION_DIR to D:/projects/paddlex-encryption.

![step_encryption](../../images/vs2019_step_encryption.png)

![step4](../../images/vs2019_step6.png)

**After the settings are done**, click `Save and generate CMake cache to load variables` shown in the figure above.

5. Click `Build` -> `Build All`

![step6](../../images/vs2019_step7.png)

### Step5: Prediction and visualization

**Before loading a model, check that the model directory contains the three files `model.yml`, `__model__`, and `__params__`. If it does not, refer to the [Export model for inference docs](../deploy_python.html#inference) to export the model in deployment format.**

The executables produced by the `Visual Studio 2019` build are under the `out\build\x64-Release` directory. Open `cmd` and switch to that directory:
```
D:
cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release
```
After a successful build, the prediction demo entry programs are `paddlex_inference\detector.exe`, `paddlex_inference\classifier.exe`, and `paddlex_inference\segmenter.exe`; choose the one matching your model type. Their main command-line parameters are described below:

| Parameter | Description |
| ---- | ---- |
...@@ -128,33 +120,45 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release
| use_gpu | Whether to use GPU for prediction; supported values are 0 or 1 (default: 0) |
| gpu_id | GPU device ID (default: 0) |
| save_dir | Path for saving visualized results (default: "output"); classifier has no such parameter |
| key | Key generated during encryption; the default value "" means an unencrypted model is loaded |
| batch_size | Batch size for prediction (default: 1) |
| thread_num | Number of prediction threads (default: the number of CPU cores) |

## Examples

You can use the `inference_model` exported in [Xiaoduxiong detection model](../deploy_python.md) and the test image for prediction. For example, export it to D:\projects, so the model path is D:\projects\inference_model.

### Example 1: (predict a single image with an unencrypted model)

Predict the image `D:\images\xiaoduxiong.jpeg` without `GPU`:
```
.\paddlex_inference\detector.exe --model_dir=D:\projects\inference_model --image=D:\images\xiaoduxiong.jpeg --save_dir=output
```
The visualized prediction result is saved under the directory set by `save_dir`.

### Example 2: (predict an image list with an unencrypted model)

Predict multiple images with `GPU` via `D:\images\image_list.txt`, whose content is formatted as follows:
```
D:\images\xiaoduxiong1.jpeg
D:\images\xiaoduxiong2.jpeg
...
D:\images\xiaoduxiongn.jpeg
```
```
.\paddlex_inference\detector.exe --model_dir=D:\projects\inference_model --image_list=D:\images\image_list.txt --use_gpu=1 --save_dir=output --batch_size=2 --thread_num=2
```
The visualized prediction results are saved under the directory set by `save_dir`.

### Example 3: (predict a single image with an encrypted model)

If the model has not been encrypted yet, refer to [Encrypt PaddleX model](../encryption.html#paddlex) to encrypt it. For example, suppose the encrypted model directory is `D:\projects\encrypted_inference_model`.
```
.\paddlex_inference\detector.exe --model_dir=D:\projects\encrypted_inference_model --image=D:\images\xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
```
`--key` takes the key output by the encryption tool, e.g. `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`. The visualized prediction result is saved under the directory set by `save_dir`.
...@@ -2,7 +2,7 @@
PaddleX provides a lightweight model encryption deployment solution: the model encryption tool built into PaddleX encrypts the inference model, and the deployment SDK can load the encrypted model directly and run inference, improving the security of deployed AI models.

**The encryption solution now supports both Windows and Linux.**

## 1. Overview

...@@ -20,7 +20,7 @@ PaddleX提供一个轻量级的模型加密部署方案,通过PaddleX内置的
![](../images/encryption_process.png)

The following describes the internal implementation of the provided C/C++ encryption/decryption library. By following these steps you can implement your own encryption/decryption library for your scenario and load the model into the Paddle Inference library from in-memory data:

> 1) Since the encrypted model files must be loaded from memory after decryption, generate the model file and parameter file in combined mode.
>
...@@ -34,15 +34,17 @@ PaddleX提供一个轻量级的模型加密部署方案,通过PaddleX内置的
>
> 6) In the decryption step, read the encrypted data from the file into memory and decrypt it with AES. Decryption must use the same algorithm, mode, and key data/length as encryption; otherwise the decrypted data will be wrong.
>
> 7) Integrate the C/C++ inference library; prediction typically involves paddle::AnalysisConfig and paddle::Predictor. To load the decrypted model plaintext directly from memory (avoiding a temporary file after decryption), replace AnalysisConfig's model-loading call SetModel with SetModelBuffer.

Note that in this solution the key is embedded in the code of the upper-level prediction service, so the security of the model is equivalent to the code's resistance to reverse debugging. To protect the key and the model, developers should further harden their applications; common techniques include code obfuscation and binary packing, or switching the encryption mechanism to AES white-box encryption to protect the key. There are plenty of commercial and open-source products in these areas, which are not covered here.
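Steps 4) to 6) amount to standard AES encryption and decryption of the combined model buffers. The sketch below illustrates the idea in Python using PyCryptodome; the CBC mode, PKCS7 padding, and key layout here are illustrative assumptions, not the actual container format produced by the PaddleX tool.

```python
# Illustrative AES encrypt/decrypt of a model buffer (requires PyCryptodome:
# pip install pycryptodome). Mode, padding, and key layout are assumptions.
import base64

from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad


def encrypt_buffer(plain: bytes, b64_key: str) -> bytes:
    raw = base64.b64decode(b64_key)  # assumed layout: 32-byte key + 16-byte IV
    cipher = AES.new(raw[:32], AES.MODE_CBC, raw[32:48])
    return cipher.encrypt(pad(plain, AES.block_size))


def decrypt_buffer(enc: bytes, b64_key: str) -> bytes:
    raw = base64.b64decode(b64_key)
    cipher = AES.new(raw[:32], AES.MODE_CBC, raw[32:48])
    return unpad(cipher.decrypt(enc), AES.block_size)
```

On the C++ side, the decrypted buffers would then be handed to AnalysisConfig's SetModelBuffer as described in step 7).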
### 1.2 Encryption tool

[Linux PaddleX model encryption tool](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip): the build script downloads this version automatically, or you can download it manually.

[Windows PaddleX model encryption tool](https://bj.bcebos.com/paddlex/tools/win/paddlex-encryption.zip): this version must be downloaded manually; if you already downloaded it while building the C++ prediction code with Visual Studio 2019, there is no need to download it again.

The Linux encryption tool contains:
```
paddlex-encryption
├── include # headers: paddle_model_decrypt.h (decryption) and paddle_model_encrypt.h (encryption)
|
...@@ -52,22 +54,40 @@ paddlex-encryption
└── tool # paddlex_encrypt_tool
```

The Windows encryption tool contains:
```
paddlex-encryption
├── include # headers: paddle_model_decrypt.h (decryption) and paddle_model_encrypt.h (encryption)
|
├── lib # pmodel-encrypt.dll and pmodel-decrypt.dll dynamic libraries, pmodel-encrypt.lib and pmodel-decrypt.lib static libraries
|
└── tool # paddlex_encrypt_tool.exe, the model encryption tool
```

### 1.3 Encrypt a PaddleX model

After encrypting a model, the tool generates random key information (used for AES encryption/decryption), which must be passed in during deployment to decrypt the model.

> The key consists of a 32-byte key plus a 16-byte IV. Note that the generated key is base64-encoded, which enlarges the range of usable keys.
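Generating a key with this layout can be sketched as follows (a minimal illustration of the documented format, not the tool's actual implementation):

```python
# A minimal sketch of the documented key layout: base64(32-byte key + 16-byte IV).
import base64
import os

key_and_iv = os.urandom(32 + 16)
print(base64.b64encode(key_and_iv).decode())  # pass this string via --key later
```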
Linux:
```
# assume the model lives under /root/projects
./paddlex-encryption/tool/paddlex_encrypt_tool -model_dir /root/projects/paddlex_inference_model -save_dir /root/projects/paddlex_encrypted_model
```

Windows:
```
# assume the model lives under D:/projects
.\paddlex-encryption\tool\paddlex_encrypt_tool.exe -model_dir D:\projects\paddlex_inference_model -save_dir D:\projects\paddlex_encrypted_model
```

`-model_dir` specifies the inference model path (refer to [Export inference model](deploy_python.html#inference) to export the model in inference format); you can use the `inference_model` exported in [Export Xiaoduxiong detection model](deploy_python.html#inference). After encryption, the encrypted model is saved to the specified `-save_dir`, containing the three files `__model__.encrypted`, `__params__.encrypted`, and `model.yml`, together with the generated key information. The command output is shown below, with the key `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`
![](../images/encrypt.png)

## 2. PaddleX C++ encrypted deployment

### 2.1 Linux usage

Refer to the [Linux build guide](deploy_cpp/deploy_cpp_linux.md) to build the C++ deployment code. After a successful build, the prediction demo executables are `build/demo/detector`, `build/demo/classifier`, and `build/demo/segmenter`; choose the one matching your model type. Their main command-line parameters are described below:

| Parameter | Description |
| ---- | ---- |
...@@ -75,36 +95,72 @@ paddlex-encryption
| image | Path of the image file to predict |
| image_list | A .txt file that stores image paths line by line |
| use_gpu | Whether to use GPU for prediction; supported values are 0 or 1 (default: 0) |
| use_trt | Whether to use TensorRT for prediction; supported values are 0 or 1 (default: 0) |
| gpu_id | GPU device ID (default: 0) |
| save_dir | Path for saving visualized results (default: "output"); classifier has no such parameter |
| key | Key generated during encryption; the default value "" means an unencrypted model is loaded |
| batch_size | Batch size for prediction (default: 1) |
| thread_num | Number of prediction threads (default: the number of CPU cores) |
### Examples

You can use the test image from [Export Xiaoduxiong detection model](deploy_python.md#inference) for prediction.

#### Example 1:

Predict the image `/root/projects/images/xiaoduxiong.jpeg` without `GPU`:
```shell
./build/demo/detector --model_dir=/root/projects/paddlex_encrypted_model --image=/root/projects/images/xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
```
`--key` takes the key output by the encryption tool, e.g. `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`. The visualized prediction result is saved under the directory set by `save_dir`.

#### Example 2:

Predict multiple images with `GPU` via `/root/projects/image_list.txt`, whose content is formatted as follows:
```
/root/projects/images/xiaoduxiong1.jpeg
/root/projects/images/xiaoduxiong2.jpeg
...
/root/projects/images/xiaoduxiongn.jpeg
```
```shell
./build/demo/detector --model_dir=/root/projects/paddlex_encrypted_model --image_list=/root/projects/image_list.txt --use_gpu=1 --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
```
`--key` takes the key output by the encryption tool, e.g. `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`. The visualized prediction results are saved under the directory set by `save_dir`.
### 2.2 Windows usage

Refer to the [Windows build guide](deploy_cpp/deploy_cpp_win_vs2019.md). Download the Windows PaddleX encryption tool package manually and unpack it; then, on top of the build steps in the guide, check WITH_ENCRYPTION in the CMake settings, set ENCRYPTION_DIR to the unpacked tool directory, and build. The parameters are the same as for the Linux deployment. The prediction demo entry programs are paddlex_inference\detector.exe, paddlex_inference\classifier.exe, and paddlex_inference\segmenter.exe.

### Examples

You can use the test image from [Export Xiaoduxiong detection model](deploy_python.md#inference) for prediction.

#### Example 1:

Predict a single image without `GPU`, e.g. the image `D:\images\xiaoduxiong.jpeg` with the encrypted model directory `D:\projects\paddlex_encrypted_model`:
```shell
.\paddlex_inference\detector.exe --model_dir=D:\projects\paddlex_encrypted_model --image=D:\images\xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
```
`--key` takes the key output by the encryption tool, e.g. `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`. The visualized prediction result is saved under the directory set by `save_dir`.

#### Example 2:

Predict an image list with `GPU`, e.g. the list `D:\projects\image_list.txt`, whose content is as follows:
```
D:\projects\images\xiaoduxiong1.jpeg
D:\projects\images\xiaoduxiong2.jpeg
...
D:\projects\images\xiaoduxiongn.jpeg
```
Suppose the encrypted model directory is `D:\projects\paddlex_encrypted_model`:
```
.\paddlex_inference\detector.exe --model_dir=D:\projects\paddlex_encrypted_model --image_list=D:\projects\image_list.txt --use_gpu=1 --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
```
`--key` takes the key output by the encryption tool, e.g. `kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`. The visualized prediction results are saved under the directory set by `save_dir`.
# HumanSeg portrait segmentation models

This tutorial is built on the core segmentation networks of PaddleX and provides an end-to-end guide for portrait segmentation, covering pretrained models, fine-tuning, and video segmentation deployment.

## Installation

**Prerequisites**
* paddlepaddle >= 1.8.0
* python >= 3.5

```
pip install paddlex -i https://mirror.baidu.com/pypi/simple
```
For installation issues, refer to [PaddleX installation](https://paddlex.readthedocs.io/zh_CN/latest/install.html).
## Pretrained models

HumanSeg releases two pretrained models trained on large-scale portrait data, covering a range of use cases:

| Model type | Checkpoint Parameter | Inference Model | Quant Inference Model | Notes |
| --- | --- | --- | ---| --- |
| HumanSeg-server | [humanseg_server_params](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_server.pdparams) | [humanseg_server_inference](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_server_inference.zip) | -- | High-accuracy model for server-side GPU and portraits with complex backgrounds; architecture is Deeplabv3+/Xception65, input size (512, 512) |
| HumanSeg-mobile | [humanseg_mobile_params](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_mobile.pdparams) | [humanseg_mobile_inference](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_mobile_inference.zip) | [humanseg_mobile_quant](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_mobile_quant.zip) | Lightweight model for front-camera scenarios on mobile or server-side CPU; architecture is HRNet_w18_small_v1, input size (192, 192) |

Model performance

| Model | Model size | Inference time |
| --- | --- | --- |
|humanseg_server_inference| 158M | - |
|humanseg_mobile_inference | 5.8M | 42.35ms |
|humanseg_mobile_quant | 1.6M | 24.93ms |

Inference time environment: Xiaomi phone, CPU: Snapdragon 855, RAM: 6GB, image size: 192*192

**NOTE:**
* Checkpoint Parameter contains the model weights, used for fine-tuning.
* Inference Model and Quant Inference Model are deployment models, containing the `__model__` computation graph, the `__params__` model parameters, and the `model.yaml` basic model configuration.
* Inference Model is suitable for server-side CPU and GPU deployment, while Quant Inference Model is the quantized version for edge-side deployment (e.g. mobile via Paddle Lite).

Run the following script to download the HumanSeg pretrained models:
```bash
python pretrain_weights/download_pretrain_weights.py
```
## Download test data

We provide the **Supervisely Persons** portrait segmentation dataset published by [supervise.ly](https://supervise.ly/), from which a small subset was randomly sampled and converted into a format PaddleX can load directly. Run the following code for a quick download; it also includes a portrait test video shot with a front phone camera, `video_test.mp4`.

```bash
python data/download_data.py
```

## Quick start: portrait segmentation on video streams

Predictions from the DIS (Dense Inverse Search-based method) optical flow algorithm are fused with the segmentation results to improve portrait segmentation on video streams.
```bash
# real-time segmentation through the computer camera
python video_infer.py --model_dir pretrain_weights/humanseg_mobile_inference
# segmentation of a portrait video
python video_infer.py --model_dir pretrain_weights/humanseg_mobile_inference --video_path data/video_test.mp4
```
The video segmentation results look like this:

<img src="https://paddleseg.bj.bcebos.com/humanseg/data/video_test.gif" width="20%" height="20%"><img src="https://paddleseg.bj.bcebos.com/humanseg/data/result.gif" width="20%" height="20%">

The background can also be replaced with a chosen background, either an image or a video.
```bash
# real-time background replacement through the computer camera; a background video can also be passed via '--background_video_path'
python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --background_image_path data/background.jpg
# background replacement for a portrait video; a background video can also be passed via '--background_video_path'
python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --video_path data/video_test.mp4 --background_image_path data/background.jpg
# background replacement for a single image
python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --image_path data/human_image.jpg --background_image_path data/background.jpg
```
The background replacement results look like this:

<img src="https://paddleseg.bj.bcebos.com/humanseg/data/video_test.gif" width="20%" height="20%"><img src="https://paddleseg.bj.bcebos.com/humanseg/data/bg_replace.gif" width="20%" height="20%">

**NOTE**:
* Video segmentation takes a few minutes; please be patient.
* The provided models target portrait-orientation phone footage; results on landscape footage will be slightly worse.
## Training

Use the command below to fine-tune from a pretrained model; make sure the chosen architecture `model_type` matches the model weights `pretrain_weights`.
```bash
# select the GPU card (card 0 as an example)
export CUDA_VISIBLE_DEVICES=0
# to run without GPU, set CUDA_VISIBLE_DEVICES to empty
# export CUDA_VISIBLE_DEVICES=
python train.py --model_type HumanSegMobile \
--save_dir output/ \
--data_dir data/mini_supervisely \
--train_list data/mini_supervisely/train.txt \
--val_list data/mini_supervisely/val.txt \
--pretrain_weights pretrain_weights/humanseg_mobile_params \
--batch_size 8 \
--learning_rate 0.001 \
--num_epochs 10 \
--image_shape 192 192
```
The parameters are:
* `--model_type`: model type, one of HumanSegServer and HumanSegMobile
* `--save_dir`: model save path
* `--data_dir`: dataset path
* `--train_list`: path of the training set list
* `--val_list`: path of the validation set list
* `--pretrain_weights`: path of the pretrained model
* `--batch_size`: batch size
* `--learning_rate`: initial learning rate
* `--num_epochs`: number of training epochs
* `--image_shape`: network input image size (w, h)

More command-line help is available via:
```bash
python train.py --help
```
**NOTE**
You can quickly try different models by switching `--model_type` together with the matching `--pretrain_weights`.
## Evaluation

Run evaluation with:
```bash
python eval.py --model_dir output/best_model \
--data_dir data/mini_supervisely \
--val_list data/mini_supervisely/val.txt \
--image_shape 192 192
```
The parameters are:
* `--model_dir`: model path
* `--data_dir`: dataset path
* `--val_list`: path of the validation set list
* `--image_shape`: network input image size (w, h)

## Prediction

Run prediction with the command below; results are saved to `./output/result/` by default.
```bash
python infer.py --model_dir output/best_model \
--data_dir data/mini_supervisely \
--test_list data/mini_supervisely/test.txt \
--save_dir output/result \
--image_shape 192 192
```
The parameters are:
* `--model_dir`: model path
* `--data_dir`: dataset path
* `--test_list`: path of the test set list
* `--image_shape`: network input image size (w, h)

## Model export

```bash
paddlex --export_inference --model_dir output/best_model \
--save_dir output/export
```
The parameters are:
* `--model_dir`: model path
* `--save_dir`: save path of the exported model

## Offline quantization

```bash
python quant_offline.py --model_dir output/best_model \
--data_dir data/mini_supervisely \
--quant_list data/mini_supervisely/val.txt \
--save_dir output/quant_offline \
--image_shape 192 192
```
The parameters are:
* `--model_dir`: path of the model to quantize
* `--data_dir`: dataset path
* `--quant_list`: path of the image list used for quantization, usually the training or validation list
* `--save_dir`: save path of the quantized model
* `--image_shape`: network input image size (w, h)
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import os.path as osp
import cv2
import numpy as np
from postprocess import postprocess, threshold_mask
import paddlex as pdx
import paddlex.utils.logging as logging
from paddlex.seg import transforms
def parse_args():
    parser = argparse.ArgumentParser(
        description='HumanSeg background replacement for image and video')
parser.add_argument(
'--model_dir',
dest='model_dir',
help='Model path for inference',
type=str)
parser.add_argument(
'--image_path',
dest='image_path',
help='Image including human',
type=str,
default=None)
parser.add_argument(
'--background_image_path',
dest='background_image_path',
help='Background image for replacing',
type=str,
default=None)
parser.add_argument(
'--video_path',
dest='video_path',
help='Video path for inference',
type=str,
default=None)
parser.add_argument(
'--background_video_path',
dest='background_video_path',
help='Background video path for replacing',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the inference results',
type=str,
default='./output')
parser.add_argument(
"--image_shape",
dest="image_shape",
help="The image shape for net inputs.",
nargs=2,
default=[192, 192],
type=int)
return parser.parse_args()
def bg_replace(label_map, img, bg):
    # alpha-blend the foreground (label_map == 1) with the resized background
    h, w, _ = img.shape
    bg = cv2.resize(bg, (w, h))
    label_map = np.repeat(label_map[:, :, np.newaxis], 3, axis=2)
    comb = (label_map * img + (1 - label_map) * bg).astype(np.uint8)
    return comb
def recover(img, im_info):
    # im_info[1] stores the original (h, w) of the frame
    if im_info[0] == 'resize':
        w, h = im_info[1][1], im_info[1][0]
        img = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)
    elif im_info[0] == 'padding':
        w, h = im_info[1][1], im_info[1][0]
        img = img[0:h, 0:w, :]
    return img
def infer(args):
resize_h = args.image_shape[1]
resize_w = args.image_shape[0]
test_transforms = transforms.Compose([transforms.Normalize()])
model = pdx.load_model(args.model_dir)
if not osp.exists(args.save_dir):
os.makedirs(args.save_dir)
    # replace the background of a single image
if args.image_path is not None:
if not osp.exists(args.image_path):
            raise Exception('The --image_path does not exist: {}'.format(
args.image_path))
if args.background_image_path is None:
raise Exception(
'The --background_image_path is not set. Please set it')
else:
if not osp.exists(args.background_image_path):
raise Exception(
                    'The --background_image_path does not exist: {}'.format(
args.background_image_path))
img = cv2.imread(args.image_path)
im_shape = img.shape
im_scale_x = float(resize_w) / float(im_shape[1])
im_scale_y = float(resize_h) / float(im_shape[0])
im = cv2.resize(
img,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=cv2.INTER_LINEAR)
image = im.astype('float32')
im_info = ('resize', im_shape[0:2])
pred = model.predict(image, test_transforms)
label_map = pred['label_map']
label_map = recover(label_map, im_info)
bg = cv2.imread(args.background_image_path)
save_name = osp.basename(args.image_path)
save_path = osp.join(args.save_dir, save_name)
result = bg_replace(label_map, img, bg)
cv2.imwrite(save_path, result)
    # video background replacement: use the background video if provided, otherwise the background image
else:
is_video_bg = False
if args.background_video_path is not None:
if not osp.exists(args.background_video_path):
raise Exception(
                    'The --background_video_path does not exist: {}'.format(
args.background_video_path))
is_video_bg = True
elif args.background_image_path is not None:
if not osp.exists(args.background_image_path):
raise Exception(
                    'The --background_image_path does not exist: {}'.format(
args.background_image_path))
else:
            raise Exception(
                'Please provide a background image or video: set --background_image_path or --background_video_path'
            )
disflow = cv2.DISOpticalFlow_create(
cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
prev_gray = np.zeros((resize_h, resize_w), np.uint8)
prev_cfd = np.zeros((resize_h, resize_w), np.float32)
is_init = True
if args.video_path is not None:
logging.info('Please wait. It is computing......')
if not osp.exists(args.video_path):
                raise Exception('The --video_path does not exist: {}'.format(
args.video_path))
cap_video = cv2.VideoCapture(args.video_path)
fps = cap_video.get(cv2.CAP_PROP_FPS)
width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
save_name = osp.basename(args.video_path)
save_name = save_name.split('.')[0]
save_path = osp.join(args.save_dir, save_name + '.avi')
cap_out = cv2.VideoWriter(
save_path,
cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps,
(width, height))
if is_video_bg:
cap_bg = cv2.VideoCapture(args.background_video_path)
frames_bg = cap_bg.get(cv2.CAP_PROP_FRAME_COUNT)
current_frame_bg = 1
else:
img_bg = cv2.imread(args.background_image_path)
while cap_video.isOpened():
ret, frame = cap_video.read()
if ret:
im_shape = frame.shape
im_scale_x = float(resize_w) / float(im_shape[1])
im_scale_y = float(resize_h) / float(im_shape[0])
im = cv2.resize(
frame,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=cv2.INTER_LINEAR)
image = im.astype('float32')
im_info = ('resize', im_shape[0:2])
pred = model.predict(image, test_transforms)
score_map = pred['score_map']
cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
score_map = 255 * score_map[:, :, 1]
optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
disflow, is_init)
prev_gray = cur_gray.copy()
prev_cfd = optflow_map.copy()
is_init = False
optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
optflow_map = threshold_mask(
optflow_map, thresh_bg=0.2, thresh_fg=0.8)
score_map = recover(optflow_map, im_info)
                    # read background frames in a loop
if is_video_bg:
ret_bg, frame_bg = cap_bg.read()
if ret_bg:
if current_frame_bg == frames_bg:
current_frame_bg = 1
cap_bg.set(cv2.CAP_PROP_POS_FRAMES, 0)
else:
break
current_frame_bg += 1
comb = bg_replace(score_map, frame, frame_bg)
else:
comb = bg_replace(score_map, frame, img_bg)
cap_out.write(comb)
else:
break
if is_video_bg:
cap_bg.release()
cap_video.release()
cap_out.release()
        # when neither an input image nor a video is given, open the camera
else:
cap_video = cv2.VideoCapture(0)
if not cap_video.isOpened():
                raise IOError("Error opening video stream or file: check "
                              "whether the camera is working")
if is_video_bg:
cap_bg = cv2.VideoCapture(args.background_video_path)
frames_bg = cap_bg.get(cv2.CAP_PROP_FRAME_COUNT)
current_frame_bg = 1
else:
img_bg = cv2.imread(args.background_image_path)
while cap_video.isOpened():
ret, frame = cap_video.read()
if ret:
im_shape = frame.shape
im_scale_x = float(resize_w) / float(im_shape[1])
im_scale_y = float(resize_h) / float(im_shape[0])
im = cv2.resize(
frame,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=cv2.INTER_LINEAR)
image = im.astype('float32')
im_info = ('resize', im_shape[0:2])
pred = model.predict(image, test_transforms)
score_map = pred['score_map']
cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
score_map = 255 * score_map[:, :, 1]
optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
disflow, is_init)
prev_gray = cur_gray.copy()
prev_cfd = optflow_map.copy()
is_init = False
optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
optflow_map = threshold_mask(
optflow_map, thresh_bg=0.2, thresh_fg=0.8)
score_map = recover(optflow_map, im_info)
                    # read background frames in a loop
if is_video_bg:
ret_bg, frame_bg = cap_bg.read()
if ret_bg:
if current_frame_bg == frames_bg:
current_frame_bg = 1
cap_bg.set(cv2.CAP_PROP_POS_FRAMES, 0)
else:
break
current_frame_bg += 1
comb = bg_replace(score_map, frame, frame_bg)
else:
comb = bg_replace(score_map, frame, img_bg)
cv2.imshow('HumanSegmentation', comb)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
else:
break
if is_video_bg:
cap_bg.release()
cap_video.release()
if __name__ == "__main__":
args = parse_args()
infer(args)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
import paddlex as pdx
def download_data(savepath):
url = "https://paddleseg.bj.bcebos.com/humanseg/data/mini_supervisely.zip"
pdx.utils.download_and_decompress(url=url, path=savepath)
url = "https://paddleseg.bj.bcebos.com/humanseg/data/video_test.zip"
pdx.utils.download_and_decompress(url=url, path=savepath)
if __name__ == "__main__":
download_data(LOCAL_PATH)
print("Data download finish!")
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import paddlex as pdx
import paddlex.utils.logging as logging
from paddlex.seg import transforms
def parse_args():
    parser = argparse.ArgumentParser(description='HumanSeg evaluation')
parser.add_argument(
'--model_dir',
dest='model_dir',
help='Model path for evaluating',
type=str,
default='output/best_model')
parser.add_argument(
'--data_dir',
dest='data_dir',
help='The root directory of dataset',
type=str)
parser.add_argument(
'--val_list',
dest='val_list',
help='Val list file of dataset',
type=str,
default=None)
parser.add_argument(
'--batch_size',
dest='batch_size',
help='Mini batch size',
type=int,
default=128)
parser.add_argument(
"--image_shape",
dest="image_shape",
help="The image shape for net inputs.",
nargs=2,
default=[192, 192],
type=int)
return parser.parse_args()
def dict2str(dict_input):
out = ''
for k, v in dict_input.items():
try:
v = round(float(v), 6)
except:
pass
out = out + '{}={}, '.format(k, v)
return out.strip(', ')
def evaluate(args):
eval_transforms = transforms.Compose(
[transforms.Resize(args.image_shape), transforms.Normalize()])
eval_dataset = pdx.datasets.SegDataset(
data_dir=args.data_dir,
file_list=args.val_list,
transforms=eval_transforms)
model = pdx.load_model(args.model_dir)
metrics = model.evaluate(eval_dataset, args.batch_size)
logging.info('[EVAL] Finished, {} .'.format(dict2str(metrics)))
if __name__ == '__main__':
args = parse_args()
evaluate(args)
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import os.path as osp
import cv2
import numpy as np
import tqdm
import paddlex as pdx
from paddlex.seg import transforms
def parse_args():
parser = argparse.ArgumentParser(
description='HumanSeg prediction and visualization')
parser.add_argument(
'--model_dir',
dest='model_dir',
help='Model path for prediction',
type=str)
parser.add_argument(
'--data_dir',
dest='data_dir',
help='The root directory of dataset',
type=str)
parser.add_argument(
'--test_list',
dest='test_list',
help='Test list file of dataset',
type=str)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the inference results',
type=str,
default='./output/result')
parser.add_argument(
"--image_shape",
dest="image_shape",
help="The image shape for net inputs.",
nargs=2,
default=[192, 192],
type=int)
return parser.parse_args()
def infer(args):
def makedir(path):
sub_dir = osp.dirname(path)
if not osp.exists(sub_dir):
os.makedirs(sub_dir)
test_transforms = transforms.Compose(
[transforms.Resize(args.image_shape), transforms.Normalize()])
model = pdx.load_model(args.model_dir)
added_saved_path = osp.join(args.save_dir, 'added')
mat_saved_path = osp.join(args.save_dir, 'mat')
scoremap_saved_path = osp.join(args.save_dir, 'scoremap')
with open(args.test_list, 'r') as f:
files = f.readlines()
for file in tqdm.tqdm(files):
file = file.strip()
im_file = osp.join(args.data_dir, file)
im = cv2.imread(im_file)
result = model.predict(im_file, transforms=test_transforms)
# save added image
added_image = pdx.seg.visualize(
im_file, result, weight=0.6, save_dir=None)
added_image_file = osp.join(added_saved_path, file)
makedir(added_image_file)
cv2.imwrite(added_image_file, added_image)
# save score map
score_map = result['score_map'][:, :, 1]
score_map = (score_map * 255).astype(np.uint8)
score_map_file = osp.join(scoremap_saved_path, file)
makedir(score_map_file)
cv2.imwrite(score_map_file, score_map)
# save mat image
score_map = np.expand_dims(score_map, axis=-1)
mat_image = np.concatenate([im, score_map], axis=2)
mat_file = osp.join(mat_saved_path, file)
ext = osp.splitext(mat_file)[-1]
mat_file = mat_file.replace(ext, '.png')
makedir(mat_file)
cv2.imwrite(mat_file, mat_image)
if __name__ == '__main__':
args = parse_args()
infer(args)
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
def cal_optical_flow_tracking(pre_gray, cur_gray, prev_cfd, dl_weights,
disflow):
"""计算光流跟踪匹配点和光流图
输入参数:
pre_gray: 上一帧灰度图
cur_gray: 当前帧灰度图
prev_cfd: 上一帧光流图
dl_weights: 融合权重图
disflow: 光流数据结构
返回值:
is_track: 光流点跟踪二值图,即是否具有光流点匹配
track_cfd: 光流跟踪图
"""
check_thres = 8
h, w = pre_gray.shape[:2]
track_cfd = np.zeros_like(prev_cfd)
is_track = np.zeros_like(pre_gray)
flow_fw = disflow.calc(pre_gray, cur_gray, None)
flow_bw = disflow.calc(cur_gray, pre_gray, None)
    flow_fw = np.round(flow_fw).astype(int)
    flow_bw = np.round(flow_bw).astype(int)
y_list = np.array(range(h))
x_list = np.array(range(w))
yv, xv = np.meshgrid(y_list, x_list)
yv, xv = yv.T, xv.T
cur_x = xv + flow_fw[:, :, 0]
cur_y = yv + flow_fw[:, :, 1]
    # do not track points that flow out of the frame
not_track = (cur_x < 0) + (cur_x >= w) + (cur_y < 0) + (cur_y >= h)
flow_bw[~not_track] = flow_bw[cur_y[~not_track], cur_x[~not_track]]
not_track += (np.square(flow_fw[:, :, 0] + flow_bw[:, :, 0]) +
np.square(flow_fw[:, :, 1] + flow_bw[:, :, 1])
) >= check_thres
track_cfd[cur_y[~not_track], cur_x[~not_track]] = prev_cfd[~not_track]
is_track[cur_y[~not_track], cur_x[~not_track]] = 1
not_flow = np.all(np.abs(flow_fw) == 0,
axis=-1) * np.all(np.abs(flow_bw) == 0, axis=-1)
dl_weights[cur_y[not_flow], cur_x[not_flow]] = 0.05
return track_cfd, is_track, dl_weights
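
# Note on cal_optical_flow_tracking: a pixel counts as tracked only when the
# forward and backward DIS flows are nearly inverse, i.e. the squared sum of
# their components stays below check_thres; pixels with zero flow in both
# directions get their fusion weight lowered to 0.05.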
def fuse_optical_flow_tracking(track_cfd, dl_cfd, dl_weights, is_track):
"""光流追踪图和人像分割结构融合
输入参数:
track_cfd: 光流追踪图
dl_cfd: 当前帧分割结果
dl_weights: 融合权重图
is_track: 光流点匹配二值图
返回
cur_cfd: 光流跟踪图和人像分割结果融合图
"""
fusion_cfd = dl_cfd.copy()
    is_track = is_track.astype(bool)
fusion_cfd[is_track] = dl_weights[is_track] * dl_cfd[is_track] + (
1 - dl_weights[is_track]) * track_cfd[is_track]
    # confident regions (segmentation scores near 0 or 1) rely more on the tracked map
index_certain = ((dl_cfd > 0.9) + (dl_cfd < 0.1)) * is_track
index_less01 = (dl_weights < 0.1) * index_certain
fusion_cfd[index_less01] = 0.3 * dl_cfd[index_less01] + 0.7 * track_cfd[
index_less01]
index_larger09 = (dl_weights >= 0.1) * index_certain
fusion_cfd[index_larger09] = 0.4 * dl_cfd[
index_larger09] + 0.6 * track_cfd[index_larger09]
return fusion_cfd
def threshold_mask(img, thresh_bg, thresh_fg):
dst = (img / 255.0 - thresh_bg) / (thresh_fg - thresh_bg)
dst[np.where(dst > 1)] = 1
dst[np.where(dst < 0)] = 0
return dst.astype(np.float32)
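
# threshold_mask example: with thresh_bg=0.2 and thresh_fg=0.8, scores below
# 51 (= 255 * 0.2) clamp to 0, scores above 204 (= 255 * 0.8) clamp to 1, and
# a score of 128 maps to (128 / 255 - 0.2) / 0.6, roughly 0.50.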
def postprocess(cur_gray, scoremap, prev_gray, pre_cfd, disflow, is_init):
    """Optical-flow based refinement.
    Args:
        cur_gray : current frame, grayscale
        scoremap : segmentation result of the current frame
        prev_gray : previous frame, grayscale
        pre_cfd : fusion result of the previous frame
        disflow : DIS optical flow handle
        is_init : whether this is the first frame
    Returns:
        fusion_cfd : fusion of the tracking map and the prediction
    """
h, w = scoremap.shape
cur_cfd = scoremap.copy()
if is_init:
if h <= 64 or w <= 64:
disflow.setFinestScale(1)
elif h <= 160 or w <= 160:
disflow.setFinestScale(2)
else:
disflow.setFinestScale(3)
fusion_cfd = cur_cfd
else:
weights = np.ones((h, w), np.float32) * 0.3
track_cfd, is_track, weights = cal_optical_flow_tracking(
prev_gray, cur_gray, pre_cfd, weights, disflow)
fusion_cfd = fuse_optical_flow_tracking(track_cfd, cur_cfd, weights,
is_track)
return fusion_cfd
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
import paddlex as pdx
model_urls = {
"PaddleX_HumanSeg_Server_Params":
"https://bj.bcebos.com/paddlex/models/humanseg/humanseg_server_params.tar",
"PaddleX_HumanSeg_Server_Inference":
"https://bj.bcebos.com/paddlex/models/humanseg/humanseg_server_inference.tar",
"PaddleX_HumanSeg_Mobile_Params":
"https://bj.bcebos.com/paddlex/models/humanseg/humanseg_mobile_params.tar",
"PaddleX_HumanSeg_Mobile_Inference":
"https://bj.bcebos.com/paddlex/models/humanseg/humanseg_mobile_inference.tar",
"PaddleX_HumanSeg_Mobile_Quant":
"https://bj.bcebos.com/paddlex/models/humanseg/humanseg_mobile_quant.tar"
}
if __name__ == "__main__":
for model_name, url in model_urls.items():
pdx.utils.download_and_decompress(url=url, path=LOCAL_PATH)
print("Pretrained Model download success!")
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import paddlex as pdx
from paddlex.seg import transforms
def parse_args():
    parser = argparse.ArgumentParser(description='HumanSeg offline quantization')
parser.add_argument(
'--model_dir',
dest='model_dir',
help='Model path for quant',
type=str,
default='output/best_model')
parser.add_argument(
'--batch_size',
dest='batch_size',
help='Mini batch size',
type=int,
default=1)
parser.add_argument(
'--batch_nums',
dest='batch_nums',
help='Batch number for quant',
type=int,
default=10)
parser.add_argument(
'--data_dir',
dest='data_dir',
help='the root directory of dataset',
type=str)
parser.add_argument(
'--quant_list',
dest='quant_list',
        help='Image file list for model quantization, it can be val.txt or train.txt',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the quant model',
type=str,
default='./output/quant_offline')
parser.add_argument(
"--image_shape",
dest="image_shape",
help="The image shape for net inputs.",
nargs=2,
default=[192, 192],
type=int)
return parser.parse_args()
def evaluate(args):
eval_transforms = transforms.Compose(
[transforms.Resize(args.image_shape), transforms.Normalize()])
eval_dataset = pdx.datasets.SegDataset(
data_dir=args.data_dir,
file_list=args.quant_list,
transforms=eval_transforms)
model = pdx.load_model(args.model_dir)
pdx.slim.export_quant_model(model, eval_dataset, args.batch_size,
args.batch_nums, args.save_dir)
if __name__ == '__main__':
args = parse_args()
evaluate(args)
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import paddlex as pdx
from paddlex.seg import transforms
MODEL_TYPE = ['HumanSegMobile', 'HumanSegServer']
def parse_args():
parser = argparse.ArgumentParser(description='HumanSeg training')
parser.add_argument(
'--model_type',
dest='model_type',
help="Model type for traing, which is one of ('HumanSegMobile', 'HumanSegServer')",
type=str,
default='HumanSegMobile')
parser.add_argument(
'--data_dir',
dest='data_dir',
help='The root directory of dataset',
type=str)
parser.add_argument(
'--train_list',
dest='train_list',
help='Train list file of dataset',
type=str)
parser.add_argument(
'--val_list',
dest='val_list',
help='Val list file of dataset',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the model snapshot',
type=str,
default='./output')
parser.add_argument(
'--num_classes',
dest='num_classes',
help='Number of classes',
type=int,
default=2)
parser.add_argument(
"--image_shape",
dest="image_shape",
help="The image shape for net inputs.",
nargs=2,
default=[192, 192],
type=int)
parser.add_argument(
'--num_epochs',
dest='num_epochs',
help='Number epochs for training',
type=int,
default=100)
parser.add_argument(
'--batch_size',
dest='batch_size',
help='Mini batch size',
type=int,
default=128)
parser.add_argument(
'--learning_rate',
dest='learning_rate',
help='Learning rate',
type=float,
default=0.01)
parser.add_argument(
'--pretrain_weights',
dest='pretrain_weights',
        help='The path of pretrained weights',
type=str,
default=None)
parser.add_argument(
'--resume_checkpoint',
dest='resume_checkpoint',
help='The path of resume checkpoint',
type=str,
default=None)
parser.add_argument(
'--use_vdl',
dest='use_vdl',
help='Whether to use visualdl',
action='store_true')
parser.add_argument(
'--save_interval_epochs',
dest='save_interval_epochs',
help='The interval epochs for save a model snapshot',
type=int,
default=5)
return parser.parse_args()
def train(args):
train_transforms = transforms.Compose([
transforms.Resize(args.image_shape), transforms.RandomHorizontalFlip(),
transforms.Normalize()
])
eval_transforms = transforms.Compose(
[transforms.Resize(args.image_shape), transforms.Normalize()])
train_dataset = pdx.datasets.SegDataset(
data_dir=args.data_dir,
file_list=args.train_list,
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
data_dir=args.data_dir,
file_list=args.val_list,
transforms=eval_transforms)
if args.model_type == 'HumanSegMobile':
model = pdx.seg.HRNet(
num_classes=args.num_classes, width='18_small_v1')
elif args.model_type == 'HumanSegServer':
model = pdx.seg.DeepLabv3p(
num_classes=args.num_classes, backbone='Xception65')
else:
        raise ValueError(
            "--model_type: {} is set wrong, it should be one of "
            "('HumanSegMobile', 'HumanSegServer')".format(args.model_type))
model.train(
num_epochs=args.num_epochs,
train_dataset=train_dataset,
train_batch_size=args.batch_size,
eval_dataset=eval_dataset,
save_interval_epochs=args.save_interval_epochs,
learning_rate=args.learning_rate,
pretrain_weights=args.pretrain_weights,
resume_checkpoint=args.resume_checkpoint,
save_dir=args.save_dir,
use_vdl=args.use_vdl)
if __name__ == '__main__':
args = parse_args()
train(args)
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import os.path as osp
import cv2
import numpy as np
from postprocess import postprocess, threshold_mask
import paddlex as pdx
import paddlex.utils.logging as logging
from paddlex.seg import transforms
def parse_args():
parser = argparse.ArgumentParser(
description='HumanSeg inference for video')
parser.add_argument(
'--model_dir',
dest='model_dir',
help='Model path for inference',
type=str)
parser.add_argument(
'--video_path',
dest='video_path',
        help='Video path for inference; the camera will be used if the path does not exist',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the inference results',
type=str,
default='./output')
parser.add_argument(
"--image_shape",
dest="image_shape",
help="The image shape for net inputs.",
nargs=2,
default=[192, 192],
type=int)
return parser.parse_args()
def recover(img, im_info):
    # im_info[1] stores the original (h, w) of the frame
    if im_info[0] == 'resize':
        w, h = im_info[1][1], im_info[1][0]
        img = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)
    elif im_info[0] == 'padding':
        w, h = im_info[1][1], im_info[1][0]
        img = img[0:h, 0:w, :]
    return img
def video_infer(args):
resize_h = args.image_shape[1]
resize_w = args.image_shape[0]
model = pdx.load_model(args.model_dir)
test_transforms = transforms.Compose([transforms.Normalize()])
if not args.video_path:
cap = cv2.VideoCapture(0)
else:
cap = cv2.VideoCapture(args.video_path)
    if not cap.isOpened():
        raise IOError("Error opening video stream or file: check whether "
                      "--video_path exists ({}) or the camera is working".format(
                          args.video_path))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
prev_gray = np.zeros((resize_h, resize_w), np.uint8)
prev_cfd = np.zeros((resize_h, resize_w), np.float32)
is_init = True
fps = cap.get(cv2.CAP_PROP_FPS)
if args.video_path:
logging.info("Please wait. It is computing......")
        # writer for saving the prediction result video
if not osp.exists(args.save_dir):
os.makedirs(args.save_dir)
out = cv2.VideoWriter(
osp.join(args.save_dir, 'result.avi'),
cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height))
        # start reading video frames
while cap.isOpened():
ret, frame = cap.read()
if ret:
im_shape = frame.shape
im_scale_x = float(resize_w) / float(im_shape[1])
im_scale_y = float(resize_h) / float(im_shape[0])
im = cv2.resize(
frame,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=cv2.INTER_LINEAR)
image = im.astype('float32')
im_info = ('resize', im_shape[0:2])
pred = model.predict(image, test_transforms)
score_map = pred['score_map']
cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
score_map = 255 * score_map[:, :, 1]
optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
disflow, is_init)
prev_gray = cur_gray.copy()
prev_cfd = optflow_map.copy()
is_init = False
optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
optflow_map = threshold_mask(
optflow_map, thresh_bg=0.2, thresh_fg=0.8)
img_matting = np.repeat(
optflow_map[:, :, np.newaxis], 3, axis=2)
img_matting = recover(img_matting, im_info)
bg_im = np.ones_like(img_matting) * 255
comb = (img_matting * frame +
(1 - img_matting) * bg_im).astype(np.uint8)
out.write(comb)
else:
break
cap.release()
out.release()
else:
while cap.isOpened():
ret, frame = cap.read()
if ret:
im_shape = frame.shape
im_scale_x = float(resize_w) / float(im_shape[1])
im_scale_y = float(resize_h) / float(im_shape[0])
im = cv2.resize(
frame,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=cv2.INTER_LINEAR)
image = im.astype('float32')
im_info = ('resize', im_shape[0:2])
pred = model.predict(image, test_transforms)
score_map = pred['score_map']
cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
score_map = 255 * score_map[:, :, 1]
optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
disflow, is_init)
prev_gray = cur_gray.copy()
prev_cfd = optflow_map.copy()
is_init = False
optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
optflow_map = threshold_mask(
optflow_map, thresh_bg=0.2, thresh_fg=0.8)
img_matting = np.repeat(
optflow_map[:, :, np.newaxis], 3, axis=2)
img_matting = recover(img_matting, im_info)
bg_im = np.ones_like(img_matting) * 255
comb = (img_matting * frame +
(1 - img_matting) * bg_im).astype(np.uint8)
cv2.imshow('HumanSegmentation', comb)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
else:
break
cap.release()
if __name__ == "__main__":
args = parse_args()
video_infer(args)
# Tutorials: model training

This directory contains example code for training models with PaddleX. Each example downloads its sample data automatically and trains on a single GPU card.

|Code | Model task | Data |
|------|--------|---------|
|classification/mobilenetv2.py | Image classification MobileNetV2 | Vegetable classification |
|classification/resnet50.py | Image classification ResNet50 | Vegetable classification |
|detection/faster_rcnn_r50_fpn.py | Object detection FasterRCNN | Insect detection |
|detection/mask_rcnn_f50_fpn.py | Instance segmentation MaskRCNN | Garbage sorting |
|segmentation/deeplabv3p.py | Semantic segmentation DeepLabV3 | Optic disc segmentation |
|segmentation/unet.py | Semantic segmentation UNet | Optic disc segmentation |
|segmentation/hrnet.py | Semantic segmentation HRNet | Optic disc segmentation |
|segmentation/fast_scnn.py | Semantic segmentation FastSCNN | Optic disc segmentation |

## Start training

After installing PaddleX, start training with:
```
python classification/mobilenetv2.py
```
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.cls import transforms
import paddlex as pdx
# 下载和解压蔬菜分类数据集
veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
pdx.utils.download_and_decompress(veg_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms
train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224])
eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224])
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet
train_dataset = pdx.datasets.ImageNet(
data_dir='vegetables_cls',
file_list='vegetables_cls/train_list.txt',
label_list='vegetables_cls/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.ImageNet(
data_dir='vegetables_cls',
file_list='vegetables_cls/val_list.txt',
label_list='vegetables_cls/labels.txt',
transforms=eval_transforms)
# Initialize the model and start training
# Training metrics can be inspected with VisualDL
# Launch VisualDL with: visualdl --logdir output/mobilenetv2/vdl_log --port 8001
# then open http://0.0.0.0:8001 in a browser
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#mobilenetv2
model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels))
model.train(
num_epochs=10,
train_dataset=train_dataset,
train_batch_size=32,
eval_dataset=eval_dataset,
lr_decay_epochs=[4, 6, 8],
learning_rate=0.025,
save_dir='output/mobilenetv2',
use_vdl=True)
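After training completes, the saved weights can be loaded back for inference. A minimal sketch, assuming PaddleX's standard `pdx.load_model`/`predict` interface and its convention of writing the best checkpoint to `save_dir/best_model` (neither is shown in the script above):
```
import paddlex as pdx

# Load the best checkpoint written during training (assumed path).
model = pdx.load_model('output/mobilenetv2/best_model')
# 'some_vegetable.jpg' is a hypothetical image path.
result = model.predict('some_vegetable.jpg')
print(result)  # classification results, e.g. category and score per prediction
```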
import os
# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddle.fluid as fluid
from paddlex.cls import transforms
import paddlex as pdx
# Download and extract the vegetable classification dataset
veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
pdx.utils.download_and_decompress(veg_dataset, path='./')
# Define the transforms for training and evaluation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms
train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224])
eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224])
# Define the training and evaluation datasets
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet
train_dataset = pdx.datasets.ImageNet(
data_dir='vegetables_cls',
file_list='vegetables_cls/train_list.txt',
label_list='vegetables_cls/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.ImageNet(
data_dir='vegetables_cls',
file_list='vegetables_cls/val_list.txt',
label_list='vegetables_cls/labels.txt',
transforms=eval_transforms)
# PaddleX also supports building a custom optimizer
step_each_epoch = train_dataset.num_samples // 32
learning_rate = fluid.layers.cosine_decay(
learning_rate=0.025, step_each_epoch=step_each_epoch, epochs=10)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(4e-5))
# Initialize the model and start training
# Training metrics can be inspected with VisualDL
# Launch VisualDL with: visualdl --logdir output/resnet50/vdl_log --port 8001
# then open http://0.0.0.0:8001 in a browser
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50
model = pdx.cls.ResNet50(num_classes=len(train_dataset.labels))
model.train(
num_epochs=10,
train_dataset=train_dataset,
train_batch_size=32,
eval_dataset=eval_dataset,
optimizer=optimizer,
save_dir='output/resnet50',
use_vdl=True)
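For reference, `fluid.layers.cosine_decay` anneals the learning rate at epoch granularity. A minimal pure-Python sketch of the schedule it computes (my reading of the Paddle 1.x formula, not code from this repo):
```
import math

def cosine_decay_lr(base_lr, step_each_epoch, epochs, global_step):
    # The schedule advances once per epoch, i.e. every step_each_epoch steps.
    cur_epoch = global_step // step_each_epoch
    return base_lr * 0.5 * (math.cos(cur_epoch * math.pi / epochs) + 1)

# With base_lr=0.025 and epochs=10 as above, the rate starts at 0.025 and
# decays smoothly toward 0 over the final epochs.
```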
import os
# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
import paddlex as pdx
# Download and extract the insect detection dataset
insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
pdx.utils.download_and_decompress(insect_dataset, path='./')
# Define the transforms for training and evaluation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms
train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])
# Define the training and evaluation datasets
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
train_dataset = pdx.datasets.VOCDetection(
data_dir='insect_det',
file_list='insect_det/train_list.txt',
label_list='insect_det/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.VOCDetection(
data_dir='insect_det',
file_list='insect_det/val_list.txt',
label_list='insect_det/labels.txt',
transforms=eval_transforms)
# Initialize the model and start training
# Training metrics can be inspected with VisualDL
# Launch VisualDL with: visualdl --logdir output/faster_rcnn_r50_fpn/vdl_log --port 8001
# then open http://0.0.0.0:8001 in a browser
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
# num_classes must include the background class, i.e. number of target classes + 1
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn
num_classes = len(train_dataset.labels) + 1
model = pdx.det.FasterRCNN(num_classes=num_classes)
model.train(
num_epochs=12,
train_dataset=train_dataset,
train_batch_size=2,
eval_dataset=eval_dataset,
learning_rate=0.0025,
lr_decay_epochs=[8, 11],
save_dir='output/faster_rcnn_r50_fpn',
use_vdl=True)
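A quick illustration of the num_classes convention noted above: RCNN-family models reserve one class slot for the background.
```
# Hypothetical three-class label list, for illustration only.
labels = ['beetle', 'moth', 'wasp']
num_classes = len(labels) + 1  # 4 = background + 3 foreground classes
```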
import os
# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
import paddlex as pdx
# Download and extract the Xiaoduxiong sorting dataset
xiaoduxiong_dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_det.tar.gz'
pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./')
# Define the transforms for training and evaluation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms
train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])
# Define the training and evaluation datasets
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#cocodetection
train_dataset = pdx.datasets.CocoDetection(
data_dir='xiaoduxiong_ins_det/JPEGImages',
ann_file='xiaoduxiong_ins_det/train.json',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.CocoDetection(
data_dir='xiaoduxiong_ins_det/JPEGImages',
ann_file='xiaoduxiong_ins_det/val.json',
transforms=eval_transforms)
# Initialize the model and start training
# Training metrics can be inspected with VisualDL
# Launch VisualDL with: visualdl --logdir output/mask_rcnn_r50_fpn/vdl_log --port 8001
# then open http://0.0.0.0:8001 in a browser
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
# num_classes must include the background class, i.e. number of target classes + 1
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn
num_classes = len(train_dataset.labels) + 1
model = pdx.det.MaskRCNN(num_classes=num_classes)
model.train(
num_epochs=12,
train_dataset=train_dataset,
train_batch_size=1,
eval_dataset=eval_dataset,
learning_rate=0.00125,
warmup_steps=10,
lr_decay_epochs=[8, 11],
save_dir='output/mask_rcnn_r50_fpn',
use_vdl=True)
import os
# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
import paddlex as pdx
# Download and extract the insect detection dataset
insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
pdx.utils.download_and_decompress(insect_dataset, path='./')
# Define the transforms for training and evaluation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedyolotransforms
train_transforms = transforms.ComposedYOLOv3Transforms(
mode='train', shape=[608, 608])
eval_transforms = transforms.ComposedYOLOv3Transforms(
mode='eval', shape=[608, 608])
# Define the training and evaluation datasets
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
train_dataset = pdx.datasets.VOCDetection(
data_dir='insect_det',
file_list='insect_det/train_list.txt',
label_list='insect_det/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.VOCDetection(
data_dir='insect_det',
file_list='insect_det/val_list.txt',
label_list='insect_det/labels.txt',
transforms=eval_transforms)
# Initialize the model and start training
# Training metrics can be inspected with VisualDL
# Launch VisualDL with: visualdl --logdir output/yolov3_darknet53/vdl_log --port 8001
# then open http://0.0.0.0:8001 in a browser
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3
num_classes = len(train_dataset.labels)
model = pdx.det.YOLOv3(num_classes=num_classes, backbone='DarkNet53')
model.train(
num_epochs=270,
train_dataset=train_dataset,
train_batch_size=8,
eval_dataset=eval_dataset,
learning_rate=0.000125,
lr_decay_epochs=[210, 240],
save_dir='output/yolov3_darknet53',
use_vdl=True)
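Note that, unlike the RCNN examples above, YOLOv3 does not reserve a background slot, which is why this script sets num_classes without the +1:
```
# YOLOv3 scores objectness separately, so no background class is added.
num_classes = len(train_dataset.labels)  # contrast with len(labels) + 1 for RCNN
```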
import os
# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
from paddlex.seg import transforms
# Download and extract the optic disc segmentation dataset
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')
# Define the transforms for training and evaluation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')
train_transforms.add_augmenters([
transforms.RandomRotate()
])
# Define the training and evaluation datasets
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
train_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/train_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/val_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=eval_transforms)
# Initialize the model and start training
# Training metrics can be inspected with VisualDL
# Launch VisualDL with: visualdl --logdir output/deeplab/vdl_log --port 8001
# then open http://0.0.0.0:8001 in a browser
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p
num_classes = len(train_dataset.labels)
model = pdx.seg.DeepLabv3p(num_classes=num_classes)
model.train(
num_epochs=40,
train_dataset=train_dataset,
train_batch_size=4,
eval_dataset=eval_dataset,
learning_rate=0.01,
save_dir='output/deeplab',
use_vdl=True)
import os
# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
from paddlex.seg import transforms
# Download and extract the optic disc segmentation dataset
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')
# Define the transforms for training and evaluation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')
# Define the training and evaluation datasets
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
train_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/train_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/val_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=eval_transforms)
# Initialize the model and start training
# Training metrics can be inspected with VisualDL
# Launch VisualDL with: visualdl --logdir output/hrnet/vdl_log --port 8001
# then open http://0.0.0.0:8001 in a browser
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet
num_classes = len(train_dataset.labels)
model = pdx.seg.HRNet(num_classes=num_classes)
model.train(
num_epochs=20,
train_dataset=train_dataset,
train_batch_size=4,
eval_dataset=eval_dataset,
learning_rate=0.01,
save_dir='output/hrnet',
use_vdl=True)
import os
# Use GPU card 0
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
from paddlex.seg import transforms
# Download and extract the optic disc segmentation dataset
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')
# Define the transforms for training and evaluation
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')
# Define the training and evaluation datasets
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
train_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/train_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/val_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=eval_transforms)
# Initialize the model and start training
# Training metrics can be inspected with VisualDL
# Launch VisualDL with: visualdl --logdir output/unet/vdl_log --port 8001
# then open http://0.0.0.0:8001 in a browser
# (0.0.0.0 works for local access; for a remote server, use that machine's IP)
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet
num_classes = len(train_dataset.labels)
model = pdx.seg.UNet(num_classes=num_classes)
model.train(
num_epochs=20,
train_dataset=train_dataset,
train_batch_size=4,
eval_dataset=eval_dataset,
learning_rate=0.01,
save_dir='output/unet',
use_vdl=True)
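As with the classifier, a trained segmentation model can be reloaded for inference. A hedged sketch, assuming PaddleX's `pdx.load_model` and that segmentation `predict` returns a dict with 'label_map' and 'score_map' keys (the same keys the human-segmentation demo earlier reads), with hypothetical paths:
```
import paddlex as pdx

model = pdx.load_model('output/unet/best_model')  # assumed checkpoint path
result = model.predict('optic_disc_seg/some_image.jpg')  # hypothetical image
label_map = result['label_map']  # per-pixel class ids
score_map = result['score_map']  # per-pixel class probabilities
```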
...@@ -48,6 +48,7 @@ if hub.version.hub_version < '1.6.2': ...@@ -48,6 +48,7 @@ if hub.version.hub_version < '1.6.2':
env_info = get_environ_info() env_info = get_environ_info()
load_model = cv.models.load_model load_model = cv.models.load_model
datasets = cv.datasets datasets = cv.datasets
transforms = cv.transforms
log_level = 2 log_level = 2
......
...@@ -100,7 +100,7 @@ class CocoDetection(VOCDetection): ...@@ -100,7 +100,7 @@ class CocoDetection(VOCDetection):
gt_score = np.ones((num_bbox, 1), dtype=np.float32) gt_score = np.ones((num_bbox, 1), dtype=np.float32)
is_crowd = np.zeros((num_bbox, 1), dtype=np.int32) is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
difficult = np.zeros((num_bbox, 1), dtype=np.int32) difficult = np.zeros((num_bbox, 1), dtype=np.int32)
gt_poly = None gt_poly = [None] * num_bbox
for i, box in enumerate(bboxes): for i, box in enumerate(bboxes):
catid = box['category_id'] catid = box['category_id']
...@@ -108,8 +108,6 @@ class CocoDetection(VOCDetection): ...@@ -108,8 +108,6 @@ class CocoDetection(VOCDetection):
gt_bbox[i, :] = box['clean_bbox'] gt_bbox[i, :] = box['clean_bbox']
is_crowd[i][0] = box['iscrowd'] is_crowd[i][0] = box['iscrowd']
if 'segmentation' in box: if 'segmentation' in box:
if gt_poly is None:
gt_poly = [None] * num_bbox
gt_poly[i] = box['segmentation'] gt_poly[i] = box['segmentation']
im_info = { im_info = {
...@@ -121,10 +119,9 @@ class CocoDetection(VOCDetection): ...@@ -121,10 +119,9 @@ class CocoDetection(VOCDetection):
'gt_class': gt_class, 'gt_class': gt_class,
'gt_bbox': gt_bbox, 'gt_bbox': gt_bbox,
'gt_score': gt_score, 'gt_score': gt_score,
'gt_poly': gt_poly,
'difficult': difficult 'difficult': difficult
} }
if gt_poly is not None:
label_info['gt_poly'] = gt_poly
coco_rec = (im_info, label_info) coco_rec = (im_info, label_info)
self.file_list.append([im_fname, coco_rec]) self.file_list.append([im_fname, coco_rec])
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -28,7 +28,7 @@ class SegDataset(Dataset): ...@@ -28,7 +28,7 @@ class SegDataset(Dataset):
Args: Args:
        data_dir (str): Directory the dataset resides in. data_dir (str): Directory the dataset resides in.
        file_list (str): Path to the file listing the dataset's images and their annotation files (each line holds paths relative to data_dir). file_list (str): Path to the file listing the dataset's images and their annotation files (each line holds paths relative to data_dir).
        label_list (str): Path to the file describing the dataset's classes. label_list (str): Path to the file describing the dataset's classes. Defaults to None.
        transforms (list): Preprocessing/augmentation operators applied to each sample. transforms (list): Preprocessing/augmentation operators applied to each sample.
        num_workers (int): Number of threads or processes used to preprocess samples. Defaults to 4. num_workers (int): Number of threads or processes used to preprocess samples. Defaults to 4.
        buffer_size (int): Length of the sample preprocessing queue, in samples. Defaults to 100. buffer_size (int): Length of the sample preprocessing queue, in samples. Defaults to 100.
...@@ -40,7 +40,7 @@ class SegDataset(Dataset): ...@@ -40,7 +40,7 @@ class SegDataset(Dataset):
def __init__(self, def __init__(self,
data_dir, data_dir,
file_list, file_list,
label_list, label_list=None,
transforms=None, transforms=None,
num_workers='auto', num_workers='auto',
buffer_size=100, buffer_size=100,
...@@ -56,6 +56,7 @@ class SegDataset(Dataset): ...@@ -56,6 +56,7 @@ class SegDataset(Dataset):
self.labels = list() self.labels = list()
self._epoch = 0 self._epoch = 0
if label_list is not None:
with open(label_list, encoding=get_encoding(label_list)) as f: with open(label_list, encoding=get_encoding(label_list)) as f:
for line in f: for line in f:
item = line.strip() item = line.strip()
...@@ -69,8 +70,8 @@ class SegDataset(Dataset): ...@@ -69,8 +70,8 @@ class SegDataset(Dataset):
full_path_im = osp.join(data_dir, items[0]) full_path_im = osp.join(data_dir, items[0])
full_path_label = osp.join(data_dir, items[1]) full_path_label = osp.join(data_dir, items[1])
if not osp.exists(full_path_im): if not osp.exists(full_path_im):
raise IOError( raise IOError('The image file {} is not exist!'.format(
'The image file {} is not exist!'.format(full_path_im)) full_path_im))
if not osp.exists(full_path_label): if not osp.exists(full_path_label):
raise IOError('The image file {} is not exist!'.format( raise IOError('The image file {} is not exist!'.format(
full_path_label)) full_path_label))
......
...@@ -106,16 +106,23 @@ class VOCDetection(Dataset): ...@@ -106,16 +106,23 @@ class VOCDetection(Dataset):
ct = int(tree.find('id').text) ct = int(tree.find('id').text)
im_id = np.array([int(tree.find('id').text)]) im_id = np.array([int(tree.find('id').text)])
pattern = re.compile('<object>', re.IGNORECASE) pattern = re.compile('<object>', re.IGNORECASE)
obj_tag = pattern.findall(str(ET.tostringlist(tree.getroot())))[0][1:-1] obj_match = pattern.findall(
str(ET.tostringlist(tree.getroot())))
if len(obj_match) == 0:
continue
obj_tag = obj_match[0][1:-1]
objs = tree.findall(obj_tag) objs = tree.findall(obj_tag)
pattern = re.compile('<size>', re.IGNORECASE) pattern = re.compile('<size>', re.IGNORECASE)
size_tag = pattern.findall(str(ET.tostringlist(tree.getroot())))[0][1:-1] size_tag = pattern.findall(
str(ET.tostringlist(tree.getroot())))[0][1:-1]
size_element = tree.find(size_tag) size_element = tree.find(size_tag)
pattern = re.compile('<width>', re.IGNORECASE) pattern = re.compile('<width>', re.IGNORECASE)
width_tag = pattern.findall(str(ET.tostringlist(size_element)))[0][1:-1] width_tag = pattern.findall(
str(ET.tostringlist(size_element)))[0][1:-1]
im_w = float(size_element.find(width_tag).text) im_w = float(size_element.find(width_tag).text)
pattern = re.compile('<height>', re.IGNORECASE) pattern = re.compile('<height>', re.IGNORECASE)
height_tag = pattern.findall(str(ET.tostringlist(size_element)))[0][1:-1] height_tag = pattern.findall(
str(ET.tostringlist(size_element)))[0][1:-1]
im_h = float(size_element.find(height_tag).text) im_h = float(size_element.find(height_tag).text)
gt_bbox = np.zeros((len(objs), 4), dtype=np.float32) gt_bbox = np.zeros((len(objs), 4), dtype=np.float32)
gt_class = np.zeros((len(objs), 1), dtype=np.int32) gt_class = np.zeros((len(objs), 1), dtype=np.int32)
...@@ -124,29 +131,36 @@ class VOCDetection(Dataset): ...@@ -124,29 +131,36 @@ class VOCDetection(Dataset):
difficult = np.zeros((len(objs), 1), dtype=np.int32) difficult = np.zeros((len(objs), 1), dtype=np.int32)
for i, obj in enumerate(objs): for i, obj in enumerate(objs):
pattern = re.compile('<name>', re.IGNORECASE) pattern = re.compile('<name>', re.IGNORECASE)
name_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1] name_tag = pattern.findall(str(ET.tostringlist(obj)))[0][
1:-1]
cname = obj.find(name_tag).text.strip() cname = obj.find(name_tag).text.strip()
gt_class[i][0] = cname2cid[cname] gt_class[i][0] = cname2cid[cname]
pattern = re.compile('<difficult>', re.IGNORECASE) pattern = re.compile('<difficult>', re.IGNORECASE)
diff_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1] diff_tag = pattern.findall(str(ET.tostringlist(obj)))[0][
1:-1]
try: try:
_difficult = int(obj.find(diff_tag).text) _difficult = int(obj.find(diff_tag).text)
except Exception: except Exception:
_difficult = 0 _difficult = 0
pattern = re.compile('<bndbox>', re.IGNORECASE) pattern = re.compile('<bndbox>', re.IGNORECASE)
box_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1] box_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:
-1]
box_element = obj.find(box_tag) box_element = obj.find(box_tag)
pattern = re.compile('<xmin>', re.IGNORECASE) pattern = re.compile('<xmin>', re.IGNORECASE)
xmin_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][1:-1] xmin_tag = pattern.findall(
str(ET.tostringlist(box_element)))[0][1:-1]
x1 = float(box_element.find(xmin_tag).text) x1 = float(box_element.find(xmin_tag).text)
pattern = re.compile('<ymin>', re.IGNORECASE) pattern = re.compile('<ymin>', re.IGNORECASE)
ymin_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][1:-1] ymin_tag = pattern.findall(
str(ET.tostringlist(box_element)))[0][1:-1]
y1 = float(box_element.find(ymin_tag).text) y1 = float(box_element.find(ymin_tag).text)
pattern = re.compile('<xmax>', re.IGNORECASE) pattern = re.compile('<xmax>', re.IGNORECASE)
xmax_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][1:-1] xmax_tag = pattern.findall(
str(ET.tostringlist(box_element)))[0][1:-1]
x2 = float(box_element.find(xmax_tag).text) x2 = float(box_element.find(xmax_tag).text)
pattern = re.compile('<ymax>', re.IGNORECASE) pattern = re.compile('<ymax>', re.IGNORECASE)
ymax_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][1:-1] ymax_tag = pattern.findall(
str(ET.tostringlist(box_element)))[0][1:-1]
y2 = float(box_element.find(ymax_tag).text) y2 = float(box_element.find(ymax_tag).text)
x1 = max(0, x1) x1 = max(0, x1)
y1 = max(0, y1) y1 = max(0, y1)
...@@ -176,6 +190,7 @@ class VOCDetection(Dataset): ...@@ -176,6 +190,7 @@ class VOCDetection(Dataset):
'gt_class': gt_class, 'gt_class': gt_class,
'gt_bbox': gt_bbox, 'gt_bbox': gt_bbox,
'gt_score': gt_score, 'gt_score': gt_score,
'gt_poly': [],
'difficult': difficult 'difficult': difficult
} }
voc_rec = (im_info, label_info) voc_rec = (im_info, label_info)
......
...@@ -74,6 +74,7 @@ class BaseAPI: ...@@ -74,6 +74,7 @@ class BaseAPI:
self.status = 'Normal' self.status = 'Normal'
        # Number of completed epochs; used as the starting epoch when resuming training # Number of completed epochs; used as the starting epoch when resuming training
self.completed_epochs = 0 self.completed_epochs = 0
self.scope = fluid.global_scope()
def _get_single_card_bs(self, batch_size): def _get_single_card_bs(self, batch_size):
if batch_size % len(self.places) == 0: if batch_size % len(self.places) == 0:
...@@ -85,6 +86,10 @@ class BaseAPI: ...@@ -85,6 +86,10 @@ class BaseAPI:
'place'])) 'place']))
def build_program(self): def build_program(self):
if hasattr(paddlex, 'model_built') and paddlex.model_built:
logging.error(
"Function model.train() only can be called once in your code.")
paddlex.model_built = True
        # Build the training network # Build the training network
self.train_inputs, self.train_outputs = self.build_net(mode='train') self.train_inputs, self.train_outputs = self.build_net(mode='train')
self.train_prog = fluid.default_main_program() self.train_prog = fluid.default_main_program()
...@@ -143,7 +148,7 @@ class BaseAPI: ...@@ -143,7 +148,7 @@ class BaseAPI:
outputs=self.test_outputs, outputs=self.test_outputs,
batch_size=batch_size, batch_size=batch_size,
batch_nums=batch_num, batch_nums=batch_num,
scope=None, scope=self.scope,
algo='KL', algo='KL',
quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"], quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
is_full_quantize=False, is_full_quantize=False,
...@@ -341,6 +346,7 @@ class BaseAPI: ...@@ -341,6 +346,7 @@ class BaseAPI:
var.name for var in list(self.test_inputs.values()) var.name for var in list(self.test_inputs.values())
] ]
test_outputs = list(self.test_outputs.values()) test_outputs = list(self.test_outputs.values())
with fluid.scope_guard(self.scope):
if self.__class__.__name__ == 'MaskRCNN': if self.__class__.__name__ == 'MaskRCNN':
from paddlex.utils.save import save_mask_inference_model from paddlex.utils.save import save_mask_inference_model
save_mask_inference_model( save_mask_inference_model(
......
...@@ -232,6 +232,7 @@ class BaseClassifier(BaseAPI): ...@@ -232,6 +232,7 @@ class BaseClassifier(BaseAPI):
true_labels = list() true_labels = list()
pred_scores = list() pred_scores = list()
if not hasattr(self, 'parallel_test_prog'): if not hasattr(self, 'parallel_test_prog'):
with fluid.scope_guard(self.scope):
self.parallel_test_prog = fluid.CompiledProgram( self.parallel_test_prog = fluid.CompiledProgram(
self.test_prog).with_data_parallel( self.test_prog).with_data_parallel(
share_vars_from=self.parallel_train_prog) share_vars_from=self.parallel_train_prog)
...@@ -248,7 +249,9 @@ class BaseClassifier(BaseAPI): ...@@ -248,7 +249,9 @@ class BaseClassifier(BaseAPI):
num_pad_samples = batch_size - num_samples num_pad_samples = batch_size - num_samples
pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1)) pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
images = np.concatenate([images, pad_images]) images = np.concatenate([images, pad_images])
outputs = self.exe.run(self.parallel_test_prog, with fluid.scope_guard(self.scope):
outputs = self.exe.run(
self.parallel_test_prog,
feed={'image': images}, feed={'image': images},
fetch_list=list(self.test_outputs.values())) fetch_list=list(self.test_outputs.values()))
outputs = [outputs[0][:num_samples]] outputs = [outputs[0][:num_samples]]
...@@ -325,6 +328,7 @@ class BaseClassifier(BaseAPI): ...@@ -325,6 +328,7 @@ class BaseClassifier(BaseAPI):
im = BaseClassifier._preprocess(images, transforms, self.model_type, im = BaseClassifier._preprocess(images, transforms, self.model_type,
self.__class__.__name__) self.__class__.__name__)
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog, result = self.exe.run(self.test_prog,
feed={'image': im}, feed={'image': im},
fetch_list=list(self.test_outputs.values()), fetch_list=list(self.test_outputs.values()),
...@@ -362,6 +366,7 @@ class BaseClassifier(BaseAPI): ...@@ -362,6 +366,7 @@ class BaseClassifier(BaseAPI):
self.model_type, self.model_type,
self.__class__.__name__, thread_num) self.__class__.__name__, thread_num)
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog, result = self.exe.run(self.test_prog,
feed={'image': im}, feed={'image': im},
fetch_list=list(self.test_outputs.values()), fetch_list=list(self.test_outputs.values()),
......
...@@ -330,6 +330,7 @@ class DeepLabv3p(BaseAPI): ...@@ -330,6 +330,7 @@ class DeepLabv3p(BaseAPI):
data_generator = eval_dataset.generator( data_generator = eval_dataset.generator(
batch_size=batch_size, drop_last=False) batch_size=batch_size, drop_last=False)
if not hasattr(self, 'parallel_test_prog'): if not hasattr(self, 'parallel_test_prog'):
with fluid.scope_guard(self.scope):
self.parallel_test_prog = fluid.CompiledProgram( self.parallel_test_prog = fluid.CompiledProgram(
self.test_prog).with_data_parallel( self.test_prog).with_data_parallel(
share_vars_from=self.parallel_train_prog) share_vars_from=self.parallel_train_prog)
...@@ -356,7 +357,9 @@ class DeepLabv3p(BaseAPI): ...@@ -356,7 +357,9 @@ class DeepLabv3p(BaseAPI):
pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1)) pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
images = np.concatenate([images, pad_images]) images = np.concatenate([images, pad_images])
feed_data = {'image': images} feed_data = {'image': images}
outputs = self.exe.run(self.parallel_test_prog, with fluid.scope_guard(self.scope):
outputs = self.exe.run(
self.parallel_test_prog,
feed=feed_data, feed=feed_data,
fetch_list=list(self.test_outputs.values()), fetch_list=list(self.test_outputs.values()),
return_numpy=True) return_numpy=True)
...@@ -453,6 +456,7 @@ class DeepLabv3p(BaseAPI): ...@@ -453,6 +456,7 @@ class DeepLabv3p(BaseAPI):
im, im_info = DeepLabv3p._preprocess( im, im_info = DeepLabv3p._preprocess(
images, transforms, self.model_type, self.__class__.__name__) images, transforms, self.model_type, self.__class__.__name__)
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog, result = self.exe.run(self.test_prog,
feed={'image': im}, feed={'image': im},
fetch_list=list(self.test_outputs.values()), fetch_list=list(self.test_outputs.values()),
...@@ -483,6 +487,7 @@ class DeepLabv3p(BaseAPI): ...@@ -483,6 +487,7 @@ class DeepLabv3p(BaseAPI):
img_file_list, transforms, self.model_type, img_file_list, transforms, self.model_type,
self.__class__.__name__, thread_num) self.__class__.__name__, thread_num)
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog, result = self.exe.run(self.test_prog,
feed={'image': im}, feed={'image': im},
fetch_list=list(self.test_outputs.values()), fetch_list=list(self.test_outputs.values()),
......
...@@ -349,7 +349,9 @@ class FasterRCNN(BaseAPI): ...@@ -349,7 +349,9 @@ class FasterRCNN(BaseAPI):
'im_info': im_infos, 'im_info': im_infos,
'im_shape': im_shapes, 'im_shape': im_shapes,
} }
outputs = self.exe.run(self.test_prog, with fluid.scope_guard(self.scope):
outputs = self.exe.run(
self.test_prog,
feed=[feed_data], feed=[feed_data],
fetch_list=list(self.test_outputs.values()), fetch_list=list(self.test_outputs.values()),
return_numpy=False) return_numpy=False)
...@@ -450,6 +452,7 @@ class FasterRCNN(BaseAPI): ...@@ -450,6 +452,7 @@ class FasterRCNN(BaseAPI):
im, im_resize_info, im_shape = FasterRCNN._preprocess( im, im_resize_info, im_shape = FasterRCNN._preprocess(
images, transforms, self.model_type, self.__class__.__name__) images, transforms, self.model_type, self.__class__.__name__)
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog, result = self.exe.run(self.test_prog,
feed={ feed={
'image': im, 'image': im,
...@@ -493,6 +496,7 @@ class FasterRCNN(BaseAPI): ...@@ -493,6 +496,7 @@ class FasterRCNN(BaseAPI):
img_file_list, transforms, self.model_type, img_file_list, transforms, self.model_type,
self.__class__.__name__, thread_num) self.__class__.__name__, thread_num)
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog, result = self.exe.run(self.test_prog,
feed={ feed={
'image': im, 'image': im,
......
...@@ -24,11 +24,12 @@ class HRNet(DeepLabv3p): ...@@ -24,11 +24,12 @@ class HRNet(DeepLabv3p):
Args: Args:
        num_classes (int): Number of classes. num_classes (int): Number of classes.
        width (int): Number of channels of the feature maps in the high-resolution branch. Defaults to 18. Valid values are [18, 30, 32, 40, 44, 48, 60, 64]. width (int|str): Number of channels of the feature maps in the high-resolution branch. Defaults to 18. Valid values are [18, 30, 32, 40, 44, 48, 60, 64, '18_small_v1'].
            '18_small_v1' is a lightweight version of 18.
        use_bce_loss (bool): Whether to use BCE loss as the network's loss function; only applicable to two-class segmentation. Can be combined with dice loss. Defaults to False. use_bce_loss (bool): Whether to use BCE loss as the network's loss function; only applicable to two-class segmentation. Can be combined with dice loss. Defaults to False.
        use_dice_loss (bool): Whether to use dice loss as the network's loss function; only applicable to two-class segmentation. Can be combined with BCE loss. use_dice_loss (bool): Whether to use dice loss as the network's loss function; only applicable to two-class segmentation. Can be combined with BCE loss.
            When both use_bce_loss and use_dice_loss are False, cross-entropy loss is used. Defaults to False. When both use_bce_loss and use_dice_loss are False, cross-entropy loss is used. Defaults to False.
        class_weight (list/str): Per-class weights for the cross-entropy loss. When class_weight is a list, its length must be class_weight (list|str): Per-class weights for the cross-entropy loss. When class_weight is a list, its length must be
            num_classes. When class_weight is a str, weight.lower() must be 'dynamic'; the weights are then computed each round from the per-class pixel ratios, num_classes. When class_weight is a str, weight.lower() must be 'dynamic'; the weights are then computed each round from the per-class pixel ratios,
            with each class weighted as (class ratio) * num_classes. With the default None, every class has weight 1, with each class weighted as (class ratio) * num_classes. With the default None, every class has weight 1,
            i.e. the ordinary cross-entropy loss. i.e. the ordinary cross-entropy loss.
...@@ -168,6 +169,6 @@ class HRNet(DeepLabv3p): ...@@ -168,6 +169,6 @@ class HRNet(DeepLabv3p):
return super(HRNet, self).train( return super(HRNet, self).train(
num_epochs, train_dataset, train_batch_size, eval_dataset, num_epochs, train_dataset, train_batch_size, eval_dataset,
save_interval_epochs, log_interval_steps, save_dir, save_interval_epochs, log_interval_steps, save_dir,
pretrain_weights, optimizer, learning_rate, lr_decay_power, use_vdl, pretrain_weights, optimizer, learning_rate, lr_decay_power,
sensitivities_file, eval_metric_loss, early_stop, use_vdl, sensitivities_file, eval_metric_loss, early_stop,
early_stop_patience, resume_checkpoint) early_stop_patience, resume_checkpoint)
...@@ -25,6 +25,7 @@ from paddlex.cv.transforms import build_transforms, build_transforms_v1 ...@@ -25,6 +25,7 @@ from paddlex.cv.transforms import build_transforms, build_transforms_v1
def load_model(model_dir, fixed_input_shape=None): def load_model(model_dir, fixed_input_shape=None):
model_scope = fluid.Scope()
if not osp.exists(osp.join(model_dir, "model.yml")): if not osp.exists(osp.join(model_dir, "model.yml")):
raise Exception("There's not model.yml in {}".format(model_dir)) raise Exception("There's not model.yml in {}".format(model_dir))
with open(osp.join(model_dir, "model.yml")) as f: with open(osp.join(model_dir, "model.yml")) as f:
...@@ -52,6 +53,7 @@ def load_model(model_dir, fixed_input_shape=None): ...@@ -52,6 +53,7 @@ def load_model(model_dir, fixed_input_shape=None):
format(fixed_input_shape)) format(fixed_input_shape))
model.fixed_input_shape = fixed_input_shape model.fixed_input_shape = fixed_input_shape
with fluid.scope_guard(model_scope):
if status == "Normal" or \ if status == "Normal" or \
status == "Prune" or status == "fluid.save": status == "Prune" or status == "fluid.save":
startup_prog = fluid.Program() startup_prog = fluid.Program()
...@@ -80,7 +82,8 @@ def load_model(model_dir, fixed_input_shape=None): ...@@ -80,7 +82,8 @@ def load_model(model_dir, fixed_input_shape=None):
model.test_inputs = OrderedDict() model.test_inputs = OrderedDict()
model.test_outputs = OrderedDict() model.test_outputs = OrderedDict()
for name in input_names: for name in input_names:
model.test_inputs[name] = model.test_prog.global_block().var(name) model.test_inputs[name] = model.test_prog.global_block().var(
name)
for i, out in enumerate(outputs): for i, out in enumerate(outputs):
var_desc = test_outputs_info[i] var_desc = test_outputs_info[i]
model.test_outputs[var_desc[0]] = out model.test_outputs[var_desc[0]] = out
...@@ -108,6 +111,7 @@ def load_model(model_dir, fixed_input_shape=None): ...@@ -108,6 +111,7 @@ def load_model(model_dir, fixed_input_shape=None):
model.__dict__[k] = v model.__dict__[k] = v
logging.info("Model[{}] loaded.".format(info['Model'])) logging.info("Model[{}] loaded.".format(info['Model']))
model.scope = model_scope
model.trainable = False model.trainable = False
model.status = status model.status = status
return model return model
......
...@@ -289,7 +289,9 @@ class MaskRCNN(FasterRCNN): ...@@ -289,7 +289,9 @@ class MaskRCNN(FasterRCNN):
'im_info': im_infos, 'im_info': im_infos,
'im_shape': im_shapes, 'im_shape': im_shapes,
} }
outputs = self.exe.run(self.test_prog, with fluid.scope_guard(self.scope):
outputs = self.exe.run(
self.test_prog,
feed=[feed_data], feed=[feed_data],
fetch_list=list(self.test_outputs.values()), fetch_list=list(self.test_outputs.values()),
return_numpy=False) return_numpy=False)
...@@ -385,6 +387,7 @@ class MaskRCNN(FasterRCNN): ...@@ -385,6 +387,7 @@ class MaskRCNN(FasterRCNN):
im, im_resize_info, im_shape = FasterRCNN._preprocess( im, im_resize_info, im_shape = FasterRCNN._preprocess(
images, transforms, self.model_type, self.__class__.__name__) images, transforms, self.model_type, self.__class__.__name__)
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog, result = self.exe.run(self.test_prog,
feed={ feed={
'image': im, 'image': im,
...@@ -428,6 +431,7 @@ class MaskRCNN(FasterRCNN): ...@@ -428,6 +431,7 @@ class MaskRCNN(FasterRCNN):
img_file_list, transforms, self.model_type, img_file_list, transforms, self.model_type,
self.__class__.__name__, thread_num) self.__class__.__name__, thread_num)
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog, result = self.exe.run(self.test_prog,
feed={ feed={
'image': im, 'image': im,
......
...@@ -154,8 +154,8 @@ class PaddleXPostTrainingQuantization(PostTrainingQuantization): ...@@ -154,8 +154,8 @@ class PaddleXPostTrainingQuantization(PostTrainingQuantization):
logging.info("Start to run batch!") logging.info("Start to run batch!")
for data in self._data_loader(): for data in self._data_loader():
start = time.time() start = time.time()
self._executor.run( with fluid.scope_guard(self._scope):
program=self._program, self._executor.run(program=self._program,
feed=data, feed=data,
fetch_list=self._fetch_list, fetch_list=self._fetch_list,
return_numpy=False) return_numpy=False)
...@@ -164,10 +164,9 @@ class PaddleXPostTrainingQuantization(PostTrainingQuantization): ...@@ -164,10 +164,9 @@ class PaddleXPostTrainingQuantization(PostTrainingQuantization):
else: else:
self._sample_threshold() self._sample_threshold()
end = time.time() end = time.time()
logging.debug('[Run batch data] Batch={}/{}, time_each_batch={} s.'.format( logging.debug(
str(batch_id + 1), '[Run batch data] Batch={}/{}, time_each_batch={} s.'.format(
str(batch_ct), str(batch_id + 1), str(batch_ct), str(end - start)))
str(end-start)))
batch_id += 1 batch_id += 1
if self._batch_nums and batch_id >= self._batch_nums: if self._batch_nums and batch_id >= self._batch_nums:
break break
...@@ -194,6 +193,7 @@ class PaddleXPostTrainingQuantization(PostTrainingQuantization): ...@@ -194,6 +193,7 @@ class PaddleXPostTrainingQuantization(PostTrainingQuantization):
Returns: Returns:
None None
''' '''
with fluid.scope_guard(self._scope):
feed_vars_names = [var.name for var in self._feed_list] feed_vars_names = [var.name for var in self._feed_list]
fluid.io.save_inference_model( fluid.io.save_inference_model(
dirname=save_model_path, dirname=save_model_path,
...@@ -212,7 +212,8 @@ class PaddleXPostTrainingQuantization(PostTrainingQuantization): ...@@ -212,7 +212,8 @@ class PaddleXPostTrainingQuantization(PostTrainingQuantization):
self._data_loader = fluid.io.DataLoader.from_generator( self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=feed_vars, capacity=3 * self._batch_size, iterable=True) feed_list=feed_vars, capacity=3 * self._batch_size, iterable=True)
self._data_loader.set_sample_list_generator( self._data_loader.set_sample_list_generator(
self._dataset.generator(self._batch_size, drop_last=True), self._dataset.generator(
self._batch_size, drop_last=True),
places=self._place) places=self._place)
def _calculate_kl_threshold(self): def _calculate_kl_threshold(self):
...@@ -235,10 +236,12 @@ class PaddleXPostTrainingQuantization(PostTrainingQuantization): ...@@ -235,10 +236,12 @@ class PaddleXPostTrainingQuantization(PostTrainingQuantization):
weight_threshold.append(abs_max_value) weight_threshold.append(abs_max_value)
self._quantized_var_kl_threshold[var_name] = weight_threshold self._quantized_var_kl_threshold[var_name] = weight_threshold
end = time.time() end = time.time()
logging.debug('[Calculate weight] Weight_id={}/{}, time_each_weight={} s.'.format( logging.debug(
'[Calculate weight] Weight_id={}/{}, time_each_weight={} s.'.
format(
str(ct), str(ct),
str(len(self._quantized_weight_var_name)), str(len(self._quantized_weight_var_name)), str(end -
str(end-start))) start)))
ct += 1 ct += 1
ct = 1 ct = 1
...@@ -257,10 +260,12 @@ class PaddleXPostTrainingQuantization(PostTrainingQuantization): ...@@ -257,10 +260,12 @@ class PaddleXPostTrainingQuantization(PostTrainingQuantization):
self._quantized_var_kl_threshold[var_name] = \ self._quantized_var_kl_threshold[var_name] = \
self._get_kl_scaling_factor(np.abs(sampling_data)) self._get_kl_scaling_factor(np.abs(sampling_data))
end = time.time() end = time.time()
logging.debug('[Calculate activation] Activation_id={}/{}, time_each_activation={} s.'.format( logging.debug(
'[Calculate activation] Activation_id={}/{}, time_each_activation={} s.'.
format(
str(ct), str(ct),
str(len(self._quantized_act_var_name)), str(len(self._quantized_act_var_name)),
str(end-start))) str(end - start)))
ct += 1 ct += 1
else: else:
for var_name in self._quantized_act_var_name: for var_name in self._quantized_act_var_name:
...@@ -270,10 +275,10 @@ class PaddleXPostTrainingQuantization(PostTrainingQuantization): ...@@ -270,10 +275,10 @@ class PaddleXPostTrainingQuantization(PostTrainingQuantization):
self._quantized_var_kl_threshold[var_name] = \ self._quantized_var_kl_threshold[var_name] = \
self._get_kl_scaling_factor(np.abs(self._sampling_data[var_name])) self._get_kl_scaling_factor(np.abs(self._sampling_data[var_name]))
end = time.time() end = time.time()
logging.debug('[Calculate activation] Activation_id={}/{}, time_each_activation={} s.'.format( logging.debug(
'[Calculate activation] Activation_id={}/{}, time_each_activation={} s.'.
format(
str(ct), str(ct),
str(len(self._quantized_act_var_name)), str(len(self._quantized_act_var_name)),
str(end-start))) str(end - start)))
ct += 1 ct += 1
\ No newline at end of file
...@@ -67,8 +67,7 @@ sensitivities_data = { ...@@ -67,8 +67,7 @@ sensitivities_data = {
'https://bj.bcebos.com/paddlex/slim_prune/yolov3_darknet53.sensitivities', 'https://bj.bcebos.com/paddlex/slim_prune/yolov3_darknet53.sensitivities',
'YOLOv3_ResNet34': 'YOLOv3_ResNet34':
'https://bj.bcebos.com/paddlex/slim_prune/yolov3_resnet34.sensitivities', 'https://bj.bcebos.com/paddlex/slim_prune/yolov3_resnet34.sensitivities',
'UNet': 'UNet': 'https://bj.bcebos.com/paddlex/slim_prune/unet.sensitivities',
'https://bj.bcebos.com/paddlex/slim_prune/unet.sensitivities',
'DeepLabv3p_MobileNetV2_x0.25': 'DeepLabv3p_MobileNetV2_x0.25':
'https://bj.bcebos.com/paddlex/slim_prune/deeplab_mobilenetv2_x0.25_no_aspp_decoder.sensitivities', 'https://bj.bcebos.com/paddlex/slim_prune/deeplab_mobilenetv2_x0.25_no_aspp_decoder.sensitivities',
'DeepLabv3p_MobileNetV2_x0.5': 'DeepLabv3p_MobileNetV2_x0.5':
...@@ -103,8 +102,8 @@ def get_sensitivities(flag, model, save_dir): ...@@ -103,8 +102,8 @@ def get_sensitivities(flag, model, save_dir):
model_type = model_name + '_' + model.backbone model_type = model_name + '_' + model.backbone
if model_type.startswith('DeepLabv3p_Xception'): if model_type.startswith('DeepLabv3p_Xception'):
model_type = model_type + '_' + 'aspp' + '_' + 'decoder' model_type = model_type + '_' + 'aspp' + '_' + 'decoder'
elif hasattr(model, 'encoder_with_aspp') or hasattr( elif hasattr(model, 'encoder_with_aspp') or hasattr(model,
model, 'enable_decoder'): 'enable_decoder'):
model_type = model_type + '_' + 'aspp' + '_' + 'decoder' model_type = model_type + '_' + 'aspp' + '_' + 'decoder'
if osp.isfile(flag): if osp.isfile(flag):
return flag return flag
...@@ -116,7 +115,6 @@ def get_sensitivities(flag, model, save_dir): ...@@ -116,7 +115,6 @@ def get_sensitivities(flag, model, save_dir):
paddlex.utils.download(url, path=save_dir) paddlex.utils.download(url, path=save_dir)
return osp.join(save_dir, fname) return osp.join(save_dir, fname)
# try: # try:
# hub.download(fname, save_path=save_dir) # hub.download(fname, save_path=save_dir)
# except Exception as e: # except Exception as e:
...@@ -126,7 +124,7 @@ def get_sensitivities(flag, model, save_dir): ...@@ -126,7 +124,7 @@ def get_sensitivities(flag, model, save_dir):
# model_type, fname)) # model_type, fname))
# elif isinstance(e, hub.ServerConnectionError): # elif isinstance(e, hub.ServerConnectionError):
# raise Exception( # raise Exception(
# "Cannot get reource for model {}(key='{}'), please check your internet connecgtion" # "Cannot get reource for model {}(key='{}'), please check your internet connection"
# .format(model_type, fname)) # .format(model_type, fname))
# else: # else:
# raise Exception( # raise Exception(
...@@ -162,7 +160,8 @@ def get_prune_params(model): ...@@ -162,7 +160,8 @@ def get_prune_params(model):
if model_type == 'AlexNet': if model_type == 'AlexNet':
prune_names.remove('conv5_weights') prune_names.remove('conv5_weights')
if model_type == 'ShuffleNetV2': if model_type == 'ShuffleNetV2':
not_prune_names = ['stage_2_1_conv5_weights', not_prune_names = [
'stage_2_1_conv5_weights',
'stage_2_1_conv3_weights', 'stage_2_1_conv3_weights',
'stage_2_2_conv3_weights', 'stage_2_2_conv3_weights',
'stage_2_3_conv3_weights', 'stage_2_3_conv3_weights',
...@@ -180,7 +179,8 @@ def get_prune_params(model): ...@@ -180,7 +179,8 @@ def get_prune_params(model):
'stage_4_1_conv3_weights', 'stage_4_1_conv3_weights',
'stage_4_2_conv3_weights', 'stage_4_2_conv3_weights',
'stage_4_3_conv3_weights', 'stage_4_3_conv3_weights',
'stage_4_4_conv3_weights',] 'stage_4_4_conv3_weights',
]
for name in not_prune_names: for name in not_prune_names:
prune_names.remove(name) prune_names.remove(name)
elif model_type == "MobileNetV1": elif model_type == "MobileNetV1":
......
...@@ -83,7 +83,7 @@ coco_pretrain = { ...@@ -83,7 +83,7 @@ coco_pretrain = {
'YOLOv3_MobileNetV1_COCO': 'YOLOv3_MobileNetV1_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar', 'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar',
'YOLOv3_MobileNetV3_large_COCO': 'YOLOv3_MobileNetV3_large_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams', 'https://bj.bcebos.com/paddlex/models/yolov3_mobilenet_v3.tar',
'YOLOv3_ResNet34_COCO': 'YOLOv3_ResNet34_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar', 'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar',
'YOLOv3_ResNet50_vd_COCO': 'YOLOv3_ResNet50_vd_COCO':
...@@ -203,7 +203,7 @@ def get_pretrain_weights(flag, class_name, backbone, save_dir): ...@@ -203,7 +203,7 @@ def get_pretrain_weights(flag, class_name, backbone, save_dir):
backbone)) backbone))
elif isinstance(e, hub.ServerConnectionError): elif isinstance(e, hub.ServerConnectionError):
raise Exception( raise Exception(
"Cannot get reource for backbone {}, please check your internet connecgtion" "Cannot get reource for backbone {}, please check your internet connection"
.format(backbone)) .format(backbone))
else: else:
raise Exception( raise Exception(
...@@ -231,7 +231,7 @@ def get_pretrain_weights(flag, class_name, backbone, save_dir): ...@@ -231,7 +231,7 @@ def get_pretrain_weights(flag, class_name, backbone, save_dir):
backbone)) backbone))
elif isinstance(hub.ServerConnectionError): elif isinstance(hub.ServerConnectionError):
raise Exception( raise Exception(
"Cannot get reource for backbone {}, please check your internet connecgtion" "Cannot get reource for backbone {}, please check your internet connection"
.format(backbone)) .format(backbone))
else: else:
raise Exception( raise Exception(
......
...@@ -318,7 +318,9 @@ class YOLOv3(BaseAPI): ...@@ -318,7 +318,9 @@ class YOLOv3(BaseAPI):
images = np.array([d[0] for d in data]) images = np.array([d[0] for d in data])
im_sizes = np.array([d[1] for d in data]) im_sizes = np.array([d[1] for d in data])
feed_data = {'image': images, 'im_size': im_sizes} feed_data = {'image': images, 'im_size': im_sizes}
outputs = self.exe.run(self.test_prog, with fluid.scope_guard(self.scope):
outputs = self.exe.run(
self.test_prog,
feed=[feed_data], feed=[feed_data],
fetch_list=list(self.test_outputs.values()), fetch_list=list(self.test_outputs.values()),
return_numpy=False) return_numpy=False)
...@@ -368,14 +370,7 @@ class YOLOv3(BaseAPI): ...@@ -368,14 +370,7 @@ class YOLOv3(BaseAPI):
return im, im_size return im, im_size
@staticmethod @staticmethod
def _postprocess(results, test_outputs_keys, batch_size, num_classes, def _postprocess(res, batch_size, num_classes, labels):
labels):
res = {
k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(list(test_outputs_keys), results)
}
res['im_id'] = (np.array(
[[i] for i in range(batch_size)]).astype('int32'), [[]])
clsid2catid = dict({i: i for i in range(num_classes)}) clsid2catid = dict({i: i for i in range(num_classes)})
xywh_results = bbox2out([res], clsid2catid) xywh_results = bbox2out([res], clsid2catid)
preds = [[] for i in range(batch_size)] preds = [[] for i in range(batch_size)]
...@@ -411,6 +406,7 @@ class YOLOv3(BaseAPI): ...@@ -411,6 +406,7 @@ class YOLOv3(BaseAPI):
im, im_size = YOLOv3._preprocess(images, transforms, self.model_type, im, im_size = YOLOv3._preprocess(images, transforms, self.model_type,
self.__class__.__name__) self.__class__.__name__)
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog, result = self.exe.run(self.test_prog,
feed={'image': im, feed={'image': im,
'im_size': im_size}, 'im_size': im_size},
...@@ -418,8 +414,13 @@ class YOLOv3(BaseAPI): ...@@ -418,8 +414,13 @@ class YOLOv3(BaseAPI):
return_numpy=False, return_numpy=False,
use_program_cache=True) use_program_cache=True)
preds = YOLOv3._postprocess(result, res = {
list(self.test_outputs.keys()), k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(list(self.test_outputs.keys()), result)
}
res['im_id'] = (np.array(
[[i] for i in range(len(images))]).astype('int32'), [[]])
preds = YOLOv3._postprocess(res,
len(images), self.num_classes, self.labels) len(images), self.num_classes, self.labels)
return preds[0] return preds[0]
...@@ -448,6 +449,7 @@ class YOLOv3(BaseAPI): ...@@ -448,6 +449,7 @@ class YOLOv3(BaseAPI):
self.model_type, self.model_type,
self.__class__.__name__, thread_num) self.__class__.__name__, thread_num)
with fluid.scope_guard(self.scope):
result = self.exe.run(self.test_prog, result = self.exe.run(self.test_prog,
feed={'image': im, feed={'image': im,
'im_size': im_size}, 'im_size': im_size},
...@@ -455,8 +457,13 @@ class YOLOv3(BaseAPI): ...@@ -455,8 +457,13 @@ class YOLOv3(BaseAPI):
return_numpy=False, return_numpy=False,
use_program_cache=True) use_program_cache=True)
preds = YOLOv3._postprocess(result, res = {
list(self.test_outputs.keys()), k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(list(self.test_outputs.keys()), result)
}
res['im_id'] = (np.array(
[[i] for i in range(len(img_file_list))]).astype('int32'), [[]])
preds = YOLOv3._postprocess(res,
len(img_file_list), self.num_classes, len(img_file_list), self.num_classes,
self.labels) self.labels)
return preds return preds
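To summarize the refactor above: `_postprocess` no longer wraps the executor outputs itself; each caller now builds the `res` dict (ndarray plus LoD row counts per fetched output, and a synthetic `im_id`) before calling it, so the predict paths share one postprocessing routine. A condensed sketch of the shared pattern, with `result`, `self.test_outputs`, and `batch_size` as in the hunk:
```
# Each fetched LoDTensor becomes a (data, lod) pair keyed by output name;
# lod holds the per-image row counts of the batched tensor.
res = {
    k: (np.array(v), v.recursive_sequence_lengths())
    for k, v in zip(list(self.test_outputs.keys()), result)
}
# im_id is synthesized so bbox2out can attribute boxes to images.
res['im_id'] = (np.array([[i] for i in range(batch_size)]).astype('int32'), [[]])
preds = YOLOv3._postprocess(res, batch_size, self.num_classes, self.labels)
```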
...@@ -51,15 +51,38 @@ class HRNet(object): ...@@ -51,15 +51,38 @@ class HRNet(object):
self.width = width self.width = width
self.has_se = has_se self.has_se = has_se
self.num_modules = {
'18_small_v1': [1, 1, 1, 1],
'18': [1, 1, 4, 3],
'30': [1, 1, 4, 3],
'32': [1, 1, 4, 3],
'40': [1, 1, 4, 3],
'44': [1, 1, 4, 3],
'48': [1, 1, 4, 3],
'60': [1, 1, 4, 3],
'64': [1, 1, 4, 3]
}
self.num_blocks = {
'18_small_v1': [[1], [2, 2], [2, 2, 2], [2, 2, 2, 2]],
'18': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]],
'30': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]],
'32': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]],
'40': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]],
'44': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]],
'48': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]],
'60': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]],
'64': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]]
}
self.channels = { self.channels = {
18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]], '18_small_v1': [[32], [16, 32], [16, 32, 64], [16, 32, 64, 128]],
30: [[30, 60], [30, 60, 120], [30, 60, 120, 240]], '18': [[64], [18, 36], [18, 36, 72], [18, 36, 72, 144]],
32: [[32, 64], [32, 64, 128], [32, 64, 128, 256]], '30': [[64], [30, 60], [30, 60, 120], [30, 60, 120, 240]],
40: [[40, 80], [40, 80, 160], [40, 80, 160, 320]], '32': [[64], [32, 64], [32, 64, 128], [32, 64, 128, 256]],
44: [[44, 88], [44, 88, 176], [44, 88, 176, 352]], '40': [[64], [40, 80], [40, 80, 160], [40, 80, 160, 320]],
48: [[48, 96], [48, 96, 192], [48, 96, 192, 384]], '44': [[64], [44, 88], [44, 88, 176], [44, 88, 176, 352]],
60: [[60, 120], [60, 120, 240], [60, 120, 240, 480]], '48': [[64], [48, 96], [48, 96, 192], [48, 96, 192, 384]],
64: [[64, 128], [64, 128, 256], [64, 128, 256, 512]], '60': [[64], [60, 120], [60, 120, 240], [60, 120, 240, 480]],
'64': [[64], [64, 128], [64, 128, 256], [64, 128, 256, 512]],
} }
self.freeze_at = freeze_at self.freeze_at = freeze_at
...@@ -73,31 +96,38 @@ class HRNet(object): ...@@ -73,31 +96,38 @@ class HRNet(object):
def net(self, input): def net(self, input):
width = self.width width = self.width
channels_2, channels_3, channels_4 = self.channels[width] channels_1, channels_2, channels_3, channels_4 = self.channels[str(
num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3 width)]
num_modules_1, num_modules_2, num_modules_3, num_modules_4 = self.num_modules[
str(width)]
num_blocks_1, num_blocks_2, num_blocks_3, num_blocks_4 = self.num_blocks[
str(width)]
x = self.conv_bn_layer( x = self.conv_bn_layer(
input=input, input=input,
filter_size=3, filter_size=3,
num_filters=64, num_filters=channels_1[0],
stride=2, stride=2,
if_act=True, if_act=True,
name='layer1_1') name='layer1_1')
x = self.conv_bn_layer( x = self.conv_bn_layer(
input=x, input=x,
filter_size=3, filter_size=3,
num_filters=64, num_filters=channels_1[0],
stride=2, stride=2,
if_act=True, if_act=True,
name='layer1_2') name='layer1_2')
la1 = self.layer1(x, name='layer2') la1 = self.layer1(x, num_blocks_1, channels_1, name='layer2')
tr1 = self.transition_layer([la1], [256], channels_2, name='tr1') tr1 = self.transition_layer([la1], [256], channels_2, name='tr1')
st2 = self.stage(tr1, num_modules_2, channels_2, name='st2') st2 = self.stage(
tr1, num_modules_2, num_blocks_2, channels_2, name='st2')
tr2 = self.transition_layer(st2, channels_2, channels_3, name='tr2') tr2 = self.transition_layer(st2, channels_2, channels_3, name='tr2')
st3 = self.stage(tr2, num_modules_3, channels_3, name='st3') st3 = self.stage(
tr2, num_modules_3, num_blocks_3, channels_3, name='st3')
tr3 = self.transition_layer(st3, channels_3, channels_4, name='tr3') tr3 = self.transition_layer(st3, channels_3, channels_4, name='tr3')
st4 = self.stage(tr3, num_modules_4, channels_4, name='st4') st4 = self.stage(
tr3, num_modules_4, num_blocks_4, channels_4, name='st4')
# classification # classification
if self.num_classes: if self.num_classes:
...@@ -139,12 +169,12 @@ class HRNet(object): ...@@ -139,12 +169,12 @@ class HRNet(object):
self.end_points = st4 self.end_points = st4
return st4[-1] return st4[-1]
def layer1(self, input, name=None): def layer1(self, input, num_blocks, channels, name=None):
conv = input conv = input
for i in range(4): for i in range(num_blocks[0]):
conv = self.bottleneck_block( conv = self.bottleneck_block(
conv, conv,
num_filters=64, num_filters=channels[0],
downsample=True if i == 0 else False, downsample=True if i == 0 else False,
name=name + '_' + str(i + 1)) name=name + '_' + str(i + 1))
return conv return conv
...@@ -178,7 +208,7 @@ class HRNet(object): ...@@ -178,7 +208,7 @@ class HRNet(object):
out = [] out = []
for i in range(len(channels)): for i in range(len(channels)):
residual = x[i] residual = x[i]
for j in range(block_num): for j in range(block_num[i]):
residual = self.basic_block( residual = self.basic_block(
residual, residual,
channels[i], channels[i],
...@@ -240,10 +270,11 @@ class HRNet(object): ...@@ -240,10 +270,11 @@ class HRNet(object):
def high_resolution_module(self, def high_resolution_module(self,
x, x,
num_blocks,
channels, channels,
multi_scale_output=True, multi_scale_output=True,
name=None): name=None):
residual = self.branches(x, 4, channels, name=name) residual = self.branches(x, num_blocks, channels, name=name)
out = self.fuse_layers( out = self.fuse_layers(
residual, residual,
channels, channels,
...@@ -254,6 +285,7 @@ class HRNet(object): ...@@ -254,6 +285,7 @@ class HRNet(object):
def stage(self, def stage(self,
x, x,
num_modules, num_modules,
num_blocks,
channels, channels,
multi_scale_output=True, multi_scale_output=True,
name=None): name=None):
...@@ -262,12 +294,13 @@ class HRNet(object): ...@@ -262,12 +294,13 @@ class HRNet(object):
if i == num_modules - 1 and multi_scale_output == False: if i == num_modules - 1 and multi_scale_output == False:
out = self.high_resolution_module( out = self.high_resolution_module(
out, out,
num_blocks,
channels, channels,
multi_scale_output=False, multi_scale_output=False,
name=name + '_' + str(i + 1)) name=name + '_' + str(i + 1))
else: else:
out = self.high_resolution_module( out = self.high_resolution_module(
out, channels, name=name + '_' + str(i + 1)) out, num_blocks, channels, name=name + '_' + str(i + 1))
return out return out
......
...@@ -82,7 +82,8 @@ class HRNet(object): ...@@ -82,7 +82,8 @@ class HRNet(object):
st4[3] = fluid.layers.resize_bilinear(st4[3], out_shape=shape) st4[3] = fluid.layers.resize_bilinear(st4[3], out_shape=shape)
out = fluid.layers.concat(st4, axis=1) out = fluid.layers.concat(st4, axis=1)
last_channels = sum(self.backbone.channels[self.backbone.width][-1]) last_channels = sum(self.backbone.channels[str(self.backbone.width)][
-1])
out = self._conv_bn_layer( out = self._conv_bn_layer(
input=out, input=out,
......
...@@ -16,6 +16,9 @@ from . import cls_transforms ...@@ -16,6 +16,9 @@ from . import cls_transforms
from . import det_transforms from . import det_transforms
from . import seg_transforms from . import seg_transforms
from . import visualize
visualize = visualize.visualize
def build_transforms(model_type, transforms_info, to_rgb=True): def build_transforms(model_type, transforms_info, to_rgb=True):
if model_type == "classifier": if model_type == "classifier":
......
...@@ -32,10 +32,8 @@ class ClsTransform: ...@@ -32,10 +32,8 @@ class ClsTransform:
class Compose(ClsTransform): class Compose(ClsTransform):
"""根据数据预处理/增强算子对输入数据进行操作。 """根据数据预处理/增强算子对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args: Args:
transforms (list): 数据预处理/增强算子。 transforms (list): 数据预处理/增强算子。
Raises: Raises:
TypeError: 形参数据类型不满足需求。 TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。 ValueError: 数据长度不匹配。
...@@ -434,6 +432,7 @@ class RandomDistort(ClsTransform): ...@@ -434,6 +432,7 @@ class RandomDistort(ClsTransform):
params['im'] = im params['im'] = im
if np.random.uniform(0, 1) < prob: if np.random.uniform(0, 1) < prob:
im = ops[id](**params) im = ops[id](**params)
im = im.astype('float32')
if label is None: if label is None:
return (im, ) return (im, )
else: else:
......
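The astype('float32') added after the distortion ops (here and in the det/seg variants below) pins the dtype back down: an individual op can hand back a float64 or uint8 array, which would surprise downstream transforms that assume float32. A minimal, runnable sketch of the failure mode (fake_distort is a hypothetical stand-in for one of the ops[id] calls):

    import numpy as np

    def fake_distort(im):                     # stand-in for one of the ops[id] calls
        return im.astype('float64') * 1.1     # some ops do their math in float64

    im = np.zeros((2, 2, 3), dtype='float32')
    im = fake_distort(im)
    assert im.dtype == np.float64             # dtype drifted inside the op
    im = im.astype('float32')                 # the added line restores the contract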
...@@ -41,10 +41,8 @@ class DetTransform: ...@@ -41,10 +41,8 @@ class DetTransform:
class Compose(DetTransform): class Compose(DetTransform):
"""根据数据预处理/增强列表对输入数据进行操作。 """根据数据预处理/增强列表对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args: Args:
transforms (list): 数据预处理/增强列表。 transforms (list): 数据预处理/增强列表。
Raises: Raises:
TypeError: 形参数据类型不满足需求。 TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。 ValueError: 数据长度不匹配。
...@@ -623,6 +621,7 @@ class RandomDistort(DetTransform): ...@@ -623,6 +621,7 @@ class RandomDistort(DetTransform):
if np.random.uniform(0, 1) < prob: if np.random.uniform(0, 1) < prob:
im = ops[id](**params) im = ops[id](**params)
im = im.astype('float32')
if label_info is None: if label_info is None:
return (im, im_info) return (im, im_info)
else: else:
...@@ -829,7 +828,7 @@ class RandomExpand(DetTransform): ...@@ -829,7 +828,7 @@ class RandomExpand(DetTransform):
'gt_class' not in label_info: 'gt_class' not in label_info:
raise TypeError('Cannot do RandomExpand! ' + \ raise TypeError('Cannot do RandomExpand! ' + \
'Because gt_bbox/gt_class is not in label_info!') 'Because gt_bbox/gt_class is not in label_info!')
if np.random.uniform(0., 1.) < self.prob: if np.random.uniform(0., 1.) > self.prob:
return (im, im_info, label_info) return (im, im_info, label_info)
if 'gt_class' in label_info and 0 in label_info['gt_class']: if 'gt_class' in label_info and 0 in label_info['gt_class']:
......
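The comparison flip fixes an inverted probability: returning early when the draw was below self.prob meant the expansion actually ran with probability 1 - prob. With the corrected guard the op is skipped when the draw exceeds prob, so it runs with probability prob, matching the parameter's meaning. A minimal sketch of the corrected guard (do_expand is a hypothetical stand-in for the expansion body):

    import numpy as np

    def do_expand(im):                          # hypothetical stand-in for the real expansion
        return im

    def maybe_expand(im, prob=0.5):
        if np.random.uniform(0., 1.) > prob:    # corrected: skip when the draw exceeds prob
            return im                           # happens with probability 1 - prob
        return do_expand(im)                    # happens with probability prob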
...@@ -35,14 +35,11 @@ class SegTransform: ...@@ -35,14 +35,11 @@ class SegTransform:
class Compose(SegTransform): class Compose(SegTransform):
"""根据数据预处理/增强算子对输入数据进行操作。 """根据数据预处理/增强算子对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args: Args:
transforms (list): 数据预处理/增强算子。 transforms (list): 数据预处理/增强算子。
Raises: Raises:
TypeError: transforms不是list对象 TypeError: transforms不是list对象
ValueError: transforms元素个数小于1。 ValueError: transforms元素个数小于1。
""" """
def __init__(self, transforms): def __init__(self, transforms):
...@@ -71,7 +68,6 @@ class Compose(SegTransform): ...@@ -71,7 +68,6 @@ class Compose(SegTransform):
e.g., the image shape before resize was (200, 300), and before padding it was e.g., the image shape before resize was (200, 300), and before padding it was
(400, 600) (400, 600)
label (str/np.ndarray): path of the annotation image, or the annotation image as an np.ndarray. label (str/np.ndarray): path of the annotation image, or the annotation image as an np.ndarray.
Returns: Returns:
tuple: a tuple of the fields required by the network; the fields are determined by the last preprocessing operator in transforms. tuple: a tuple of the fields required by the network; the fields are determined by the last preprocessing operator in transforms.
""" """
...@@ -1054,6 +1050,7 @@ class RandomDistort(SegTransform): ...@@ -1054,6 +1050,7 @@ class RandomDistort(SegTransform):
params['im'] = im params['im'] = im
if np.random.uniform(0, 1) < prob: if np.random.uniform(0, 1) < prob:
im = ops[id](**params) im = ops[id](**params)
im = im.astype('float32')
if label is None: if label is None:
return (im, im_info) return (im, im_info)
else: else:
......
This diff is collapsed.
...@@ -205,7 +205,7 @@ class Predictor: ...@@ -205,7 +205,7 @@ class Predictor:
""" """
preprocessed_input = self.preprocess([image]) preprocessed_input = self.preprocess([image])
model_pred = self.raw_predict(preprocessed_input) model_pred = self.raw_predict(preprocessed_input)
im_shape = None if 'im_shape' in preprocessed_input else preprocessed_input['im_shape'] im_shape = None if 'im_shape' not in preprocessed_input else preprocessed_input['im_shape']
results = self.postprocess( results = self.postprocess(
model_pred, topk=topk, batch_size=1, im_shape=im_shape) model_pred, topk=topk, batch_size=1, im_shape=im_shape)
......
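The Predictor fix corrects an inverted membership test: the old expression yielded None exactly when 'im_shape' was present (and would raise KeyError when it was absent), so postprocessing never received the recorded shape. A self-contained sketch of both expressions (the dict contents are made up):

    preprocessed_input = {'image': object(), 'im_shape': (608, 608)}

    # old (buggy): None whenever the key IS present; KeyError when it is absent
    old = None if 'im_shape' in preprocessed_input else preprocessed_input['im_shape']
    # new: None only when the key is genuinely absent
    new = None if 'im_shape' not in preprocessed_input else preprocessed_input['im_shape']

    assert old is None and new == (608, 608)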
...@@ -100,7 +100,7 @@ class LabelMe2COCO(X2COCO): ...@@ -100,7 +100,7 @@ class LabelMe2COCO(X2COCO):
image["height"] = json_info["imageHeight"] image["height"] = json_info["imageHeight"]
image["width"] = json_info["imageWidth"] image["width"] = json_info["imageWidth"]
image["id"] = image_id + 1 image["id"] = image_id + 1
image["file_name"] = json_info["imagePath"].split("/")[-1] image["file_name"] = osp.split(json_info["imagePath"])[-1]
return image return image
def generate_polygon_anns_field(self, height, width, def generate_polygon_anns_field(self, height, width,
......
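Switching from split('/') to osp.split makes the file-name extraction follow the platform's path rules: on Windows, where os.path is ntpath, it also splits on backslashes, which a literal '/' split never does. A small illustration (the path is made up; note that on POSIX neither call splits backslashes, so the benefit shows up where Windows-style LabelMe paths are actually processed):

    import os.path as osp

    p = 'D:\\data\\imgs\\0001.jpg'   # Windows-style imagePath from a LabelMe JSON
    p.split('/')[-1]                 # -> the whole string: there is no '/' to split on
    osp.split(p)[-1]                 # -> '0001.jpg' on Windows (os.path == ntpath)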
#!/bin/bash #!/bin/bash
set -e # set -e
#
readonly VERSION="3.8" # readonly VERSION="3.8"
#
version=$(clang-format -version) # version=$(clang-format -version)
#
if ! [[ $version == *"$VERSION"* ]]; then # if ! [[ $version == *"$VERSION"* ]]; then
echo "clang-format version check failed." # echo "clang-format version check failed."
echo "a version contains '$VERSION' is needed, but get '$version'" # echo "a version contains '$VERSION' is needed, but get '$version'"
echo "you can install the right version, and make an soft-link to '\$PATH' env" # echo "you can install the right version, and make an soft-link to '\$PATH' env"
exit -1 # exit -1
fi # fi
#
clang-format $@ # clang-format $@