Merge pull request #13 from PaddlePaddle/develop

00

Merge pull request #13 from PaddlePaddle/develop
00
d919f669 · SunAhong1993 · GitHub · bc44ce9d · 56acdf95 · d919f669
113 changed file
--- a/README.md
+++ b/README.md
@@ -6,6 +6,7 @@
 [![Version](https://img.shields.io/github/release/PaddlePaddle/PaddleX.svg)](https://github.com/PaddlePaddle/PaddleX/releases)
 ![python version](https://img.shields.io/badge/python-3.6+-orange.svg)
 ![support os](https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-yellow.svg)
+![QQGroup](https://img.shields.io/badge/QQ_Group-1045148026-52B6EF?style=social&logo=tencent-qq&logoColor=000&logoWidth=20)

 PaddleX是基于飞桨核心框架、开发套件和工具组件的深度学习全流程开发工具。具备**全流程打通**、**融合产业实践**、**易用易集成**三大特点。


--- a/deploy/cpp/CMakeLists.txt
+++ b/deploy/cpp/CMakeLists.txt
@@ -73,7 +73,11 @@ endif()
 if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include")
    include_directories("${PADDLE_DIR}/third_party/install/snappystream/include")
 endif()
-include_directories("${PADDLE_DIR}/third_party/install/zlib/include")
+# zlib does not exist in 1.8.1
+if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/include")
+    include_directories("${PADDLE_DIR}/third_party/install/zlib/include")
+endif()
+
 include_directories("${PADDLE_DIR}/third_party/boost")
 include_directories("${PADDLE_DIR}/third_party/eigen3")

@@ -84,7 +88,10 @@ if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
    link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib")
 endif()

-link_directories("${PADDLE_DIR}/third_party/install/zlib/lib")
+if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/lib")
+    link_directories("${PADDLE_DIR}/third_party/install/zlib/lib")
+endif()
+
 link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib")
 link_directories("${PADDLE_DIR}/third_party/install/glog/lib")
 link_directories("${PADDLE_DIR}/third_party/install/gflags/lib")
@@ -107,6 +114,14 @@ include_directories(${OpenCV_INCLUDE_DIRS})

 if (WIN32)
    add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
+    find_package(OpenMP REQUIRED)
+    if (OPENMP_FOUND)
+        message("OPENMP FOUND")
+        set(CMAKE_C_FLAGS_DEBUG   "${CMAKE_C_FLAGS_DEBUG} ${OpenMP_C_FLAGS}")
+        set(CMAKE_C_FLAGS_RELEASE  "${CMAKE_C_FLAGS_RELEASE} ${OpenMP_C_FLAGS}")
+        set(CMAKE_CXX_FLAGS_DEBUG  "${CMAKE_CXX_FLAGS_DEBUG} ${OpenMP_CXX_FLAGS}")
+        set(CMAKE_CXX_FLAGS_RELEASE   "${CMAKE_CXX_FLAGS_RELEASE} ${OpenMP_CXX_FLAGS}")
+    endif()
    set(CMAKE_C_FLAGS_DEBUG   "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
    set(CMAKE_C_FLAGS_RELEASE  "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
    set(CMAKE_CXX_FLAGS_DEBUG  "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
@@ -186,8 +201,13 @@ if(WITH_STATIC_LIB)
    set(DEPS
        ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
 else()
-    set(DEPS
-        ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
+    if (NOT WIN32)
+      set(DEPS
+          ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
+    else()
+      set(DEPS
+          ${PADDLE_DIR}/paddle/lib/paddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
+    endif()
 endif()

 if (NOT WIN32)
@@ -204,13 +224,16 @@ if (NOT WIN32)
 else()
    set(DEPS ${DEPS}
        ${MATH_LIB} ${MKLDNN_LIB}
-        glog gflags_static libprotobuf zlibstatic xxhash libyaml-cppmt)
+        glog gflags_static libprotobuf xxhash libyaml-cppmt)

+    if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/lib")
+      set(DEPS ${DEPS} zlibstatic)
+    endif()
    set(DEPS ${DEPS} libcmt shlwapi)
    if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
        set(DEPS ${DEPS} snappy)
    endif()
-    if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
+    if (EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
        set(DEPS ${DEPS} snappystream)
    endif()
 endif(NOT WIN32)
@@ -236,7 +259,9 @@ if(WITH_ENCRYPTION)
      link_directories("${ENCRYPTION_DIR}/lib")
      set(DEPS ${DEPS} ${ENCRYPTION_DIR}/lib/libpmodel-decrypt${CMAKE_SHARED_LIBRARY_SUFFIX})
  else()
-    message(FATAL_ERROR "Encryption Tool don't support WINDOWS")
+      include_directories("${ENCRYPTION_DIR}/include")
+      link_directories("${ENCRYPTION_DIR}/lib")
+      set(DEPS ${DEPS} ${ENCRYPTION_DIR}/lib/pmodel-decrypt${CMAKE_STATIC_LIBRARY_SUFFIX})
  endif()
 endif()

@@ -284,10 +309,23 @@ if (WIN32 AND WITH_MKL)
        COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll
        COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
        COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
-
        COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll
    )
-
+    # for encryption
+    if (EXISTS "${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll")
+        add_custom_command(TARGET classifier POST_BUILD
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll
+        )
+        add_custom_command(TARGET detector POST_BUILD
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll
+        )
+        add_custom_command(TARGET segmenter POST_BUILD
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll
+        )
+    endif()
 endif()

 file(COPY  "${CMAKE_SOURCE_DIR}/include/paddlex/visualize.h"

--- a/deploy/cpp/CMakeSettings.json
+++ b/deploy/cpp/CMakeSettings.json
@@ -22,9 +22,9 @@
                    "type": "PATH"
                },
                {
-                    "name": "CMAKE_BUILD_TYPE",
-                    "value": "Release",
-                    "type": "STRING"
+                    "name": "CUDA_LIB",
+                    "value": "",
+                    "type": "PATH"
                },
                {
                    "name": "WITH_STATIC_LIB",
@@ -40,8 +40,18 @@
                    "name": "WITH_GPU",
                    "value": "False",
                    "type": "BOOL"
+                },
+                {
+                    "name": "WITH_ENCRYPTION",
+                    "value": "False",
+                    "type": "BOOL"
+                },
+                {
+                    "name": "ENCRYPTION_DIR",
+                    "value": "",
+                    "type": "PATH"
                }
            ]
        }
    ]
-}
\ No newline at end of file
+}
--- a/deploy/cpp/cmake/yaml-cpp.cmake
+++ b/deploy/cpp/cmake/yaml-cpp.cmake
-find_package(Git REQUIRED)
-
 include(ExternalProject)

 message("${CMAKE_BUILD_TYPE}")

--- a/deploy/cpp/demo/classifier.cpp
+++ b/deploy/cpp/demo/classifier.cpp
@@ -13,14 +13,19 @@
 // limitations under the License.

 #include <glog/logging.h>
+#include <omp.h>

+#include <algorithm>
+#include <chrono>  // NOLINT
 #include <fstream>
 #include <iostream>
 #include <string>
 #include <vector>
-
+#include <utility>
 #include "include/paddlex/paddlex.h"

+using namespace std::chrono;  // NOLINT
+
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
@@ -28,6 +33,10 @@ DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
 DEFINE_string(image_list, "", "Path of test image list file");
+DEFINE_int32(batch_size, 1, "Batch size of infering");
+DEFINE_int32(thread_num,
+             omp_get_num_procs(),
+             "Number of preprocessing threads");

 int main(int argc, char** argv) {
  // Parsing command-line
@@ -44,32 +53,80 @@ int main(int argc, char** argv) {

  // 加载模型
  PaddleX::Model model;
-  model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key);
+  model.Init(FLAGS_model_dir,
+             FLAGS_use_gpu,
+             FLAGS_use_trt,
+             FLAGS_gpu_id,
+             FLAGS_key);

  // 进行预测
+  double total_running_time_s = 0.0;
+  double total_imread_time_s = 0.0;
+  int imgs = 1;
  if (FLAGS_image_list != "") {
    std::ifstream inf(FLAGS_image_list);
    if (!inf) {
      std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
      return -1;
    }
+    // 多batch预测
    std::string image_path;
+    std::vector<std::string> image_paths;
    while (getline(inf, image_path)) {
-      PaddleX::ClsResult result;
-      cv::Mat im = cv::imread(image_path, 1);
-      model.predict(im, &result);
-      std::cout << "Predict label: " << result.category
-                << ", label_id:" << result.category_id
-                << ", score: " << result.score << std::endl;
+      image_paths.push_back(image_path);
+    }
+    imgs = image_paths.size();
+    for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
+      auto start = system_clock::now();
+      // 读图像
+      int im_vec_size =
+          std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
+      std::vector<cv::Mat> im_vec(im_vec_size - i);
+      std::vector<PaddleX::ClsResult> results(im_vec_size - i,
+                                              PaddleX::ClsResult());
+      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
+      #pragma omp parallel for num_threads(thread_num)
+      for (int j = i; j < im_vec_size; ++j) {
+        im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
+      }
+      auto imread_end = system_clock::now();
+      model.predict(im_vec, &results, thread_num);
+
+      auto imread_duration = duration_cast<microseconds>(imread_end - start);
+      total_imread_time_s += static_cast<double>(imread_duration.count()) *
+                             microseconds::period::num /
+                             microseconds::period::den;
+
+      auto end = system_clock::now();
+      auto duration = duration_cast<microseconds>(end - start);
+      total_running_time_s += static_cast<double>(duration.count()) *
+                              microseconds::period::num /
+                              microseconds::period::den;
+      for (int j = i; j < im_vec_size; ++j) {
+        std::cout << "Path:" << image_paths[j]
+                  << ", predict label: " << results[j - i].category
+                  << ", label_id:" << results[j - i].category_id
+                  << ", score: " << results[j - i].score << std::endl;
+      }
    }
  } else {
+    auto start = system_clock::now();
    PaddleX::ClsResult result;
    cv::Mat im = cv::imread(FLAGS_image, 1);
    model.predict(im, &result);
+    auto end = system_clock::now();
+    auto duration = duration_cast<microseconds>(end - start);
+    total_running_time_s += static_cast<double>(duration.count()) *
+                            microseconds::period::num /
+                            microseconds::period::den;
    std::cout << "Predict label: " << result.category
              << ", label_id:" << result.category_id
              << ", score: " << result.score << std::endl;
  }
-
+  std::cout << "Total running time: " << total_running_time_s
+            << " s, average running time: " << total_running_time_s / imgs
+            << " s/img, total read img time: " << total_imread_time_s
+            << " s, average read time: " << total_imread_time_s / imgs
+            << " s/img, batch_size = " << FLAGS_batch_size << std::endl;
  return 0;
 }
--- a/deploy/cpp/demo/detector.cpp
+++ b/deploy/cpp/demo/detector.cpp
@@ -13,15 +13,21 @@
 // limitations under the License.

 #include <glog/logging.h>
+#include <omp.h>

+#include <algorithm>
+#include <chrono>  // NOLINT
 #include <fstream>
 #include <iostream>
 #include <string>
 #include <vector>
+#include <utility>

 #include "include/paddlex/paddlex.h"
 #include "include/paddlex/visualize.h"

+using namespace std::chrono;  // NOLINT
+
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
@@ -30,6 +36,13 @@ DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
 DEFINE_string(image_list, "", "Path of test image list file");
 DEFINE_string(save_dir, "output", "Path to save visualized image");
+DEFINE_int32(batch_size, 1, "Batch size of infering");
+DEFINE_double(threshold,
+              0.5,
+              "The minimum scores of target boxes which are shown");
+DEFINE_int32(thread_num,
+             omp_get_num_procs(),
+             "Number of preprocessing threads");

 int main(int argc, char** argv) {
  // 解析命令行参数
@@ -43,11 +56,17 @@ int main(int argc, char** argv) {
    std::cerr << "--image or --image_list need to be defined" << std::endl;
    return -1;
  }
-
  // 加载模型
  PaddleX::Model model;
-  model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key);
+  model.Init(FLAGS_model_dir,
+             FLAGS_use_gpu,
+             FLAGS_use_trt,
+             FLAGS_gpu_id,
+             FLAGS_key);

+  double total_running_time_s = 0.0;
+  double total_imread_time_s = 0.0;
+  int imgs = 1;
  auto colormap = PaddleX::GenerateColorMap(model.labels.size());
  std::string save_dir = "output";
  // 进行预测
@@ -58,47 +77,83 @@ int main(int argc, char** argv) {
      return -1;
    }
    std::string image_path;
+    std::vector<std::string> image_paths;
    while (getline(inf, image_path)) {
-      PaddleX::DetResult result;
-      cv::Mat im = cv::imread(image_path, 1);
-      model.predict(im, &result);
-      for (int i = 0; i < result.boxes.size(); ++i) {
-        std::cout << "image file: " << image_path
-                  << ", predict label: " << result.boxes[i].category
-                  << ", label_id:" << result.boxes[i].category_id
-                  << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
-                  << result.boxes[i].coordinate[0] << ", "
-                  << result.boxes[i].coordinate[1] << ", "
-                  << result.boxes[i].coordinate[2] << ", "
-                  << result.boxes[i].coordinate[3] << ")" << std::endl;
+      image_paths.push_back(image_path);
+    }
+    imgs = image_paths.size();
+    for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
+      auto start = system_clock::now();
+      int im_vec_size =
+          std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
+      std::vector<cv::Mat> im_vec(im_vec_size - i);
+      std::vector<PaddleX::DetResult> results(im_vec_size - i,
+                                              PaddleX::DetResult());
+      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
+      #pragma omp parallel for num_threads(thread_num)
+      for (int j = i; j < im_vec_size; ++j) {
+        im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
+      }
+      auto imread_end = system_clock::now();
+      model.predict(im_vec, &results, thread_num);
+      auto imread_duration = duration_cast<microseconds>(imread_end - start);
+      total_imread_time_s += static_cast<double>(imread_duration.count()) *
+                             microseconds::period::num /
+                             microseconds::period::den;
+      auto end = system_clock::now();
+      auto duration = duration_cast<microseconds>(end - start);
+      total_running_time_s += static_cast<double>(duration.count()) *
+                              microseconds::period::num /
+                              microseconds::period::den;
+      // 输出结果目标框
+      for (int j = 0; j < im_vec_size - i; ++j) {
+        for (int k = 0; k < results[j].boxes.size(); ++k) {
+          std::cout << "image file: " << image_paths[i + j] << ", ";
+          std::cout << "predict label: " << results[j].boxes[k].category
+                    << ", label_id:" << results[j].boxes[k].category_id
+                    << ", score: " << results[j].boxes[k].score
+                    << ", box(xmin, ymin, w, h):("
+                    << results[j].boxes[k].coordinate[0] << ", "
+                    << results[j].boxes[k].coordinate[1] << ", "
+                    << results[j].boxes[k].coordinate[2] << ", "
+                    << results[j].boxes[k].coordinate[3] << ")" << std::endl;
+        }
      }
-
      // 可视化
-      cv::Mat vis_img =
-          PaddleX::Visualize(im, result, model.labels, colormap, 0.5);
-      std::string save_path =
-          PaddleX::generate_save_path(FLAGS_save_dir, image_path);
-      cv::imwrite(save_path, vis_img);
-      result.clear();
-      std::cout << "Visualized output saved as " << save_path << std::endl;
+      for (int j = 0; j < im_vec_size - i; ++j) {
+        cv::Mat vis_img = PaddleX::Visualize(
+            im_vec[j], results[j], model.labels, colormap, FLAGS_threshold);
+        std::string save_path =
+            PaddleX::generate_save_path(FLAGS_save_dir, image_paths[i + j]);
+        cv::imwrite(save_path, vis_img);
+        std::cout << "Visualized output saved as " << save_path << std::endl;
+      }
    }
  } else {
+    auto start = system_clock::now();
    PaddleX::DetResult result;
    cv::Mat im = cv::imread(FLAGS_image, 1);
    model.predict(im, &result);
+    auto end = system_clock::now();
+    auto duration = duration_cast<microseconds>(end - start);
+    total_running_time_s += static_cast<double>(duration.count()) *
+                            microseconds::period::num /
+                            microseconds::period::den;
+    // Print every predicted box; file name and box details share one line
    for (int i = 0; i < result.boxes.size(); ++i) {
+      std::cout << "image file: " << FLAGS_image;
      std::cout << ", predict label: " << result.boxes[i].category
                << ", label_id:" << result.boxes[i].category_id
-                << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
-                << result.boxes[i].coordinate[0] << ", "
-                << result.boxes[i].coordinate[1] << ", "
+                << ", score: " << result.boxes[i].score
+                << ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0]
+                << ", " << result.boxes[i].coordinate[1] << ", "
                << result.boxes[i].coordinate[2] << ", "
                << result.boxes[i].coordinate[3] << ")" << std::endl;
    }

    // 可视化
    cv::Mat vis_img =
-        PaddleX::Visualize(im, result, model.labels, colormap, 0.5);
+        PaddleX::Visualize(im, result, model.labels, colormap, FLAGS_threshold);
    std::string save_path =
        PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
    cv::imwrite(save_path, vis_img);
@@ -106,5 +161,11 @@ int main(int argc, char** argv) {
    std::cout << "Visualized output saved as " << save_path << std::endl;
  }

+  std::cout << "Total running time: " << total_running_time_s
+            << " s, average running time: " << total_running_time_s / imgs
+            << " s/img, total read img time: " << total_imread_time_s
+            << " s, average read img time: " << total_imread_time_s / imgs
+            << " s/img, batch_size = " << FLAGS_batch_size << std::endl;
+
  return 0;
 }
--- a/deploy/cpp/demo/segmenter.cpp
+++ b/deploy/cpp/demo/segmenter.cpp
@@ -13,15 +13,20 @@
 // limitations under the License.

 #include <glog/logging.h>
+#include <omp.h>

+#include <algorithm>
+#include <chrono>  // NOLINT
 #include <fstream>
 #include <iostream>
 #include <string>
 #include <vector>
-
+#include <utility>
 #include "include/paddlex/paddlex.h"
 #include "include/paddlex/visualize.h"

+using namespace std::chrono;  // NOLINT
+
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
@@ -30,6 +35,10 @@ DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
 DEFINE_string(image_list, "", "Path of test image list file");
 DEFINE_string(save_dir, "output", "Path to save visualized image");
+DEFINE_int32(batch_size, 1, "Batch size of infering");
+DEFINE_int32(thread_num,
+             omp_get_num_procs(),
+             "Number of preprocessing threads");

 int main(int argc, char** argv) {
  // 解析命令行参数
@@ -46,8 +55,15 @@ int main(int argc, char** argv) {

  // 加载模型
  PaddleX::Model model;
-  model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key);
+  model.Init(FLAGS_model_dir,
+             FLAGS_use_gpu,
+             FLAGS_use_trt,
+             FLAGS_gpu_id,
+             FLAGS_key);

+  double total_running_time_s = 0.0;
+  double total_imread_time_s = 0.0;
+  int imgs = 1;
  auto colormap = PaddleX::GenerateColorMap(model.labels.size());
  // 进行预测
  if (FLAGS_image_list != "") {
@@ -57,23 +73,54 @@ int main(int argc, char** argv) {
      return -1;
    }
    std::string image_path;
+    std::vector<std::string> image_paths;
    while (getline(inf, image_path)) {
-      PaddleX::SegResult result;
-      cv::Mat im = cv::imread(image_path, 1);
-      model.predict(im, &result);
+      image_paths.push_back(image_path);
+    }
+    imgs = image_paths.size();
+    for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
+      auto start = system_clock::now();
+      int im_vec_size =
+          std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
+      std::vector<cv::Mat> im_vec(im_vec_size - i);
+      std::vector<PaddleX::SegResult> results(im_vec_size - i,
+                                              PaddleX::SegResult());
+      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
+      #pragma omp parallel for num_threads(thread_num)
+      for (int j = i; j < im_vec_size; ++j) {
+        im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
+      }
+      auto imread_end = system_clock::now();
+      model.predict(im_vec, &results, thread_num);
+      auto imread_duration = duration_cast<microseconds>(imread_end - start);
+      total_imread_time_s += static_cast<double>(imread_duration.count()) *
+                             microseconds::period::num /
+                             microseconds::period::den;
+      auto end = system_clock::now();
+      auto duration = duration_cast<microseconds>(end - start);
+      total_running_time_s += static_cast<double>(duration.count()) *
+                              microseconds::period::num /
+                              microseconds::period::den;
      // 可视化
-      cv::Mat vis_img =
-          PaddleX::Visualize(im, result, model.labels, colormap);
-      std::string save_path =
-          PaddleX::generate_save_path(FLAGS_save_dir, image_path);
-      cv::imwrite(save_path, vis_img);
-      result.clear();
-      std::cout << "Visualized output saved as " << save_path << std::endl;
+      for (int j = 0; j < im_vec_size - i; ++j) {
+        cv::Mat vis_img =
+            PaddleX::Visualize(im_vec[j], results[j], model.labels, colormap);
+        std::string save_path =
+            PaddleX::generate_save_path(FLAGS_save_dir, image_paths[i + j]);
+        cv::imwrite(save_path, vis_img);
+        std::cout << "Visualized output saved as " << save_path << std::endl;
+      }
    }
  } else {
+    auto start = system_clock::now();
    PaddleX::SegResult result;
    cv::Mat im = cv::imread(FLAGS_image, 1);
    model.predict(im, &result);
+    auto end = system_clock::now();
+    auto duration = duration_cast<microseconds>(end - start);
+    total_running_time_s += static_cast<double>(duration.count()) *
+                            microseconds::period::num /
+                            microseconds::period::den;
    // 可视化
    cv::Mat vis_img = PaddleX::Visualize(im, result, model.labels, colormap);
    std::string save_path =
@@ -82,6 +129,11 @@ int main(int argc, char** argv) {
    result.clear();
    std::cout << "Visualized output saved as " << save_path << std::endl;
  }
+  std::cout << "Total running time: " << total_running_time_s
+            << " s, average running time: " << total_running_time_s / imgs
+            << " s/img, total read img time: " << total_imread_time_s
+            << " s, average read img time: " << total_imread_time_s / imgs
+            << " s/img, batch_size = " << FLAGS_batch_size << std::endl;

  return 0;
 }
--- a/deploy/cpp/include/paddlex/config_parser.h
+++ b/deploy/cpp/include/paddlex/config_parser.h
@@ -54,4 +54,4 @@ class ConfigPaser {
  YAML::Node Transforms_;
 };

-}  // namespace PaddleDetection
+}  // namespace PaddleX
--- a/deploy/cpp/include/paddlex/paddlex.h
+++ b/deploy/cpp/include/paddlex/paddlex.h
@@ -16,8 +16,11 @@

 #include <functional>
 #include <iostream>
+#include <map>
+#include <memory>
 #include <numeric>
-
+#include <string>
+#include <vector>
 #include "yaml-cpp/yaml.h"

 #ifdef _WIN32
@@ -28,19 +31,48 @@

 #include "paddle_inference_api.h"  // NOLINT

-#include "config_parser.h"
-#include "results.h"
-#include "transforms.h"
+#include "config_parser.h"  // NOLINT
+#include "results.h"  // NOLINT
+#include "transforms.h"  // NOLINT

 #ifdef WITH_ENCRYPTION
-#include "paddle_model_decrypt.h"
-#include "model_code.h"
+#include "paddle_model_decrypt.h"  // NOLINT
+#include "model_code.h"  // NOLINT
 #endif

 namespace PaddleX {

+/*
+ * @brief
+ * This class encapsulates all necessary processing steps of model inference, which
+ * include image matrix preprocessing, model prediction and result postprocessing.
+ * The entire inference process can be summarized as follows:
+ * 1. preprocess the image matrix (resize, padding, ...)
+ * 2. run model inference
+ * 3. postprocess the results generated by model inference
+ *
+ * @example
+ *  PaddleX::Model cls_model;
+ *  // initialize model configuration
+ *  cls_model.Init(cls_model_dir, use_gpu, use_trt, gpu_id, encryption_key);
+ *  // define a Classification result object
+ *  PaddleX::ClsResult cls_result;
+ *  // get image matrix from image file
+ *  cv::Mat im = cv::imread(image_file_path, 1);
+ *  cls_model.predict(im, &cls_result);
+ * */
 class Model {
 public:
+  /*
+   * @brief
+   * This method initializes the model configuration.
+   *
+   * @param model_dir: the directory which contains model.yml
+   * @param use_gpu: whether to use the GPU for inference
+   * @param use_trt: whether to use TensorRT for inference
+   * @param gpu_id: the id of the GPU to use when inferring on GPU
+   * @param key: the encryption key, needed when using an encrypted model
+   * */
  void Init(const std::string& model_dir,
            bool use_gpu = false,
            bool use_trt = false,
@@ -55,26 +87,134 @@ class Model {
                        int gpu_id = 0,
                        std::string key = "");

-  bool load_config(const std::string& model_dir);
-
+  /*
+   * @brief
+   * This method loads the model configuration, which includes
+   * the transform steps and the label list.
+   *
+   * @param yaml_input: model configuration string
+   * @return true if the configuration is loaded successfully
+   * */
+  bool load_config(const std::string& yaml_input);
+
+  /*
+   * @brief
+   * This method transforms a single image matrix; the result is
+   * returned through the second parameter.
+   *
+   * @param input_im: single image matrix to be transformed
+   * @param blob: the raw data of the image matrix after transformation
+   * @return true if the image matrix is preprocessed successfully
+   * */
  bool preprocess(const cv::Mat& input_im, ImageBlob* blob);

+  /*
+   * @brief
+   * This method transforms multiple image matrices; the result is
+   * returned through the second parameter.
+   *
+   * @param input_im_batch: a batch of image matrices to be transformed
+   * @param blob_batch: raw data of the batch of image matrices after transformation
+   * @param thread_num: the number of preprocessing threads,
+   *                    each thread runs preprocessing on a single image matrix
+   * @return true if the batch of image matrices is preprocessed successfully
+   * */
+  bool preprocess(const std::vector<cv::Mat> &input_im_batch,
+                  std::vector<ImageBlob> *blob_batch,
+                  int thread_num = 1);
+
+  /*
+   * @brief
+   * This method runs classification model prediction on a single image matrix;
+   * the result is returned through the second parameter.
+   *
+   * @param im: single image matrix to be predicted
+   * @param result: postprocessed classification prediction result
+   * @return true if prediction succeeds
+   * */
  bool predict(const cv::Mat& im, ClsResult* result);

+  /*
+   * @brief
+   * This method runs classification model prediction on a batch of image matrices;
+   * the results are returned through the second parameter.
+   *
+   * @param im_batch: a batch of image matrices to be predicted
+   * @param results: a batch of postprocessed classification prediction results
+   * @param thread_num: the number of predicting threads, each thread runs prediction
+   *                    on a single image matrix
+   * @return true if prediction succeeds
+   * */
+  bool predict(const std::vector<cv::Mat> &im_batch,
+               std::vector<ClsResult> *results,
+               int thread_num = 1);
+
+  /*
+   * @brief
+   * This method runs detection or instance segmentation model prediction
+   * on a single image matrix; the result is returned through the second parameter.
+   *
+   * @param im: single image matrix to be predicted
+   * @param result: postprocessed detection or instance segmentation prediction result
+   * @return true if prediction succeeds
+   * */
  bool predict(const cv::Mat& im, DetResult* result);

+  /*
+   * @brief
+   * This method runs detection or instance segmentation model prediction
+   * on a batch of image matrices; the results are returned through the second parameter.
+   *
+   * @param im_batch: a batch of image matrices to be predicted
+   * @param result: a batch of postprocessed detection or instance segmentation prediction results
+   * @param thread_num: the number of predicting threads, each thread runs prediction
+   *                    on a single image matrix
+   * @return true if prediction succeeds
+   * */
+  bool predict(const std::vector<cv::Mat> &im_batch,
+               std::vector<DetResult> *result,
+               int thread_num = 1);
+
+  /*
+   * @brief
+   * This method runs segmentation model prediction on a single image matrix;
+   * the result is returned through the second parameter.
+   *
+   * @param im: single image matrix to be predicted
+   * @param result: postprocessed segmentation prediction result
+   * @return true if prediction succeeds
+   * */
  bool predict(const cv::Mat& im, SegResult* result);

-  bool postprocess(SegResult* result);
-
-  bool postprocess(DetResult* result);
-
+  /*
+   * @brief
+   * This method runs segmentation model prediction on a batch of image matrices;
+   * the results are returned through the second parameter.
+   *
+   * @param im_batch: a batch of image matrices to be predicted
+   * @param result: a batch of postprocessed segmentation prediction results
+   * @param thread_num: the number of predicting threads, each thread runs prediction
+   *                    on a single image matrix
+   * @return true if prediction succeeds
+   * */
+  bool predict(const std::vector<cv::Mat> &im_batch,
+               std::vector<SegResult> *result,
+               int thread_num = 1);
+
+  // model type, one of three: classifier, detector, segmenter
  std::string type;
+  // model name, such as FasterRCNN, YOLOV3 and so on.
  std::string name;
  std::map<int, std::string> labels;
+  // transform(preprocessing) pipeline manager
  Transforms transforms_;
+  // single input preprocessed data
  ImageBlob inputs_;
+  // batch input preprocessed data
+  std::vector<ImageBlob> inputs_batch_;
+  // raw data of predicting results
  std::vector<float> outputs_;
+  // a predictor which run the model predicting
  std::unique_ptr<paddle::PaddlePredictor> predictor_;
 };
-}  // namespce of PaddleX
+}  // namespace PaddleX
--- a/deploy/cpp/include/paddlex/results.h
+++ b/deploy/cpp/include/paddlex/results.h
@@ -20,9 +20,15 @@

 namespace PaddleX {

+/*
+ * @brief
+ * This class represents mask in instance segmentation tasks.
+ * */
 template <class T>
 struct Mask {
+  // raw data of mask
  std::vector<T> data;
+  // the shape of mask
  std::vector<int> shape;
  void clear() {
    data.clear();
@@ -30,19 +36,34 @@ struct Mask {
  }
 };

+/*
+ * @brief 
+ * This class represents target box in detection or instance segmentation tasks.
+ * */
 struct Box {
  int category_id;
+  // category label this box belongs to
  std::string category;
+  // confidence score
  float score;
  std::vector<float> coordinate;
  Mask<float> mask;
 };

+/*
+ * @brief
+ * This class is the base class of prediction results.
+ * */
 class BaseResult {
 public:
+  // model type
  std::string type = "base";
 };

+/*
+ * @brief
+ * This class represents classification result.
+ * */
 class ClsResult : public BaseResult {
 public:
  int category_id;
@@ -51,17 +72,28 @@ class ClsResult : public BaseResult {
  std::string type = "cls";
 };

+/*
+ * @brief
+ * This class represents detection or instance segmentation result.
+ * */
 class DetResult : public BaseResult {
 public:
+  // target boxes
  std::vector<Box> boxes;
  int mask_resolution;
  std::string type = "det";
  void clear() { boxes.clear(); }
 };

+/*
+ * @brief
+ * This class represents segmentation result.
+ * */
 class SegResult : public BaseResult {
 public:
+  // represent label of each pixel on image matrix
  Mask<int64_t> label_map;
+  // represent score of each pixel on image matrix
  Mask<float> score_map;
  std::string type = "seg";
  void clear() {

--- a/deploy/cpp/include/paddlex/transforms.h
+++ b/deploy/cpp/include/paddlex/transforms.h
@@ -28,7 +28,10 @@

 namespace PaddleX {

-// Object for storing all preprocessed data
+/*
+ * @brief
+ * This class represents object for storing all preprocessed data
+ * */
 class ImageBlob {
 public:
  // Original image height and width
@@ -45,21 +48,34 @@ class ImageBlob {
  std::vector<float> im_data_;

  void clear() {
-    ori_im_size_.clear();
-    new_im_size_.clear();
    im_size_before_resize_.clear();
    reshape_order_.clear();
    im_data_.clear();
  }
 };

-// Abstraction of preprocessing opration class
+/*
+ * @brief
+ * Abstraction of preprocessing operation class
+ * */
 class Transform {
 public:
  virtual void Init(const YAML::Node& item) = 0;
+  /*
+   * @brief
+   * This method executes preprocessing operation on image matrix,
+   * result will be returned at second parameter.
+   * @param im: single image matrix to be preprocessed
+   * @param data: the raw data of single image matrix after preprocessed
+   * @return true if transform successfully
+   * */
  virtual bool Run(cv::Mat* im, ImageBlob* data) = 0;
 };

+/*
+ * @brief
+ * This class executes normalization operation on image matrix
+ * */
 class Normalize : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -74,6 +90,14 @@ class Normalize : public Transform {
  std::vector<float> std_;
 };

+/*
+ * @brief
+ * This class executes resize by short operation on image matrix. At first, it resizes
+ * the short side of image matrix to specified length. Accordingly, the long side
+ * will be resized in the same proportion. If new length of long side exceeds max
+ * size, the long side will be resized to max size, and the short side will be
+ * resized in the same proportion
+ * */
 class ResizeByShort : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -92,6 +116,12 @@ class ResizeByShort : public Transform {
  int max_size_;
 };

+/*
+ * @brief
+ * This class executes resize by long operation on image matrix. At first, it resizes
+ * the long side of image matrix to specified length. Accordingly, the short side
+ * will be resized in the same proportion.
+ * */
 class ResizeByLong : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -103,13 +133,20 @@ class ResizeByLong : public Transform {
  int long_size_;
 };

+/*
+ * @brief
+ * This class executes resize operation on image matrix. It resizes width and height
+ * to specified length.
+ * */
 class Resize : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
+    if (item["interp"].IsDefined()) {
+      interp_ = item["interp"].as<std::string>();
+    }
    if (item["target_size"].IsScalar()) {
      height_ = item["target_size"].as<int>();
      width_ = item["target_size"].as<int>();
-      interp_ = item["interp"].as<std::string>();
    } else if (item["target_size"].IsSequence()) {
      std::vector<int> target_size = item["target_size"].as<std::vector<int>>();
      width_ = target_size[0];
@@ -128,6 +165,11 @@ class Resize : public Transform {
  std::string interp_;
 };

+/*
+ * @brief
+ * This class executes center crop operation on image matrix. It crops the center
+ * of image matrix according to specified size.
+ * */
 class CenterCrop : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -147,6 +189,11 @@ class CenterCrop : public Transform {
  int width_;
 };

+/*
+ * @brief
+ * This class executes padding operation on image matrix. It makes border on edge
+ * of image matrix.
+ * */
 class Padding : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
@@ -175,7 +222,11 @@ class Padding : public Transform {
  int width_ = 0;
  int height_ = 0;
 };
-
+/*
+ * @brief
+ * This class is transform operations manager. It stores all necessary
+ * transform operations and runs them in correct order.
+ * */
 class Transforms {
 public:
  void Init(const YAML::Node& node, bool to_rgb = true);

--- a/deploy/cpp/include/paddlex/visualize.h
+++ b/deploy/cpp/include/paddlex/visualize.h
@@ -43,20 +43,55 @@

 namespace PaddleX {

-// Generate visualization colormap for each class
+/*
+ * @brief
+ * Generate visualization colormap for each class
+ *
+ * @param num_class: number of classes
+ * @return color map, the size of vector is 3 * num_class
+ * */
 std::vector<int> GenerateColorMap(int num_class);

+
+/*
+ * @brief
+ * Visualize the detection result
+ *
+ * @param img: initial image matrix
+ * @param results: the detection result
+ * @param labels: label map
+ * @param colormap: visualization color map
+ * @return visualized image matrix
+ * */
 cv::Mat Visualize(const cv::Mat& img,
                     const DetResult& results,
                     const std::map<int, std::string>& labels,
                     const std::vector<int>& colormap,
                     float threshold = 0.5);

+/*
+ * @brief
+ * Visualize the segmentation result
+ *
+ * @param img: initial image matrix
+ * @param result: the segmentation result
+ * @param labels: label map
+ * @param colormap: visualization color map
+ * @return visualized image matrix
+ * */
 cv::Mat Visualize(const cv::Mat& img,
                     const SegResult& result,
                     const std::map<int, std::string>& labels,
                     const std::vector<int>& colormap);

+/*
+ * @brief
+ * generate save path for visualized image matrix
+ *
+ * @param save_dir: directory for saving visualized image matrix
+ * @param file_path: source image file path
+ * @return path of saving visualized result
+ * */
 std::string generate_save_path(const std::string& save_dir,
                               const std::string& file_path);
-}  // namespce of PaddleX
+}  // namespace PaddleX
--- a/deploy/cpp/scripts/build.sh
+++ b/deploy/cpp/scripts/build.sh
@@ -4,10 +4,10 @@ WITH_GPU=OFF
 WITH_MKL=ON
 # 是否集成 TensorRT(仅WITH_GPU=ON 有效)
 WITH_TENSORRT=OFF
-# TensorRT 的路径
-TENSORRT_DIR=/path/to/TensorRT/
-# Paddle 预测库路径
-PADDLE_DIR=/docker/jiangjiajun/PaddleDetection/deploy/cpp/fluid_inference
+# TensorRT 的路径，如果需要集成TensorRT，需修改为您实际安装的TensorRT路径
+TENSORRT_DIR=/root/projects/TensorRT/
+# Paddle 预测库路径, 请修改为您实际安装的预测库路径
+PADDLE_DIR=/root/projects/fluid_inference
 # Paddle 的预测库是否使用静态库来编译
 # 使用TensorRT时，Paddle的预测库通常为动态库
 WITH_STATIC_LIB=OFF
@@ -16,7 +16,7 @@ CUDA_LIB=/usr/local/cuda/lib64
 # CUDNN 的 lib 路径
 CUDNN_LIB=/usr/local/cuda/lib64

-# 是否加载加密后的模型 
+# 是否加载加密后的模型
 WITH_ENCRYPTION=ON
 # 加密工具的路径, 如果使用自带预编译版本可不修改
 sh $(pwd)/scripts/bootstrap.sh # 下载预编译版本的加密工具

--- a/deploy/cpp/src/paddlex.cpp
+++ b/deploy/cpp/src/paddlex.cpp
--- a/deploy/cpp/src/transforms.cpp
+++ b/deploy/cpp/src/transforms.cpp
@@ -95,11 +95,13 @@ bool Padding::Run(cv::Mat* im, ImageBlob* data) {
  if (width_ > 1 & height_ > 1) {
    padding_w = width_ - im->cols;
    padding_h = height_ - im->rows;
-  } else if (coarsest_stride_ > 1) {
+  } else if (coarsest_stride_ >= 1) {
+    int h = im->rows;
+    int w = im->cols;
    padding_h =
-        ceil(im->rows * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows;
+        ceil(h * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows;
    padding_w =
-        ceil(im->cols * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols;
+        ceil(w * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols;
  }

  if (padding_h < 0 || padding_w < 0) {
@@ -219,4 +221,5 @@ bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
  }
  return true;
 }
+
 }  // namespace PaddleX
--- a/deploy/cpp/src/visualize.cpp
+++ b/deploy/cpp/src/visualize.cpp
@@ -145,4 +145,4 @@ std::string generate_save_path(const std::string& save_dir,
  std::string image_name(file_path.substr(pos + 1));
  return save_dir + OS_PATH_SEP + image_name;
 }
-}  // namespace of PaddleX
+}  // namespace PaddleX
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -13,7 +13,7 @@
 > 可以使用模型裁剪，参考文档[模型裁剪使用教程](slim/prune.md)，通过调整裁剪参数，可以控制模型裁剪后的大小，在实际实验中，如VOC检测数据，使用yolov3-mobilenet，原模型大小为XXM，裁剪后为XX M，精度基本保持不变

 ## 4. 如何配置训练时GPU的卡数
-> 通过在终端export环境变量，或在Python代码中设置，可参考文档[CPU/多卡GPU训练](gpu_configure.md)
+> 通过在终端export环境变量，或在Python代码中设置，可参考文档[CPU/多卡GPU训练](appendix/gpu_configure.md)

 ## 5. 想将之前训练的模型参数上继续训练
 > 在训练调用`train`接口时，将`pretrain_weights`设为之前的模型保存路径即可
@@ -52,7 +52,7 @@
 > 1. 用户自行训练时，如不确定迭代的轮数，可以将轮数设高一些，同时注意设置`save_interval_epochs`，这样模型迭代每间隔相应轮数就会在验证集上进行评估和保存，可以根据不同轮数模型在验证集上的评估指标，判断模型是否已经收敛，若模型已收敛，可以自行结束训练进程
 >
 ## 9. 只有CPU，没有GPU，如何提升训练速度
-> 当没有GPU时，可以根据自己的CPU配置，选择是否使用多CPU进行训练，具体配置方式可以参考文档[多卡CPU/GPU训练](gpu_configure.md)
+> 当没有GPU时，可以根据自己的CPU配置，选择是否使用多CPU进行训练，具体配置方式可以参考文档[多卡CPU/GPU训练](appendix/gpu_configure.md)
 >
 ## 10. 电脑不能联网，训练时因为下载预训练模型失败，如何解决
 > 可以预先通过其它方式准备好预训练模型，然后训练时自定义`pretrain_weights`即可，可参考文档[无联网模型训练](how_to_offline_run.md)
@@ -61,8 +61,8 @@
 > 1.可以按照9的方式来解决这个问题  
 > 2.每次训练前都设定`paddlex.pretrain_dir`路径，如设定`paddlex.pretrain_dir='/usrname/paddlex`，如此下载完的预训练模型会存放至`/usrname/paddlex`目录下，而已经下载在该目录的模型也不会再次重复下载

-## 12. 程序启动时提示"Failed to execute script PaddleX"，如何解决？
+## 12. PaddleX GUI启动时提示"Failed to execute script PaddleX"，如何解决？
 > 1. 请检查目标机器上PaddleX程序所在路径是否包含中文。目前暂不支持中文路径，请尝试将程序移动到英文目录。
 > 2. 如果您的系统是Windows 7或者Windows Server 2012时，原因是缺少MFPlat.DLL/MF.dll/MFReadWrite.dll等OpenCV依赖的DLL，请按如下方式安装桌面体验：通过“我的电脑”-->“属性”-->"管理"打开服务器管理器，点击右上角“管理”选择“添加角色和功能”。点击“服务器选择”-->“功能”，拖动滚动条到最下端，点开“用户界面和基础结构”，勾选“桌面体验”后点击“安装”，等安装完成尝试再次运行PaddleX。
 > 3. 请检查目标机器上是否有其他的PaddleX程序或者进程在运行中，如有请退出或者重启机器看是否解决
-> 4. 请确认运行程序的用户是否有管理员权限，如非管理员权限用户请尝试使用管理员运行看是否成功
\ No newline at end of file
+> 4. 请确认运行程序的用户是否有管理员权限，如非管理员权限用户请尝试使用管理员运行看是否成功
--- a/docs/apis/datasets/detection.md
+++ b/docs/apis/datasets/detection.md
@@ -8,7 +8,7 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None,

 > 仅用于**目标检测**。读取PascalVOC格式的检测数据集，并对样本进行相应的处理。PascalVOC数据集格式的介绍可查看文档:[数据集格式说明](../datasets.md)  

-> 示例：[代码文件](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_mobilenetv1.py#L29)
+> 示例：[代码文件](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_darknet53.py#L29)

 > **参数**

@@ -21,6 +21,16 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None,
 > > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式，支持'thread'线程和'process'进程两种方式。默认为'process'（Windows和Mac下会强制使用thread，该参数无效）。  
 > > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。  

+> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检，定义VOCDetection类后调用其成员函数`add_negative_samples`添加背景图片即可：
+> ```
+> add_negative_samples(image_dir)
+> ```
+> > 示例：[代码](../../tuning_strategy/detection/negatives_training.html#id4)
+
+> > **参数**
+
+> > > * **image_dir** (str): 背景图片所在的目录路径。
+
 ## CocoDetection类

 ```
@@ -41,6 +51,16 @@ paddlex.datasets.CocoDetection(data_dir, ann_file, transforms=None, num_workers=
 > > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式，支持'thread'线程和'process'进程两种方式。默认为'process'（Windows和Mac下会强制使用thread，该参数无效）。  
 > > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。  

+> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检，定义CocoDetection类后调用其成员函数`add_negative_samples`添加背景图片即可：
+> ```
+> add_negative_samples(image_dir)
+> ```
+> > 示例：[代码](../../tuning_strategy/detection/negatives_training.html#id4)
+
+> > **参数**
+
+> > > * **image_dir** (str): 背景图片所在的目录路径。
+
 ## EasyDataDet类

 ```
@@ -59,5 +79,15 @@ paddlex.datasets.EasyDataDet(data_dir, file_list, label_list, transforms=None, n
 > > * **num_workers** (int|str)：数据集中样本在预处理过程中的线程或进程数。默认为'auto'。当设为'auto'时，根据系统的实际CPU核数设置`num_workers`: 如果CPU核数的一半大于8，则`num_workers`为8，否则为CPU核数的一半。
 > > * **buffer_size** (int): 数据集中样本在预处理过程中队列的缓存长度，以样本数为单位。默认为100。  
 > > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式，支持'thread'线程和'process'进程两种方式。默认为'process'（Windows和Mac下会强制使用thread，该参数无效）。  
-> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。  
+> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。
+
+
+> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检，定义EasyDataDet类后调用其成员函数`add_negative_samples`添加背景图片即可：
+> ```
+> add_negative_samples(image_dir)
+> ```
+> > 示例：[代码](../../tuning_strategy/detection/negatives_training.html#id4)
+
+> > **参数**

+> > > * **image_dir** (str): 背景图片所在的目录路径。
--- a/docs/apis/models/classification.md
+++ b/docs/apis/models/classification.md
@@ -80,7 +80,7 @@ predict(self, img_file, transforms=None, topk=5)

 ## 其它分类器类

-PaddleX提供了共计22种分类器，所有分类器均提供同`ResNet50`相同的训练`train`，评估`evaluate`和预测`predict`接口，各模型效果可参考[模型库](../appendix/model_zoo.md)。
+PaddleX提供了共计22种分类器，所有分类器均提供同`ResNet50`相同的训练`train`，评估`evaluate`和预测`predict`接口，各模型效果可参考[模型库](https://paddlex.readthedocs.io/zh_CN/latest/appendix/model_zoo.html)。

 ### ResNet18
 ```python

--- a/docs/apis/models/detection.md
+++ b/docs/apis/models/detection.md
@@ -42,7 +42,7 @@ train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, sa
 > > - **save_interval_epochs** (int): 模型保存间隔（单位：迭代轮数）。默认为20。
 > > - **log_interval_steps** (int): 训练日志输出间隔（单位：迭代次数）。默认为2。
 > > - **save_dir** (str): 模型保存路径。默认值为'output'。
-> > - **pretrain_weights** (str): 若指定为路径时，则加载路径下预训练模型；若为字符串'IMAGENET'，则自动下载在ImageNet图片数据上预训练的模型权重；若为None，则不使用预训练模型。默认为None。
+> > - **pretrain_weights** (str): 若指定为路径时，则加载路径下预训练模型；若为字符串'IMAGENET'，则自动下载在ImageNet图片数据上预训练的模型权重；若为字符串'COCO'，则自动下载在COCO数据集上预训练的模型权重；若为None，则不使用预训练模型。默认为None。
 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时，使用默认优化器：fluid.layers.piecewise_decay衰减策略，fluid.optimizer.Momentum优化方法。
 > > - **learning_rate** (float): 默认优化器的学习率。默认为1.0/8000。
 > > - **warmup_steps** (int):  默认优化器进行warmup过程的步数。默认为1000。
@@ -129,7 +129,7 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, sa
 > > - **save_interval_epochs** (int): 模型保存间隔（单位：迭代轮数）。默认为1。
 > > - **log_interval_steps** (int): 训练日志输出间隔（单位：迭代次数）。默认为2。
 > > - **save_dir** (str): 模型保存路径。默认值为'output'。
-> > - **pretrain_weights** (str): 若指定为路径时，则加载路径下预训练模型；若为字符串'IMAGENET'，则自动下载在ImageNet图片数据上预训练的模型权重；若为None，则不使用预训练模型。默认为None。
+> > - **pretrain_weights** (str): 若指定为路径时，则加载路径下预训练模型；若为字符串'IMAGENET'，则自动下载在ImageNet图片数据上预训练的模型权重；若为字符串'COCO'，则自动下载在COCO数据集上预训练的模型权重（注意：暂未提供ResNet18的COCO预训练模型）；若为None，则不使用预训练模型。默认为None。
 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时，使用默认优化器：fluid.layers.piecewise_decay衰减策略，fluid.optimizer.Momentum优化方法。
 > > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.0025。
 > > - **warmup_steps** (int):  默认优化器进行warmup过程的步数。默认为500。

--- a/docs/apis/models/instance_segmentation.md
+++ b/docs/apis/models/instance_segmentation.md
@@ -34,7 +34,7 @@ train(self, num_epochs, train_dataset, train_batch_size=1, eval_dataset=None, sa
 > > - **save_interval_epochs** (int): 模型保存间隔（单位：迭代轮数）。默认为1。
 > > - **log_interval_steps** (int): 训练日志输出间隔（单位：迭代次数）。默认为2。
 > > - **save_dir** (str): 模型保存路径。默认值为'output'。
-> > - **pretrain_weights** (str): 若指定为路径时，则加载路径下预训练模型；若为字符串'IMAGENET'，则自动下载在ImageNet图片数据上预训练的模型权重；若为None，则不使用预训练模型。默认为None。
+> > - **pretrain_weights** (str): 若指定为路径时，则加载路径下预训练模型；若为字符串'IMAGENET'，则自动下载在ImageNet图片数据上预训练的模型权重；若为字符串'COCO'，则自动下载在COCO数据集上预训练的模型权重（注意：暂未提供ResNet18和HRNet_W18的COCO预训练模型）；若为None，则不使用预训练模型。默认为None。
 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时，使用默认优化器：fluid.layers.piecewise_decay衰减策略，fluid.optimizer.Momentum优化方法。
 > > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.00125。
 > > - **warmup_steps** (int):  默认优化器进行warmup过程的步数。默认为500。

--- a/docs/apis/models/semantic_segmentation.md
+++ b/docs/apis/models/semantic_segmentation.md
@@ -12,7 +12,7 @@ paddlex.seg.DeepLabv3p(num_classes=2, backbone='MobileNetV2_x1.0', output_stride
 > **参数**

 > > - **num_classes** (int): 类别数。
-> > - **backbone** (str): DeepLabv3+的backbone网络，实现特征图的计算，取值范围为['Xception65', 'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.0', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0']，'MobileNetV2_x1.0'。
+> > - **backbone** (str): DeepLabv3+的backbone网络，实现特征图的计算，取值范围为['Xception65', 'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.0', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0']，默认值为'MobileNetV2_x1.0'。
 > > - **output_stride** (int): backbone 输出特征图相对于输入的下采样倍数，一般取值为8或16。默认16。
 > > - **aspp_with_sep_conv** (bool):  decoder模块是否采用separable convolutions。默认True。
 > > - **decoder_use_sep_conv** (bool)： decoder模块是否采用separable convolutions。默认True。
@@ -40,12 +40,12 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev
 > > - **save_interval_epochs** (int): 模型保存间隔（单位：迭代轮数）。默认为1。
 > > - **log_interval_steps** (int): 训练日志输出间隔（单位：迭代次数）。默认为2。
 > > - **save_dir** (str): 模型保存路径。默认'output'
-> > - **pretrain_weights** (str): 若指定为路径时，则加载路径下预训练模型；若为字符串'IMAGENET'，则自动下载在ImageNet图片数据上预训练的模型权重；若为None，则不使用预训练模型。默认'IMAGENET'。
+> > - **pretrain_weights** (str): 若指定为路径时，则加载路径下预训练模型；若为字符串'IMAGENET'，则自动下载在ImageNet图片数据上预训练的模型权重；若为字符串'COCO'，则自动下载在COCO数据集上预训练的模型权重（注意：暂未提供Xception41、MobileNetV2_x0.25、MobileNetV2_x0.5、MobileNetV2_x1.5、MobileNetV2_x2.0的COCO预训练模型）；若为字符串'CITYSCAPES'，则自动下载在CITYSCAPES数据集上预训练的模型权重（注意：暂未提供Xception41、MobileNetV2_x0.25、MobileNetV2_x0.5、MobileNetV2_x1.5、MobileNetV2_x2.0的CITYSCAPES预训练模型）；若为None，则不使用预训练模型。默认'IMAGENET'。
 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时，使用默认的优化器：使用fluid.optimizer.Momentum优化方法，polynomial的学习率衰减策略。
 > > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
 > > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
-> > - **sensitivities_file** (str): 若指定为路径时，则加载路径下敏感度信息进行裁剪；若为字符串'DEFAULT'，则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪；若为None，则不进行裁剪。默认为None。
+> > - **sensitivities_file** (str): 若指定为路径时，则加载路径下敏感度信息进行裁剪；若为字符串'DEFAULT'，则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪；若为None，则不进行裁剪。默认为None。
 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
 > > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
 > > - **early_stop_patience** (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内连续下降或持平，则终止训练。默认值为5。
@@ -129,7 +129,7 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev
 > > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
 > > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
-> > - **sensitivities_file** (str): 若指定为路径时，则加载路径下敏感度信息进行裁剪；若为字符串'DEFAULT'，则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪；若为None，则不进行裁剪。默认为None。
+> > - **sensitivities_file** (str): 若指定为路径时，则加载路径下敏感度信息进行裁剪；若为字符串'DEFAULT'，则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪；若为None，则不进行裁剪。默认为None。
 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
 > > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。
 > > - **early_stop_patience** (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内连续下降或持平，则终止训练。默认值为5。
@@ -186,10 +186,10 @@ paddlex.seg.HRNet(num_classes=2, width=18, use_bce_loss=False, use_dice_loss=Fal
 > **参数**

 > > - **num_classes** (int): 类别数。
-> > - **width** (int): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64]。
+> > - **width** (int|str): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64, '18_small_v1']。'18_small_v1'是18的轻量级版本。
 > > - **use_bce_loss** (bool): 是否使用bce loss作为网络的损失函数，只能用于两类分割。可与dice loss同时使用。默认False。
 > > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数，只能用于两类分割，可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时，使用交叉熵损失函数。默认False。
-> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候，长度应为`num_classes`。当`class_weight`为str时， weight.lower()应为'dynamic'，这时会根据每一轮各类像素的比重自行计算相应的权重，每一类的权重为：每类的比例 * num_classes。class_weight取默认值None是，各类的权重1，即平时使用的交叉熵损失函数。
+> > - **class_weight** (list|str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候，长度应为`num_classes`。当`class_weight`为str时， weight.lower()应为'dynamic'，这时会根据每一轮各类像素的比重自行计算相应的权重，每一类的权重为：每类的比例 * num_classes。class_weight取默认值None时，各类的权重为1，即平时使用的交叉熵损失函数。
 > > - **ignore_index** (int): label上忽略的值，label为`ignore_index`的像素不参与损失函数的计算。默认255。

 ### train 训练接口
@@ -209,12 +209,12 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev
 > > - **save_interval_epochs** (int): 模型保存间隔（单位：迭代轮数）。默认为1。
 > > - **log_interval_steps** (int): 训练日志输出间隔（单位：迭代次数）。默认为2。
 > > - **save_dir** (str): 模型保存路径。默认'output'
-> > - **pretrain_weights** (str): 若指定为路径时，则加载路径下预训练模型；若为字符串'IMAGENET'，则自动下载在ImageNet数据集上预训练的模型权重；若为None，则不使用预训练模型。默认'IMAGENET'。
+> > - **pretrain_weights** (str): 若指定为路径时，则加载路径下预训练模型；若为字符串'IMAGENET'，则自动下载在ImageNet数据集上预训练的模型权重；若为字符串'CITYSCAPES'，则自动下载在CITYSCAPES图片数据上预训练的模型权重（注意：目前仅提供`width`取值为18的CITYSCAPES预训练模型）；若为None，则不使用预训练模型。默认'IMAGENET'。
 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时，使用默认的优化器：使用fluid.optimizer.Momentum优化方法，polynomial的学习率衰减策略。
 > > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
 > > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
-> > - **sensitivities_file** (str): 若指定为路径时，则加载路径下敏感度信息进行裁剪；若为字符串'DEFAULT'，则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪；若为None，则不进行裁剪。默认为None。
+> > - **sensitivities_file** (str): 若指定为路径时，则加载路径下敏感度信息进行裁剪；若为字符串'DEFAULT'，则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪；若为None，则不进行裁剪。默认为None。
 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
 > > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。
 > > - **early_stop_patience** (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内连续下降或持平，则终止训练。默认值为5。
@@ -258,3 +258,88 @@ predict(self, im_file, transforms=None):
 > **返回值**
 > >
 > > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图，像素值表示对应的类别，'score_map'存储各类别的概率，shape=(h, w, num_classes)。
+
+
+## FastSCNN类
+
+```python
+paddlex.seg.FastSCNN(num_classes=2, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255, multi_loss_weight=[1.0])
+```
+
+> 构建FastSCNN分割器。
+
+> **参数**
+
+> > - **num_classes** (int): 类别数。
+> > - **use_bce_loss** (bool): 是否使用bce loss作为网络的损失函数，只能用于两类分割。可与dice loss同时使用。默认False。
+> > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数，只能用于两类分割，可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时，使用交叉熵损失函数。默认False。
+> > - **class_weight** (list|str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候，长度应为`num_classes`。当`class_weight`为str时， weight.lower()应为'dynamic'，这时会根据每一轮各类像素的比重自行计算相应的权重，每一类的权重为：每类的比例 * num_classes。class_weight取默认值None时，各类的权重为1，即平时使用的交叉熵损失函数。
+> > - **ignore_index** (int): label上忽略的值，label为`ignore_index`的像素不参与损失函数的计算。默认255。
+> > - **multi_loss_weight** (list): 多分支上的loss权重。默认计算一个分支上的loss，即默认值为[1.0]。也支持计算两个分支或三个分支上的loss，权重按[fusion_branch_weight, higher_branch_weight, lower_branch_weight]排列，fusion_branch_weight为空间细节分支和全局上下文分支融合后的分支上的loss权重，higher_branch_weight为空间细节分支上的loss权重，lower_branch_weight为全局上下文分支上的loss权重，若higher_branch_weight和lower_branch_weight未设置则不会计算这两个分支上的loss。
+
+### train 训练接口
+
+```python
+train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='CITYSCAPES', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None):
+```
+
+> FastSCNN模型训练接口。
+
+> **参数**
+> >
+> > - **num_epochs** (int): 训练迭代轮数。
+> > - **train_dataset** (paddlex.datasets): 训练数据读取器。
+> > - **train_batch_size** (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。
+> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
+> > - **save_interval_epochs** (int): 模型保存间隔（单位：迭代轮数）。默认为1。
+> > - **log_interval_steps** (int): 训练日志输出间隔（单位：迭代次数）。默认为2。
+> > - **save_dir** (str): 模型保存路径。默认'output'
+> > - **pretrain_weights** (str): 若指定为路径时，则加载路径下预训练模型；若为字符串'CITYSCAPES'，则自动下载在CITYSCAPES图片数据上预训练的模型权重；若为None，则不使用预训练模型。默认'CITYSCAPES'。
+> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时，使用默认的优化器：使用fluid.optimizer.Momentum优化方法，polynomial的学习率衰减策略。
+> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
+> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
+> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
+> > - **sensitivities_file** (str): 若指定为路径时，则加载路径下敏感度信息进行裁剪；若为字符串'DEFAULT'，则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪；若为None，则不进行裁剪。默认为None。
+> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
+> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
+> > - **early_stop_patience** (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内连续下降或持平，则终止训练。默认值为5。
+> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None，则不会恢复训练。默认值为None。
+
+#### evaluate 评估接口
+
+```
+evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False):
+```
+
+> FastSCNN模型评估接口。
+
+> **参数**
+> >
+> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
+> > - **batch_size** (int): 评估时的batch大小。默认1。
+> > - **epoch_id** (int): 当前评估模型所在的训练轮数。
+> > - **return_details** (bool): 是否返回详细信息。默认False。
+
+> **返回值**
+> >
+> > - **dict**: 当return_details为False时，返回dict。包含关键字：'miou'、'category_iou'、'macc'、
+> >   'category_acc'和'kappa'，分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。
+> > - **tuple** (metrics, eval_details)：当return_details为True时，增加返回dict (eval_details)，
+> >   包含关键字：'confusion_matrix'，表示评估的混淆矩阵。
+
+#### predict 预测接口
+
+```
+predict(self, im_file, transforms=None):
+```
+
+> FastSCNN模型预测接口。需要注意的是，只有在训练过程中定义了eval_dataset，模型在保存时才会将预测时的图像处理流程保存在`FastSCNN.test_transforms`和`FastSCNN.eval_transforms`中。如未在训练时定义eval_dataset，那在调用预测`predict`接口时，用户需要再重新定义test_transforms传入给`predict`接口。
+
+> **参数**
+> >
+> > - **im_file** (str): 预测图像路径。
+> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。
+
+> **返回值**
+> >
+> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图，像素值表示对应的类别，'score_map'存储各类别的概率，shape=(h, w, num_classes)。
--- a/docs/apis/transforms/seg_transforms.md
+++ b/docs/apis/transforms/seg_transforms.md
@@ -200,7 +200,7 @@ ComposedSegTransforms.add_augmenters(augmenters)
 import paddlex as pdx
 from paddlex.seg import transforms
 train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[512, 512])
-eval_transforms = transforms.ComposedYOLOTransforms(mode='eval')
+eval_transforms = transforms.ComposedSegTransforms(mode='eval')

 # 添加数据增强
 import imgaug.augmenters as iaa

--- a/docs/apis/visualize.md
+++ b/docs/apis/visualize.md
@@ -146,10 +146,11 @@ paddlex.interpret.normlime(img_file,
                           dataset=None,
                           num_samples=3000, 
                           batch_size=50,
-                           save_dir='./')
+                           save_dir='./',
+                           normlime_weights_file=None)
 ```
 使用NormLIME算法将模型预测结果的可解释性可视化。
-NormLIME是利用一定数量的样本来出一个全局的解释。NormLIME会提前计算一定数量的测试样本的LIME结果，然后对相同的特征进行权重的归一化，这样来得到一个全局的输入和输出的关系。
+NormLIME是利用一定数量的样本来得出一个全局的解释。由于NormLIME计算量较大，此处采用一种简化的方式：使用一定数量的测试样本（目前默认使用所有测试样本），对每个样本进行特征提取，映射到同一个特征空间；然后以此特征作为输入，以模型输出作为输出，使用线性回归对其进行拟合，得到一个全局的输入和输出的关系。之后，对一测试样本进行解释时，使用NormLIME全局的解释，来对LIME的结果进行滤波，使最终的可视化结果更加稳定。

 **注意：** 可解释性结果可视化目前只支持分类模型。

@@ -159,9 +160,27 @@ NormLIME是利用一定数量的样本来出一个全局的解释。NormLIME会
 >* **dataset** (paddlex.datasets): 数据集读取器，默认为None。
 >* **num_samples** (int): LIME用于学习线性模型的采样数，默认为3000。
 >* **batch_size** (int): 预测数据batch大小，默认为50。
->* **save_dir** (str): 可解释性可视化结果（保存为png格式文件）和中间文件存储路径。 
+>* **save_dir** (str): 可解释性可视化结果（保存为png格式文件）和中间文件存储路径。
+>* **normlime_weights_file** (str): NormLIME初始化文件名，若不存在，则计算一次，保存于该路径；若存在，则直接载入。

-**注意：** dataset`读取的是一个数据集，该数据集不宜过大，否则计算时间会较长，但应包含所有类别的数据。
+**注意：** `dataset`读取的是一个数据集，该数据集不宜过大，否则计算时间会较长，但应包含所有类别的数据。NormLIME可解释性结果可视化目前只支持分类模型。
 ### 使用示例
 > 对预测可解释性结果可视化的过程可参见[代码](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/normlime.py)。

+
+## 数据预处理/增强过程可视化
+```
+paddlex.transforms.visualize(dataset, 
+                             img_count=3, 
+                             save_dir='vdl_output')
+```
+对数据预处理/增强中间结果进行可视化。
+可使用VisualDL查看中间结果：
+1. VisualDL启动方式: visualdl --logdir vdl_output --port 8001
+2. 浏览器打开 http://0.0.0.0:8001 即可，
+    其中0.0.0.0为本机访问，如为远程服务, 改成相应机器IP
+
+### 参数
+>* **dataset** (paddlex.datasets): 数据集读取器。
+>* **img_count** (int): 需要进行数据预处理/增强的图像数目。默认为3。
+>* **save_dir** (str): 日志保存的路径。默认为'vdl_output'。
\ No newline at end of file
--- a/docs/appendix/index.rst
+++ b/docs/appendix/index.rst
@@ -7,6 +7,7 @@
   :caption: 目录:

   model_zoo.md
+   slim_model_zoo.md
   metrics.md
   interpret.md
   parameters.md

--- a/docs/appendix/interpret.md
+++ b/docs/appendix/interpret.md
@@ -20,9 +20,20 @@ LIME的使用方式可参见[代码示例](https://github.com/PaddlePaddle/Paddl
 ## NormLIME
 NormLIME是在LIME上的改进，LIME的解释是局部性的，是针对当前样本给的特定解释，而NormLIME是利用一定数量的样本对当前样本的一个全局性的解释，有一定的降噪效果。其实现步骤如下所示：  
 1. 下载Kmeans模型参数和ResNet50_vc网络前三层参数。（ResNet50_vc的参数是在ImageNet上训练所得网络的参数；使用ImageNet图像作为数据集，每张图像从ResNet50_vc的第三层输出提取对应超象素位置上的平均特征和质心上的特征，训练将得到此处的Kmeans模型）  
-2. 计算测试集中每张图像的LIME结果。（如无测试集，可用验证集代替）  
-3. 使用Kmeans模型对所有图像中的所有像素进行聚类。  
-4. 对在同一个簇的超像素（相同的特征）进行权重的归一化，得到每个超像素的权重，以此来解释模型。  
+2. 使用测试集中的数据计算normlime的权重信息（如无测试集，可用验证集代替）:  
+    对每张图像的处理：
+    (1) 获取图像的超像素。
+    (2) 使用ResNet50_vc获取第三层特征，针对每个超像素位置，组合质心特征和均值特征`F`。  
+    (3) 把`F`作为Kmeans模型的输入，计算每个超像素位置的聚类中心。  
+    (4) 使用训练好的分类模型，预测该张图像的`label`。  
+    对所有图像的处理：  
+    (1) 以每张图像的聚类中心信息组成的向量（若某聚类中心出现在该张图中设置为1，反之为0）为输入，
+        预测的`label`为输出，构建逻辑回归函数`regression_func`。  
+    (2) 由`regression_func`可获得每个聚类中心不同类别下的权重，并对权重进行归一化。  
+3. 使用Kmeans模型获取需要可视化图像的每个超像素的聚类中心。  
+4. 对需要可视化的图像的超像素进行随机遮掩构成新的图像。   
+5. 对每张构造的图像使用预测模型预测label。  
+6. 根据normlime的权重信息，每个超像素可获不同的权重，选取最高的权重为最终的权重，以此来解释模型。   

 NormLIME的使用方式可参见[代码示例](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/normlime.py)和[api介绍](../apis/visualize.html#normlime)。在使用时，参数中的`num_samples`设置尤为重要，其表示上述步骤2中的随机采样的个数，若设置过小会影响可解释性结果的稳定性，若设置过大则将在上述步骤3耗费较长时间；参数`batch_size`则表示在计算上述步骤3时，预测的batch size，若设置过小将在上述步骤3耗费较长时间，而上限则根据机器配置决定；而`dataset`则是由测试集或验证集构造的数据。  


--- a/docs/appendix/model_zoo.md
+++ b/docs/appendix/model_zoo.md
@@ -6,48 +6,56 @@

 | 模型  | 模型大小 | 预测速度（毫秒） | Top1准确率（%） | Top5准确率（%） |
 | :----|  :------- | :----------- | :--------- | :--------- |
-| ResNet18| 46.9MB   | -        | 71.0     | 89.9     |
-| ResNet34| 87.5MB   | -        | 74.6    | 92.1    |
-| ResNet50| 102.7MB  | -        | 76.5     | 93.0     |
-| ResNet101 |179.1MB  | -      | 77.6     | 93.6  |
-| ResNet50_vd |102.8MB  |-        | 79.1     | 94.4     |
-| ResNet101_vd| 179.2MB  | -       | 80.2   | 95.0     |
-| ResNet50_vd_ssld |102.8MB  | -        | 82.4     | 96.1     |
-| ResNet101_vd_ssld| 179.2MB  | -       | 83.7   | 96.7     |
-| DarkNet53|166.9MB  | -       | 78.0     | 94.1     |
-| MobileNetV1 | 16.0MB   | -        | 71.0     | 89.7    |
-| MobileNetV2 | 14.0MB   | -        | 72.2     | 90.7    |
-| MobileNetV3_large|  21.0MB   | -        | 75.3    | 93.2   |
-| MobileNetV3_small |  12.0MB   | -        | 68.2    | 88.1     |
-| MobileNetV3_large_ssld|  21.0MB   | -        | 79.0     | 94.5     |
-| MobileNetV3_small_ssld |  12.0MB   | -        | 71.3     | 90.1     |
-| Xception41 |92.4MB   | -       | 79.6    | 94.4     |
-| Xception65 | 144.6MB  | -       | 80.3     | 94.5     |
-| DenseNet121 | 32.8MB   | -       | 75.7     | 92.6     |
-| DenseNet161|116.3MB  | -       | 78.6     | 94.1     |
-| DenseNet201|  84.6MB   | -       | 77.6     | 93.7     |
-| ShuffleNetV2 | 9.0MB   | -        | 68.8     | 88.5     |
-| HRNet_W18 | 21.29MB | - | 76.9 | 93.4 |
+| [ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar)| 46.2MB   | 3.72882        | 71.0     | 89.9     |
+| [ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar)| 87.9MB   | 5.50876        | 74.6    | 92.1    |
+| [ResNet50](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar)| 103.4MB  | 7.76659       | 76.5     | 93.0     |
+| [ResNet101](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) |180.4MB  | 13.80876      | 77.6     | 93.6  |
+| [ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) |103.5MB  | 8.20476       | 79.1     | 94.4     |
+| [ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar)| 180.5MB  | 14.24643       | 80.2   | 95.0     |
+| [ResNet50_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_ssld_pretrained.tar) |103.5MB  | 7.79264       | 82.4     | 96.1     |
+| [ResNet101_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_ssld_pretrained.tar)| 180.5MB  | 13.34580       | 83.7   | 96.7     |
+| [DarkNet53](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_ImageNet1k_pretrained.tar)|167.4MB  | 8.82047       | 78.0     | 94.1     |
+| [MobileNetV1](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 17.4MB   | 3.42838        | 71.0     | 89.7    |
+| [MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 15.0MB   | 5.92667        | 72.2     | 90.7    |
+| [MobileNetV3_large](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_pretrained.tar)|  22.8MB   | 8.31428        | 75.3    | 93.2   |
+| [MobileNetV3_small](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_pretrained.tar) |  12.5MB   | 7.30689        | 68.2    | 88.1     |
+| [MobileNetV3_large_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_ssld_pretrained.tar)|  22.8MB   | 8.06651        | 79.0     | 94.5     |
+| [MobileNetV3_small_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_ssld_pretrained.tar) |  12.5MB   | 7.08837        | 71.3     | 90.1     |
+| [Xception41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_deeplab_pretrained.tar) | 109.2MB   | 8.15611      | 79.6    | 94.4     |
+| [Xception65](https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar) | 161.6MB  | 13.87017       | 80.3     | 94.5     |
+| [DenseNet121](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet121_pretrained.tar) | 33.1MB   | 17.09874       | 75.7     | 92.6     |
+| [DenseNet161](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet161_pretrained.tar)| 118.0MB  | 22.79690       | 78.6     | 94.1     |
+| [DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet201_pretrained.tar)|  84.1MB   | 25.26089       | 77.6     | 93.7     |
+| [ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 10.2MB   | 15.40138        | 68.8     | 88.5     |
+| [HRNet_W18](https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar) | 21.29MB |45.25514  | 76.9 | 93.4 |

 ## 目标检测模型

-> 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到（测试GPU型号为Nvidia Tesla V100测试得到,表中符号`-`表示相关指标暂未测试。
+> 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到（测试GPU型号为Nvidia Tesla V100），表中符号`-`表示相关指标暂未测试。

 | 模型    | 模型大小    | 预测时间(毫秒) | BoxAP（%） |
 |:-------|:-----------|:-------------|:----------|
-|FasterRCNN-ResNet50|135.6MB| 78.450 | 35.2 |
-|FasterRCNN-ResNet50_vd| 135.7MB | 79.523 | 36.4 |
-|FasterRCNN-ResNet101| 211.7MB | 107.342 | 38.3 |
-|FasterRCNN-ResNet50-FPN| 167.2MB | 44.897 | 37.2 |
-|FasterRCNN-ResNet50_vd-FPN|168.7MB | 45.773 | 38.9 |
-|FasterRCNN-ResNet101-FPN| 251.7MB | 55.782 | 38.7 |
-|FasterRCNN-ResNet101_vd-FPN |252MB | 58.785 | 40.5 |
-|FasterRCNN-HRNet_W18-FPN |115.5MB | 57.11 | 36 |
-|YOLOv3-DarkNet53|252.4MB | 21.944 | 38.9 |
-|YOLOv3-MobileNetv1 |101.2MB | 12.771 | 29.3 |
-|YOLOv3-MobileNetv3|94.6MB | - | 31.6 |
-| YOLOv3-ResNet34|169.7MB | 15.784 | 36.2 |
+|[FasterRCNN-ResNet50](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar)|136.0MB| 197.715 | 35.2 |
+|[FasterRCNN-ResNet50_vd](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_1x.tar)| 136.1MB | 475.700 | 36.4 |
+|[FasterRCNN-ResNet101](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_1x.tar)| 212.5MB | 582.911 | 38.3 |
+|[FasterRCNN-ResNet50-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_1x.tar)| 167.7MB | 83.189 | 37.2 |
+|[FasterRCNN-ResNet50_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_2x.tar)|167.8MB | 128.277 | 38.9 |
+|[FasterRCNN-ResNet101-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_1x.tar)| 244.2MB | 119.788 | 38.7 |
+|[FasterRCNN-ResNet101_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_2x.tar) |244.3MB | 156.097 | 40.5 |
+|[FasterRCNN-HRNet_W18-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_hrnetv2p_w18_1x.tar) |115.5MB | 81.592 | 36 |
+|[YOLOv3-DarkNet53](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar)|249.2MB | 42.672 | 38.9 |
+|[YOLOv3-MobileNetV1](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) |99.2MB | 15.442 | 29.3 |
+|[YOLOv3-MobileNetV3_large](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams)|100.7MB | 143.322 | 31.6 |
+| [YOLOv3-ResNet34](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar)|170.3MB | 23.185 | 36.2 |

 ## 实例分割模型

 > 表中模型相关指标均为在MSCOCO数据集上测试得到。
+
+| 模型    | 模型大小    | 预测时间(毫秒) | mIoU（%） |
+|:-------|:-----------|:-------------|:----------|
+|DeepLabv3+-MobileNetV2_x1.0|-| - | - |
+|DeepLabv3+-Xception41|-| - | - |
+|DeepLabv3+-Xception65|-| - | - |
+|UNet|-| - | - |
+|HRNet_w18|-| - | - |
--- a/docs/appendix/slim_model_zoo.md
+++ b/docs/appendix/slim_model_zoo.md
+# PaddleX压缩模型库
+
+## 图像分类
+
+数据集：ImageNet-1000
+
+### 量化
+
+| 模型 | 压缩策略 | Top-1准确率 | 存储体积 | TensorRT时延(V100, ms) |
+|:--:|:---:|:--:|:--:|:--:|
+|MobileNetV1| 无 |70.99%| 17MB | -|
+|MobileNetV1| 量化 |70.18% (-0.81%)| 4.4MB | - |
+| MobileNetV2 | 无 |72.15%| 15MB | - |
+| MobileNetV2 | 量化 | 71.15% (-1%)| 4.0MB   | - |
+|ResNet50| 无 |76.50%| 99MB | 2.71 |
+|ResNet50| 量化 |76.33% (-0.17%)| 25.1MB | 1.19 |
+
+分类模型Lite时延(ms)
+
+| 设备    | 模型类型    | 压缩策略      | armv7 Thread 1 | armv7 Thread 2 | armv7 Thread 4 | armv8 Thread 1 | armv8 Thread 2 | armv8 Thread 4 |
+| ------- | ----------- | ------------- | -------------- | -------------- | -------------- | -------------- | -------------- | -------------- |
+| 高通835 | MobileNetV1 | 无 | 96.1942        | 53.2058        | 32.4468        | 88.4955        | 47.95          | 27.5189        |
+| 高通835 | MobileNetV1 | 量化    | 60.5615        | 32.4016        | 16.6596        | 56.5266        | 29.7178        | 15.1459        |
+| 高通835 | MobileNetV2 | 无 | 65.715         | 38.1346        | 25.155         | 61.3593        | 36.2038        | 22.849         |
+| 高通835 | MobileNetV2 | 量化    | 48.3495        | 30.3069        | 22.1506        | 45.8715        | 27.4105        | 18.2223        |
+| 高通835 | ResNet50    | 无 | 526.811        | 319.6486       | 205.8345       | 506.1138       | 335.1584       | 214.8936       |
+| 高通835 | ResNet50    | 量化    | 476.0507       | 256.5963       | 139.7266       | 461.9176       | 248.3795       | 149.353        |
+| 高通855 | MobileNetV1 | 无 | 33.5086        | 19.5773        | 11.7534        | 31.3474        | 18.5382        | 10.0811        |
+| 高通855 | MobileNetV1 | 量化    | 37.0498        | 21.7081        | 11.0779        | 14.0947        | 8.1926         | 4.2934         |
+| 高通855 | MobileNetV2 | 无 | 25.0396        | 15.2862        | 9.6609         | 22.909         | 14.1797        | 8.8325         |
+| 高通855 | MobileNetV2 | 量化    | 28.1631        | 18.3917        | 11.8333        | 16.9399        | 11.1772        | 7.4176         |
+| 高通855 | ResNet50    | 无 | 185.3705       | 113.0825       | 87.0741        | 177.7367       | 110.0433       | 74.4114        |
+| 高通855 | ResNet50    | 量化    | 328.2683       | 201.9937       | 106.744        | 242.6397       | 150.0338       | 79.8659        |
+| 麒麟970 | MobileNetV1 | 无 | 101.2455       | 56.4053        | 35.6484        | 94.8985        | 51.7251        | 31.9511        |
+| 麒麟970 | MobileNetV1 | 量化    | 62.4412        | 32.2585        | 16.6215        | 57.825         | 29.2573        | 15.1206        |
+| 麒麟970 | MobileNetV2 | 无 | 70.4176        | 42.0795        | 25.1939        | 68.9597        | 39.2145        | 22.6617        |
+| 麒麟970 | MobileNetV2 | 量化    | 53.0961        | 31.7987        | 21.8334        | 49.383         | 28.2358        | 18.3642        |
+| 麒麟970 | ResNet50    | 无 | 586.8943       | 344.0858       | 228.2293       | 573.3344       | 351.4332       | 225.8006       |
+| 麒麟970 | ResNet50    | 量化    | 489.6188       | 258.3279       | 142.6063       | 480.0064       | 249.5339       | 138.5284       |
+
+### 剪裁
+
+PaddleLite推理耗时说明：
+
+环境：Qualcomm SnapDragon 845 + armv8
+
+速度指标：Thread1/Thread2/Thread4耗时
+
+
+| 模型 | 压缩策略 | Top-1 | 存储体积 |PaddleLite推理耗时|TensorRT推理速度(FPS)|
+|:--:|:---:|:--:|:--:|:--:|:--:|
+| MobileNetV1 |    无    |         70.99%         |       17MB       | 66.052\35.8014\19.5762|-|
+| MobileNetV1 | 剪裁 -30% |  70.4% (-0.59%)  |       12MB       | 46.5958\25.3098\13.6982|-|
+| MobileNetV1 | 剪裁 -50% | 69.8% (-1.19%) |       9MB        | 37.9892\20.7882\11.3144|-|
+
+## 目标检测
+
+### 量化
+
+数据集： COCO2017
+
+|              模型              |  压缩策略   | 数据集 | Image/GPU | 输入608 Box AP | 存储体积 |   TensorRT时延(V100, ms) |
+| :----------------------------: | :---------: | :----: | :-------: | :------------: | :------------: | :----------: |
+|      MobileNet-V1-YOLOv3       | 无 |  COCO  |     8     |      29.3      |        95MB       |  - |
+|      MobileNet-V1-YOLOv3       | 量化  |  COCO  |     8     |     27.9 (-1.4)|        25MB       | -  |
+|      R34-YOLOv3                | 无 |  COCO  |     8     |      36.2      |        162MB       |  - |
+|      R34-YOLOv3                | 量化  |  COCO  |     8     | 35.7 (-0.5)    |        42.7MB      |  - |
+
+### 剪裁
+
+数据集：Pascal VOC & COCO2017
+
+PaddleLite推理耗时说明：
+
+环境：Qualcomm SnapDragon 845 + armv8
+
+速度指标：Thread1/Thread2/Thread4耗时
+
+|              模型              |     压缩策略      |   数据集   | Image/GPU | 输入608 Box mmAP | 存储体积 | PaddleLite推理耗时(ms)(608*608) | TensorRT推理速度(FPS)(608*608) |
+| :----------------------------: | :---------------: | :--------: | :-------: | :------------: | :----------: | :--------------: | :--------------: |
+|      MobileNet-V1-YOLOv3       | 无     | Pascal VOC |     8     |      76.2      |      94MB      | 1238\796.943\520.101|60.04|
+|      MobileNet-V1-YOLOv3       | 剪裁 -52.88% | Pascal VOC |     8     |  77.6 (+1.4)   |      31MB      | 602.497\353.759\222.427 |99.36|
+|      MobileNet-V1-YOLOv3       | 无     |    COCO    |     8     |      29.3      |      95MB      |-|-|
+|      MobileNet-V1-YOLOv3       | 剪裁 -51.77% |    COCO    |     8     |  26.0 (-3.3)   |      32MB      |-|73.93|
+
+## 语义分割
+
+数据集：Cityscapes
+
+
+### 量化
+
+|          模型          |  压缩策略   |     mIoU      | 存储体积 |
+| :--------------------: | :---------: | :-----------: | :------------: |
+| DeepLabv3-MobileNetv2 | 无 |     69.81     |      7.4MB       |
+| DeepLabv3-MobileNetv2 | 量化  | 67.59 (-2.22) |      2.1MB       |
+
+图像分割模型Lite时延(ms), 输入尺寸769 x 769
+
+| 设备    | 模型类型               | 压缩策略      | armv7 Thread 1 | armv7 Thread 2 | armv7 Thread 4 | armv8 Thread 1 | armv8 Thread 2 | armv8 Thread 4 |
+| ------- | ---------------------- | ------------- | -------------- | -------------- | -------------- | -------------- | -------------- | -------------- |
+| 高通835 | Deeplabv3-MobileNetV2  | 无 | 1282.8126      | 793.2064       | 653.6538       | 1193.9908      | 737.1827       | 593.4522       |
+| 高通835 | Deeplabv3-MobileNetV2  | 量化    | 981.44         | 658.4969       | 538.6166       | 885.3273       | 586.1284       | 484.0018       |
+| 高通855 | Deeplabv3-MobileNetV2  | 无 | 639.4425       | 390.1851       | 322.7014       | 477.7667       | 339.7411       | 262.2847       |
+| 高通855 | Deeplabv3-MobileNetV2  | 量化    | 705.7589       | 474.4076       | 427.2951       | 394.8352       | 297.4035       | 264.6724       |
+| 麒麟970 | Deeplabv3-MobileNetV2  | 无 | 1771.1301      | 1746.0569      | 1222.4805      | 1448.9739      | 1192.4491      | 760.606        |
+| 麒麟970 | Deeplabv3-MobileNetV2  | 量化    | 1320.386       | 918.5328       | 672.2481       | 1020.753       | 820.094        | 591.4114       |
+
+### 剪裁
+
+PaddleLite推理耗时说明：
+
+环境：Qualcomm SnapDragon 845 + armv8
+
+速度指标：Thread1/Thread2/Thread4耗时
+
+
+|   模型    |     压缩方法      |     mIoU      | 存储体积 | PaddleLite推理耗时 | TensorRT推理速度(FPS) |
+| :-------: | :---------------: | :-----------: | :------: | :------------: | :----: |
+| FastSCNN | 无     |     69.64     |       11MB       | 1226.36\682.96\415.664 |39.53|
+| FastSCNN | 剪裁 -47.60% | 66.68 (-2.96) |      5.7MB       | 866.693\494.467\291.748 |51.48|
--- a/docs/cv_solutions.md
+++ b/docs/cv_solutions.md
 # PaddleX视觉方案介绍  

-PaddleX目前提供了4种视觉任务解决方案，分别为图像分类、目标检测、实例分割和语义分割。用户可以根据自己的任务类型按需选取。
+PaddleX针对图像分类、目标检测、实例分割和语义分割4种视觉任务提供了包含模型选择、压缩策略选择、部署方案选择在内的解决方案。用户根据自己的需求选择合适的模型，选择合适的压缩策略来减小模型的计算量和存储体积、加速模型预测推理，最后选择合适的部署方案将模型部署在移动端或者服务器端。

-## 图像分类
+## 模型选择
+
+### 图像分类
 图像分类任务指的是输入一张图片，模型预测图片的类别，如识别为风景、动物、车等。

 ![](./images/image_classification.png)

-对于图像分类任务，针对不同的应用场景，PaddleX提供了百度改进的模型，见下表所示
+对于图像分类任务，针对不同的应用场景，PaddleX提供了百度改进的模型，见下表所示:
+> 表中GPU预测速度是使用PaddlePaddle Python预测接口测试得到（测试GPU型号为Nvidia Tesla P40）。
+> 表中CPU预测速度 (测试CPU型号为)。
+> 表中骁龙855预测速度是使用处理器为骁龙855的手机测试得到。
+> 测速时模型输入大小为224 x 224，Top1准确率为ImageNet-1000数据集上评估所得。

-|    模型    | 模型大小 | GPU预测速度 | CPU预测速度 | ARM芯片预测速度 | 准确率 | 备注 |
-| :--------- | :------  | :---------- | :-----------| :-------------  | :----- | :--- |
-| MobileNetV3_small_ssld | 12M | - | - | - | 71.3% |适用于移动端场景 |
-| MobileNetV3_large_ssld | 21M | - | - | - | 79.0% | 适用于移动端/服务端场景 |
-| ResNet50_vd_ssld | 102.8MB | - | - | - | 82.4% | 适用于服务端场景 |
-| ResNet101_vd_ssld | 179.2MB | - | - | - |83.7% | 适用于服务端场景 |
+|    模型    |  模型特点 | 存储体积 | GPU预测速度（毫秒） | CPU(x86)预测速度（毫秒） | 骁龙855(ARM)预测速度 （毫秒）| Top1准确率 |
+| :--------- | :------  | :---------- | :-----------| :-------------  | :-------------  |:--- |
+| MobileNetV3_small_ssld | 轻量高速，适用于追求高速的实时移动端场景 | 12.5MB | 7.08837 | - | 6.546 | 71.3% |
+| ShuffleNetV2 | 轻量级模型，精度相对偏低，适用于要求更小存储体积的实时移动端场景 | 10.2MB | 15.40 | - | 10.941 | 68.8% |
+| MobileNetV3_large_ssld | 轻量级模型，在存储方面优势不大，在速度和精度上表现适中，适合于移动端场景 | 22.8MB | 8.06651 | - | 19.803 | 79.0% |
+| MobileNetV2 | 轻量级模型，适用于使用GPU预测的移动端场景 | 15.0MB | 5.92667 | - | 23.318| 72.2 % |
+| ResNet50_vd_ssld | 高精度模型，预测时间较短，适用于大多数的服务器端场景 | 103.5MB | 7.79264 | - | - | 82.4% |
+| ResNet101_vd_ssld | 超高精度模型，预测时间相对较长，适用于有大数据量时的服务器端场景 | 180.5MB | 13.34580 | - | -| 83.7% |
+| Xception65 | 超高精度模型，预测时间更长，在处理较大数据量时有较高的精度，适用于服务器端场景 | 161.6MB | 13.87017 | - | - | 80.3% |

-除上述模型外，PaddleX还支持近20种图像分类模型，模型列表可参考[PaddleX模型库](../appendix/model_zoo.md)
+包括上述模型，PaddleX支持近20种图像分类模型，其余模型可参考[PaddleX模型库](../appendix/model_zoo.md)


-## 目标检测
+### 目标检测
 目标检测任务指的是输入图像，模型识别出图像中物体的位置（用矩形框框出来，并给出框的位置），和物体的类别，如在手机等零件质检中，用于检测外观上的瑕疵等。

 ![](./images/object_detection.png)

 对于目标检测，针对不同的应用场景，PaddleX提供了主流的YOLOv3模型和Faster-RCNN模型，见下表所示
-
-|   模型   | 模型大小  | GPU预测速度 | CPU预测速度 |ARM芯片预测速度 | BoxMAP | 备注 |
-| :------- | :-------  | :---------  | :---------- | :-------------  | :----- | :--- |
-| YOLOv3-MobileNetV1 | 101.2M | - | - | - | 29.3 | |
-| YOLOv3-MobileNetV3 | 94.6M | - | - | - | 31.6 | |
-| YOLOv3-ResNet34 | 169.7M | - | - | - | 36.2 | |
-| YOLOv3-DarkNet53 | 252.4 | - | - | - | 38.9 | |
-
-除YOLOv3模型外，PaddleX同时也支持FasterRCNN模型，支持FPN结构和5种backbone网络，详情可参考[PaddleX模型库](../appendix/model_zoo.md)
-
-## 实例分割
+> 表中GPU预测速度是使用PaddlePaddle Python预测接口测试得到（测试GPU型号为Nvidia Tesla P40）。
+> 表中CPU预测速度 (测试CPU型号为)。
+> 表中骁龙855预测速度是使用处理器为骁龙855的手机测试得到。
+> 测速时YOLOv3的输入大小为608 x 608，FasterRCNN的输入大小为800 x 1333，Box mmAP为COCO2017数据集上评估所得。
+
+|   模型   | 模型特点 | 存储体积  | GPU预测速度 | CPU(x86)预测速度（毫秒） | 骁龙855(ARM)预测速度 （毫秒）| Box mmAP |
+| :------- | :-------  | :---------  | :---------- | :-------------  | :-------------  |:--- |
+| YOLOv3-MobileNetV3_large | 适用于追求高速预测的移动端场景 | 100.7MB | 143.322 | - | - | 31.6 |
+| YOLOv3-MobileNetV1 | 精度相对偏低，适用于追求高速预测的服务器端场景 | 99.2MB| 15.422 | - | - | 29.3 |
+| YOLOv3-DarkNet53 | 在预测速度和模型精度上都有较好的表现，适用于大多数的服务器端场景| 249.2MB | 42.672 | - | - | 38.9 |
+| FasterRCNN-ResNet50-FPN | 经典的二阶段检测器，预测速度相对较慢，适用于重视模型精度的服务器端场景 | 167.7MB | 83.189 | - | -| 37.2 |
+| FasterRCNN-HRNet_W18-FPN | 适用于对图像分辨率较为敏感、对目标细节预测要求更高的服务器端场景 | 115.5MB | 81.592 | - | - | 36 |
+| FasterRCNN-ResNet101_vd-FPN | 超高精度模型，预测时间更长，在处理较大数据量时有较高的精度，适用于服务器端场景 | 244.3MB | 156.097 | - | - | 40.5 |
+
+除上述模型外，YOLOv3和Faster RCNN还支持其他backbone，详情可参考[PaddleX模型库](../appendix/model_zoo.md)
+
+### 实例分割
 在目标检测中，模型识别出图像中物体的位置和物体的类别。而实例分割则是在目标检测的基础上，做了像素级的分类，将框内的属于目标物体的像素识别出来。

 ![](./images/instance_segmentation.png)

 PaddleX目前提供了实例分割MaskRCNN模型，支持5种不同的backbone网络，详情可参考[PaddleX模型库](../appendix/model_zoo.md)
-
-|  模型 | 模型大小 | GPU预测速度 | CPU预测速度 | ARM芯片预测速度 | BoxMAP | SegMAP | 备注 |
-| :---- | :------- | :---------- | :---------- | :-------------  | :----- | :----- | :--- |
-| MaskRCNN-ResNet50_vd-FPN | 185.5M | - | - | - | 39.8 | 35.4 | |
-| MaskRCNN-ResNet101_vd-FPN | 268.6M | - | - | - | 41.4 | 36.8 | |
-
-
-## 语义分割
+> 表中GPU预测速度是使用PaddlePaddle Python预测接口测试得到（测试GPU型号为Nvidia Tesla P40）。
+> 表中CPU预测速度 (测试CPU型号为)。
+> 表中骁龙855预测速度是使用处理器为骁龙855的手机测试得到。
+> 测速时MaskRCNN的输入大小为800 x 1333，Box mmAP和Seg mmAP为COCO2017数据集上评估所得。
+
+|  模型 | 模型特点 | 存储体积 | GPU预测速度 | CPU(x86)预测速度（毫秒） | 骁龙855(ARM)预测速度 （毫秒）| Box mmAP | Seg mmAP |
+| :---- | :------- | :---------- | :---------- | :----- | :----- | :--- |:--- |
+| MaskRCNN-HRNet_W18-FPN | 适用于对图像分辨率较为敏感、对目标细节预测要求更高的服务器端场景 | - | - | - | - | 37.0 | 33.4 |
+| MaskRCNN-ResNet50-FPN | 精度较高，适合大多数的服务器端场景| 185.5M | - | - | - | 37.9 | 34.2 |
+| MaskRCNN-ResNet101_vd-FPN | 高精度但预测时间更长，在处理较大数据量时有较高的精度，适用于服务器端场景 | 268.6M | - | - | - | 41.4 | 36.8 |
+
+### 语义分割
 语义分割用于对图像做像素级的分类，应用在人像分类、遥感图像识别等场景。  

 ![](./images/semantic_segmentation.png)

 对于语义分割，PaddleX也针对不同的应用场景，提供了不同的模型选择，如下表所示
+> 表中GPU预测速度是使用PaddlePaddle Python预测接口测试得到（测试GPU型号为Nvidia Tesla P40）。
+> 表中CPU预测速度 (测试CPU型号为)。
+> 表中骁龙855预测速度是使用处理器为骁龙855的手机测试得到。
+> 测速时模型的输入大小为1024 x 2048，mIOU为Cityscapes数据集上评估所得。
+
+| 模型 | 模型特点 | 存储体积 | GPU预测速度 | CPU(x86)预测速度（毫秒） | 骁龙855(ARM)预测速度 （毫秒）| mIOU |
+| :---- | :------- | :---------- | :---------- | :----- | :----- |:--- |
+| DeepLabv3p-MobileNetV2_x1.0 | 轻量级模型，适用于移动端场景| - | - | - | - | 69.8% |
+| HRNet_W18_Small_v1 | 轻量高速，适用于移动端场景 | - | - | - | - | - |
+| FastSCNN | 轻量高速，适用于追求高速预测的移动端或服务器端场景 | - | - | - | - | 69.64 |
+| HRNet_W18 | 高精度模型，适用于对图像分辨率较为敏感、对目标细节预测要求更高的服务器端场景| - | - | - | - | 79.36 |
+| DeepLabv3p-Xception65 | 高精度但预测时间更长，在处理较大数据量时有较高的精度，适用于服务器且背景复杂的场景| - | - | - | - | 79.3% |
+
+## 压缩策略选择
+
+PaddleX提供包含模型剪裁、定点量化的模型压缩策略来减小模型的计算量和存储体积，加快模型部署后的预测速度。使用不同压缩策略在图像分类、目标检测和语义分割模型上的模型精度和预测速度详见以下内容，用户可以根据自己的需求选择合适的压缩策略，进一步优化模型的性能。
+
+| 压缩策略 | 策略特点 |
+| :---- | :------- |
+| 量化  | 较为显著地减少模型的存储体积，适用于移动端或服务器端TensorRT部署，在移动端对于MobileNet系列模型有明显的加速效果 |
+| 剪裁 | 能够去除冗余的参数，达到显著减少参数计算量和模型体积的效果，提升模型的预测性能，适用于CPU部署或移动端部署（GPU上无明显加速效果） |
+| 先剪裁后量化 | 可以进一步提升模型的预测性能，适用于移动端或服务器端TensorRT部署 |
+
+### 性能对比
+
+* 表中各指标的格式为XXX/YYY，XXX表示未采取压缩策略时的指标，YYY表示压缩后的指标
+* 分类模型的准确率指的是ImageNet-1000数据集上的Top1准确率(模型输入大小为224x224)，检测模型的准确率指的是COCO2017数据集上的mmAP(模型输入大小为608x608)，分割模型的准确率指的是Cityscapes数据集上mIOU(模型输入大小为769x769)
+* 量化策略中，PaddleLite推理环境为Qualcomm SnapDragon 855 + armv8，速度指标为Thread4耗时
+* 剪裁策略中，PaddleLite推理环境为Qualcomm SnapDragon 845 + armv8，速度指标为Thread4耗时
+
+
+| 模型 | 压缩策略 | 存储体积(MB) | 准确率(%) | PaddleLite推理耗时(ms) |
+| :--: | :------: | :------: | :----: | :----------------: |
+| MobileNetV1 | 量化 | 17/4.4 | 70.99/70.18 | 10.0811/4.2934 |
+| MobileNetV1 | 剪裁 -30% | 17/12 | 70.99/70.4 | 19.5762/13.6982 |
+| YOLOv3-MobileNetV1 | 量化 | 95/25 | 29.3/27.9 | - |
+| YOLOv3-MobileNetV1 | 剪裁 -51.77% | 95/32 | 29.3/26 | - |
+| Deeplabv3-MobileNetV2 | 量化 | 7.4/1.8 | 63.26/62.03 | 593.4522/484.0018 |
+| FastSCNN | 剪裁 -47.60% | 11/5.7 | 69.64/66.68 | 415.664/291.748 |
+
+更多模型在不同设备上压缩前后的指标对比详见[PaddleX压缩模型库](appendix/slim_model_zoo.md)
+
+压缩策略的具体使用流程详见[模型压缩](tutorials/compress)
+
+**注意：PaddleX中全部图像分类模型和语义分割模型都支持量化和剪裁操作，目标检测仅有YOLOv3支持量化和剪裁操作。**
+
+## 模型部署
+
+PaddleX提供服务器端python部署、服务器端c++部署、服务器端加密部署、OpenVINO部署、移动端部署共5种部署方案，用户可以根据自己的需求选择合适的部署方案，点击以下链接了解部署的具体流程。

-| 模型 | 模型大小 | GPU预测速度 | CPU预测速度 | ARM芯片预测速度 | mIOU | 备注 |
-| :---- | :------- | :---------- | :---------- | :-------------  | :----- | :----- |
-| DeepLabv3p-MobileNetV2_x0.25 | | - | - | - | - | - |
-| DeepLabv3p-MobileNetV2_x1.0 | | - | - | - | - | - |
-| DeepLabv3p-Xception65 | | - | - | - | - | - |
-| UNet | | - | - | - | - | - |
+| 部署方案 | 部署流程 |
+| :------: | :------: |
+| 服务器端python部署 | [部署流程](tutorials/deploy/deploy_server/deploy_python.html)|
+| 服务器端c++部署 | [部署流程](tutorials/deploy/deploy_server/deploy_cpp/) |
+| 服务器端加密部署 | [部署流程](tutorials/deploy/deploy_server/encryption.html) |
+| OpenVINO部署 | [部署流程](tutorials/deploy/deploy_openvino.html) |
+| 移动端部署 | [部署流程](tutorials/deploy/deploy_lite.html) |
--- a/docs/images/lime.png
+++ b/docs/images/lime.png
--- a/docs/images/normlime.png
+++ b/docs/images/normlime.png
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -26,6 +26,7 @@ PaddleX是基于飞桨核心框架、开发套件和工具组件的深度学习
   cv_solutions.md
   apis/index.rst
   paddlex_gui/index.rst
+   tuning_strategy/index.rst
   update.md
   FAQ.md
   appendix/index.rst

--- a/docs/tuning_strategy/detection/index.rst
+++ b/docs/tuning_strategy/detection/index.rst
+目标检测
+============================
+
+PaddleX针对目标检测任务提供了通过负样本学习降低误检率的策略，用户可根据需求及应用场景使用该策略对模型进行调优。
+
+.. toctree::
+   :maxdepth: 1
+
+   negatives_training.md
+ 
--- a/docs/tuning_strategy/detection/negatives_training.md
+++ b/docs/tuning_strategy/detection/negatives_training.md
+# 通过负样本学习降低误检率
+
+## 应用场景
+
+在背景和目标相似的场景下，模型容易把背景误检成目标。为此，可以通过负样本学习来降低误检率，即在训练过程中把无目标真值的图片加入训练。
+
+## 效果对比
+
+* 与基准模型相比，通过负样本学习后的模型**mmAP有3.6%的提升，mAP有0.1%的提升**。
+* 与基准模型相比，通过负样本学习后的模型在背景图片上的图片级别**误检率降低了49.68%**。
+
+表1 违禁品验证集上**框级别精度**对比
+
+||mmAP（AP@IoU=0.5:0.95）| mAP (AP@IoU=0.5)|
+|:---|:---|:---|
+|基准模型 | 45.8% | 83% |
+|通过负样本学习后的模型 | 49.4% | 83.1% |
+
+表2 违禁品验证集上**图片级别的召回率**、无违禁品验证集上**图片级别的误检率**对比
+
+||违禁品图片级别的召回率| 无违禁品图片级别的误检率|
+|:---|:--------------------|:------------------------|
+|基准模型 | 98.97% | 55.27% |
+|通过负样本学习后的模型 | 97.75% | 5.59% |
+
+【名词解释】
+
+ * 图片级别的召回率：只要在有目标的图片上检测出目标（不论框的个数），该图片被认为召回。批量有目标图片中被召回图片所占的比例，即为图片级别的召回率。
+
+ * 图片级别的误检率：只要在无目标的图片上检测出目标（不论框的个数），该图片被认为误检。批量无目标图片中被误检图片所占的比例，即为图片级别的误检率。
+
+
+## 使用方法
+
+在定义训练所用的数据集之后，使用数据集类的成员函数`add_negative_samples`将无目标真值的背景图片所在路径传入给训练集。代码示例如下：
+
+```
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+from paddlex.det import transforms
+import paddlex as pdx
+
+# 定义训练和验证时的transforms
+train_transforms = transforms.ComposedRCNNTransforms(
+    mode='train', min_max_size=[600, 1000])
+eval_transforms = transforms.ComposedRCNNTransforms(
+    mode='eval', min_max_size=[600, 1000])
+
+# 定义训练所用的数据集
+train_dataset = pdx.datasets.CocoDetection(
+    data_dir='jinnan2_round1_train_20190305/restricted/',
+    ann_file='jinnan2_round1_train_20190305/train.json',
+    transforms=train_transforms,
+    shuffle=True,
+    num_workers=2)
+# 训练集中加入无目标背景图片
+train_dataset.add_negative_samples(
+    'jinnan2_round1_train_20190305/normal_train_back/')
+
+# 定义验证所用的数据集
+eval_dataset = pdx.datasets.CocoDetection(
+    data_dir='jinnan2_round1_train_20190305/restricted/',
+    ann_file='jinnan2_round1_train_20190305/val.json',
+    transforms=eval_transforms,
+    num_workers=2)
+
+# 初始化模型，并进行训练
+model = pdx.det.FasterRCNN(num_classes=len(train_dataset.labels) + 1)
+model.train(
+    num_epochs=17,
+    train_dataset=train_dataset,
+    eval_dataset=eval_dataset,
+    train_batch_size=8,
+    learning_rate=0.01,
+    lr_decay_epochs=[13, 16],
+    save_dir='./output')
+```
+
+## 实验细则
+
+(1) 数据集
+
+我们使用X光违禁品数据集对通过负样本学习降低误检率的策略有效性进行了实验验证。该数据集中背景比较繁杂，很多背景物体与目标物体较为相似。
+
+* 检测铁壳打火机、黑钉打火机 、刀具、电源和电池、剪刀5种违禁品。
+
+* 训练集有883张违禁品图片，验证集有98张违禁品图片。
+
+* 无违禁品的X光图片有2540张。
+
+(2) 基准模型
+
+使用FasterRCNN-ResNet50作为检测模型，除了水平翻转外没有使用其他的数据增强方式，只使用违禁品训练集进行训练。模型在违禁品验证集上的精度见表1，mmAP有45.8%，mAP达到83%。
+
+(3) 通过负样本学习后的模型
+
+把无违禁品的X光图片按1:1分成无违禁品训练集和无违禁品验证集。我们将基准模型在无违禁品验证集进行测试，发现图片级别的误检率高达55.27%。为了降低该误检率，将基准模型在无违禁品训练集进行测试，挑选出被误检图片共663张，将这663张图片加入训练，训练参数配置与基准模型训练时一致。
+
+通过负样本学习后的模型在违禁品验证集上的精度见表1，mmAP有49.4%，mAP达到83.1%。与基准模型相比，**mmAP有3.6%的提升，mAP有0.1%的提升**。通过负样本学习后的模型在无违禁品验证集的误检率仅有5.59%，与基准模型相比，**误检率降低了49.68%**。
+
+此外，还测试了两个模型在有违禁品验证集上图片级别的召回率，见表2，与基准模型相比，通过负样本学习后的模型仅漏检了1张图片，召回率几乎是无损的。
--- a/docs/tuning_strategy/index.rst
+++ b/docs/tuning_strategy/index.rst
+PaddleX调优策略介绍
+============================
+
+.. toctree::
+   :maxdepth: 2
+
+   detection/index.rst
--- a/docs/tutorials/deploy/deploy_lite.md
+++ b/docs/tutorials/deploy/deploy_lite.md
@@ -21,7 +21,7 @@ step 2: 将PaddleX模型导出为inference模型
 step 3: 将inference模型转换成PaddleLite模型

 ```
-python /path/to/PaddleX/deploy/lite/export_lite.py --model_dir /path/to/inference_model --save_file /path/to/onnx_model --place place/to/run
+python /path/to/PaddleX/deploy/lite/export_lite.py --model_dir /path/to/inference_model --save_file /path/to/lite_model --place place/to/run

 ```


--- a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md
+++ b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md
@@ -19,18 +19,18 @@

 ### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference

-PaddlePaddle C++ 预测库针对不同的`CPU`，`CUDA`，以及是否支持TensorRT，提供了不同的预编译版本，目前PaddleX依赖于Paddle1.7版本，以下提供了多个不同版本的Paddle预测库:
+PaddlePaddle C++ 预测库针对不同的`CPU`，`CUDA`，以及是否支持TensorRT，提供了不同的预编译版本，目前PaddleX依赖于Paddle1.8版本，以下提供了多个不同版本的Paddle预测库:

-|  版本说明   | 预测库(1.7.2版本)  |
+|  版本说明   | 预测库(1.8.2版本)  |
 |  ----  | ----  |
-| ubuntu14.04_cpu_avx_mkl  | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-cpu-avx-mkl/fluid_inference.tgz) |
-| ubuntu14.04_cpu_avx_openblas  | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-cpu-avx-openblas/fluid_inference.tgz) |
-| ubuntu14.04_cpu_noavx_openblas  | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-cpu-noavx-openblas/fluid_inference.tgz) |
-| ubuntu14.04_cuda9.0_cudnn7_avx_mkl  | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz) |
-| ubuntu14.04_cuda10.0_cudnn7_avx_mkl  | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-gpu-cuda10-cudnn7-avx-mkl/fluid_inference.tgz ) |
-| ubuntu14.04_cuda10.1_cudnn7.6_avx_mkl_trt6  | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6%2Ffluid_inference.tgz) |
+| ubuntu14.04_cpu_avx_mkl  | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-mkl/fluid_inference.tgz) |
+| ubuntu14.04_cpu_avx_openblas  | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-openblas/fluid_inference.tgz) |
+| ubuntu14.04_cpu_noavx_openblas  | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-noavx-openblas/fluid_inference.tgz) |
+| ubuntu14.04_cuda9.0_cudnn7_avx_mkl  | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz) |
+| ubuntu14.04_cuda10.0_cudnn7_avx_mkl  | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10-cudnn7-avx-mkl/fluid_inference.tgz ) |
+| ubuntu14.04_cuda10.1_cudnn7.6_avx_mkl_trt6  | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6%2Ffluid_inference.tgz) |

-更多和更新的版本，请根据实际情况下载:  [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/windows_cpp_inference.html#id1)
+更多和更新的版本，请根据实际情况下载:  [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)

 下载并解压后`/root/projects/fluid_inference`目录包含内容为：
 ```
@@ -42,7 +42,7 @@ fluid_inference
 └── version.txt # 版本和编译信息
 ```

-**注意:** 预编译版本除`nv-jetson-cuda10-cudnn7.5-trt5` 以外其它包都是基于`GCC 4.8.5`编译，使用高版本`GCC`可能存在 `ABI`兼容性问题，建议降级或[自行编译预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id12)。
+**注意:** 预编译版本除`nv-jetson-cuda10-cudnn7.5-trt5` 以外其它包都是基于`GCC 4.8.5`编译，使用高版本`GCC`可能存在 `ABI`兼容性问题，建议降级或[自行编译预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id12)。


 ### Step4: 编译
@@ -55,17 +55,17 @@ WITH_GPU=OFF
 WITH_MKL=ON
 # 是否集成 TensorRT(仅WITH_GPU=ON 有效)
 WITH_TENSORRT=OFF
-# TensorRT 的lib路径
-TENSORRT_DIR=/path/to/TensorRT/
-# Paddle 预测库路径
-PADDLE_DIR=/path/to/fluid_inference/
+# TensorRT 的路径，如果需要集成TensorRT，需修改为您实际安装的TensorRT路径
+TENSORRT_DIR=/root/projects/TensorRT/
+# Paddle 预测库路径, 请修改为您实际安装的预测库路径
+PADDLE_DIR=/root/projects/fluid_inference
 # Paddle 的预测库是否使用静态库来编译
 # 使用TensorRT时，Paddle的预测库通常为动态库
-WITH_STATIC_LIB=ON
+WITH_STATIC_LIB=OFF
 # CUDA 的 lib 路径
-CUDA_LIB=/path/to/cuda/lib/
+CUDA_LIB=/usr/local/cuda/lib64
 # CUDNN 的 lib 路径
-CUDNN_LIB=/path/to/cudnn/lib/
+CUDNN_LIB=/usr/local/cuda/lib64

 # 是否加载加密后的模型
 WITH_ENCRYPTION=ON
@@ -74,8 +74,8 @@ sh $(pwd)/scripts/bootstrap.sh # 下载预编译版本的加密工具
 ENCRYPTION_DIR=$(pwd)/paddlex-encryption

 # OPENCV 路径, 如果使用自带预编译版本可不修改
+sh $(pwd)/scripts/bootstrap.sh  # 下载预编译版本的opencv
 OPENCV_DIR=$(pwd)/deps/opencv3gcc4.8/
-sh $(pwd)/scripts/bootstrap.sh

 # 以下无需改动
 rm -rf build
@@ -94,7 +94,6 @@ cmake .. \
    -DENCRYPTION_DIR=${ENCRYPTION_DIR} \
    -DOPENCV_DIR=${OPENCV_DIR}
 make
-
 ```
 **注意：** linux环境下编译会自动下载OPENCV, PaddleX-Encryption和YAML，如果编译环境无法访问外网，可手动下载：

@@ -117,9 +116,7 @@ yaml-cpp.zip文件下载后无需解压，在cmake/yaml.cmake中将`URL https://

 **在加载模型前，请检查你的模型目录中文件应该包括`model.yml`、`__model__`和`__params__`三个文件。如若不满足这个条件，请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。**  

-> **注意：由于PaddleX代码的持续更新，版本低于1.0.0的模型（模型版本可查看model.yml文件中的version字段）暂时无法直接用于预测部署，参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。**  
-
-编译成功后，预测demo的可执行程序分别为`build/demo/detector`，`build/demo/classifer`，`build/demo/segmenter`，用户可根据自己的模型类型选择，其主要命令参数说明如下：
+编译成功后，预测demo的可执行程序分别为`build/demo/detector`，`build/demo/classifier`，`build/demo/segmenter`，用户可根据自己的模型类型选择，其主要命令参数说明如下：

 |  参数   | 说明  |
 |  ----  | ----  |
@@ -127,34 +124,37 @@ yaml-cpp.zip文件下载后无需解压，在cmake/yaml.cmake中将`URL https://
 | image  | 要预测的图片文件路径 |
 | image_list  | 按行存储图片路径的.txt文件 |
 | use_gpu  | 是否使用 GPU 预测, 支持值为0或1(默认值为0) |
-| use_trt  | 是否使用 TensorTr 预测, 支持值为0或1(默认值为0) |
+| use_trt  | 是否使用 TensorRT 预测, 支持值为0或1(默认值为0) |
 | gpu_id  | GPU 设备ID, 默认值为0 |
 | save_dir | 保存可视化结果的路径, 默认值为"output"，**classfier无该参数** |
+| key | 加密过程中产生的密钥信息，默认值为""表示加载的是未加密的模型 |
+| batch_size | 预测的批量大小，默认为1 |
+| thread_num | 预测的线程数，默认为cpu处理器个数 |

 ## 样例

-可使用[小度熊识别模型](../deploy_python.html#inference)中导出的`inference_model`和测试图片进行预测。
+可使用[小度熊识别模型](../deploy_python.html#inference)中导出的`inference_model`和测试图片进行预测，导出到/root/projects，模型路径为/root/projects/inference_model。

 `样例一`：

-不使用`GPU`测试图片 `/path/to/xiaoduxiong.jpeg`  
+不使用`GPU`测试图片 `/root/projects/images/xiaoduxiong.jpeg`  

 ```shell
-./build/demo/detector --model_dir=/path/to/inference_model --image=/path/to/xiaoduxiong.jpeg --save_dir=output
+./build/demo/detector --model_dir=/root/projects/inference_model --image=/root/projects/images/xiaoduxiong.jpeg --save_dir=output
 ```
 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。


 `样例二`:

-使用`GPU`预测多个图片`/path/to/image_list.txt`，image_list.txt内容的格式如下：
+使用`GPU`预测多个图片`/root/projects/image_list.txt`，image_list.txt内容的格式如下：
 ```
-/path/to/images/xiaoduxiong1.jpeg
-/path/to/images/xiaoduxiong2.jpeg
+/root/projects/images/xiaoduxiong1.jpeg
+/root/projects/images/xiaoduxiong2.jpeg
 ...
-/path/to/images/xiaoduxiongn.jpeg
+/root/projects/images/xiaoduxiongn.jpeg
 ```
 ```shell
-./build/demo/detector --model_dir=/path/to/models/inference_model --image_list=/root/projects/images_list.txt --use_gpu=1 --save_dir=output
+./build/demo/detector --model_dir=/root/projects/inference_model --image_list=/root/projects/image_list.txt --use_gpu=1 --save_dir=output --batch_size=2 --thread_num=2
 ```
 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
--- a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md
+++ b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md
@@ -10,11 +10,10 @@ Windows 平台下，我们使用`Visual Studio 2019 Community` 进行了测试

 请确保系统已经安装好上述基本软件，我们使用的是`VS2019`的社区版。

-**下面所有示例以工作目录为 `D:\projects`演示**。
+**下面所有示例以工作目录为 `D:\projects`演示。**

-### Step1: 下载代码
+### Step1: 下载PaddleX预测代码

-下载源代码
 ```shell
 d:
 mkdir projects
@@ -22,25 +21,24 @@ cd projects
 git clone https://github.com/PaddlePaddle/PaddleX.git
 ```

-**说明**：其中`C++`预测代码在`PaddleX/deploy/cpp` 目录，该目录不依赖任何`PaddleX`下其他目录。
+**说明**：其中`C++`预测代码在`PaddleX\deploy\cpp` 目录，该目录不依赖任何`PaddleX`下其他目录。


 ### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference

-PaddlePaddle C++ 预测库针对不同的`CPU`，`CUDA`，以及是否支持TensorRT，提供了不同的预编译版本，目前PaddleX依赖于Paddle1.7版本，以下提供了多个不同版本的Paddle预测库:
+PaddlePaddle C++ 预测库针对是否使用GPU、是否支持TensorRT、以及不同的CUDA版本提供了已经编译好的预测库，目前PaddleX依赖于Paddle 1.8，基于Paddle 1.8的Paddle预测库下载链接如下所示:

-|  版本说明   | 预测库(1.7.2版本)  | 编译器 | 构建工具| cuDNN | CUDA
+|  版本说明   | 预测库(1.8.2版本)  | 编译器 | 构建工具| cuDNN | CUDA |
 |  ----  |  ----  |  ----  |  ----  | ---- | ---- |
-| cpu_avx_mkl  | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
-| cpu_avx_openblas  | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/open/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
-| cuda9.0_cudnn7_avx_mkl  | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/mkl/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
-| cuda9.0_cudnn7_avx_openblas  | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/open/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
-| cuda10.0_cudnn7_avx_mkl  | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/mkl/post107/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.5.0 | 9.0 |
+| cpu_avx_mkl  | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
+| cpu_avx_openblas  | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
+| cuda9.0_cudnn7_avx_mkl  | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
+| cuda9.0_cudnn7_avx_openblas  | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
+| cuda10.0_cudnn7_avx_mkl  | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post107/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.5.0 | 10.0 |

+请根据实际情况选择下载，如若以上版本不满足您的需求，请至[C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/windows_cpp_inference.html)选择符合的版本。

-更多和更新的版本，请根据实际情况下载:  [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id1)
-
-解压后`D:\projects\fluid_inference*\`目录下主要包含的内容为：
+将预测库解压后，其所在目录（例如`D:\projects\fluid_inference\`）下主要包含的内容有：
 ```
 ├── \paddle\ # paddle核心库和头文件
 |
@@ -51,8 +49,8 @@ PaddlePaddle C++ 预测库针对不同的`CPU`，`CUDA`，以及是否支持Tens

 ### Step3: 安装配置OpenCV

-1. 在OpenCV官网下载适用于Windows平台的3.4.6版本， [下载地址](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download)  
-2. 运行下载的可执行文件，将OpenCV解压至指定目录，如`D:\projects\opencv`
+1. 在OpenCV官网下载适用于Windows平台的3.4.6版本， [下载地址](https://bj.bcebos.com/paddleseg/deploy/opencv-3.4.6-vc14_vc15.exe)  
+2. 运行下载的可执行文件，将OpenCV解压至指定目录，例如`D:\projects\opencv`
 3. 配置环境变量，如下流程所示  
    - 我的电脑->属性->高级系统设置->环境变量
    - 在系统变量中找到Path（如没有，自行创建），并双击编辑
@@ -63,22 +61,21 @@ PaddlePaddle C++ 预测库针对不同的`CPU`，`CUDA`，以及是否支持Tens
 1. 打开Visual Studio 2019 Community，点击`继续但无需代码`

 ![step2](../../images/vs2019_step1.png)
-
 2. 点击： `文件`->`打开`->`CMake`

 ![step2.1](../../images/vs2019_step2.png)

-选择项目代码所在路径，并打开`CMakeList.txt`：
+选择C++预测代码所在路径（例如`D:\projects\PaddleX\deploy\cpp`），并打开`CMakeLists.txt`：

 ![step2.2](../../images/vs2019_step3.png)
-
-3. 点击：`项目`->`PADDLEX_INFERENCE的CMake设置`
+3. 点击：`项目`->`CMake设置`

 ![step3](../../images/vs2019_step4.png)
-
 4. 点击`浏览`，分别设置编译选项指定`CUDA`、`OpenCV`、`Paddle预测库`的路径

-依赖库路径的含义说明如下（带*表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量对齐，**使用9.0、10.0版本，不使用9.2、10.1等版本CUDA库**）：
+![step3](../../images/vs2019_step5.png)
+
+依赖库路径的含义说明如下（带*表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量与Paddle预测库的对齐，例如Paddle预测库是**使用9.0、10.0版本**编译的，则编译PaddleX预测代码时**不使用9.2、10.1等版本**CUDA库）：

 |  参数名   | 含义  |
 |  ----  | ----  |
@@ -87,38 +84,33 @@ PaddlePaddle C++ 预测库针对不同的`CPU`，`CUDA`，以及是否支持Tens
 | PADDLE_DIR | Paddle c++预测库的路径 |

 **注意：**
-1. 使用`CPU`版预测库，请把`WITH_GPU`的`值`去掉勾
-
+1. 如果使用`CPU`版预测库，请把`WITH_GPU`的`值`去掉勾
 2. 如果使用的是`openblas`版本，请把`WITH_MKL`的`值`去掉勾
-
 3. Windows环境下编译会自动下载YAML，如果编译环境无法访问外网，可手动下载： [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip)
-
 yaml-cpp.zip文件下载后无需解压，在cmake/yaml.cmake中将`URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` 中的网址，改为下载文件的路径。
+4. 如果需要使用模型加密功能，需要手动下载[Windows预测模型加密工具](https://bj.bcebos.com/paddlex/tools/win/paddlex-encryption.zip)。例如解压到D:/projects，解压后目录为D:/projects/paddlex-encryption。编译时需勾选WITH_ENCRYPTION并且在ENCRYPTION_DIR填入D:/projects/paddlex-encryption。

-![step4](../../images/vs2019_step5.png)
+![step_encryption](../../images/vs2019_step_encryption.png)

-**设置完成后**, 点击上图中`保存并生成CMake缓存以加载变量`。
+![step4](../../images/vs2019_step6.png)

+**设置完成后**, 点击上图中`保存并生成CMake缓存以加载变量`。
 5. 点击`生成`->`全部生成`

-![step6](../../images/vs2019_step6.png)
-
+![step6](../../images/vs2019_step7.png)

 ### Step5: 预测及可视化

-
-**在加载模型前，请检查你的模型目录中文件应该包括`model.yml`、`__model__`和`__params__`三个文件。如若不满足这个条件，请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。**   
-
-**注意：由于PaddleX代码的持续更新，版本低于1.0.0的模型（模型版本可查看model.yml文件中的version字段）暂时无法直接用于预测部署，参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。**
+**在加载模型前，请检查你的模型目录中文件应该包括`model.yml`、`__model__`和`__params__`三个文件。如若不满足这个条件，请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。**  

 上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下，打开`cmd`，并切换到该目录：

 ```
-d:
+D:
 cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release
 ```

-编译成功后，预测demo的入口程序为`paddlex_inference\detector.exe`，`paddlex_inference\classifer.exe`，`paddlex_inference\segmenter.exe`，用户可根据自己的模型类型选择，其主要命令参数说明如下：
+编译成功后，预测demo的入口程序为`paddlex_inference\detector.exe`，`paddlex_inference\classifier.exe`，`paddlex_inference\segmenter.exe`，用户可根据自己的模型类型选择，其主要命令参数说明如下：

 |  参数   | 说明  |
 |  ----  | ----  |
@@ -128,33 +120,45 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release
 | use_gpu  | 是否使用 GPU 预测, 支持值为0或1(默认值为0) |
 | gpu_id  | GPU 设备ID, 默认值为0 |
 | save_dir | 保存可视化结果的路径, 默认值为"output"，classfier无该参数 |
-
+| key | 加密过程中产生的密钥信息，默认值为""表示加载的是未加密的模型 |
+| batch_size | 预测的批量大小，默认为1 |
+| thread_num | 预测的线程数，默认为cpu处理器个数 |

 ## 样例

-可使用[小度熊识别模型](../deploy_python.html#inference)中导出的`inference_model`和测试图片进行预测。
+可使用[小度熊识别模型](../deploy_python.md)中导出的`inference_model`和测试图片进行预测, 例如导出到D:\projects，模型路径为D:\projects\inference_model。

-`样例一`：
+### 样例一：(使用未加密的模型对单张图像做预测)

-不使用`GPU`测试图片  `\\path\\to\\xiaoduxiong.jpeg`  
+不使用`GPU`测试图片  `D:\images\xiaoduxiong.jpeg`  

-```shell
-.\\paddlex_inference\\detector.exe --model_dir=\\path\\to\\inference_model --image=D:\\images\\xiaoduxiong.jpeg --save_dir=output
+```
+.\paddlex_inference\detector.exe --model_dir=D:\projects\inference_model --image=D:\images\xiaoduxiong.jpeg --save_dir=output

 ```
 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。


-`样例二`:
+### 样例二：(使用未加密的模型对图像列表做预测)

-使用`GPU`预测多个图片`\\path\\to\\image_list.txt`，image_list.txt内容的格式如下：
+使用`GPU`预测多个图片`D:\images\image_list.txt`，image_list.txt内容的格式如下：
 ```
-\\path\\to\\images\\xiaoduxiong1.jpeg
-\\path\\to\\images\\xiaoduxiong2.jpeg
+D:\images\xiaoduxiong1.jpeg
+D:\images\xiaoduxiong2.jpeg
 ...
-\\path\\to\\images\\xiaoduxiongn.jpeg
+D:\images\xiaoduxiongn.jpeg
 ```
-```shell
-.\\paddlex_inference\\detector.exe --model_dir=\\path\\to\\inference_model --image_list=\\path\\to\\images_list.txt --use_gpu=1 --save_dir=output
+```
+.\paddlex_inference\detector.exe --model_dir=D:\projects\inference_model --image_list=D:\images\image_list.txt --use_gpu=1 --save_dir=output --batch_size=2 --thread_num=2
 ```
 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
+
+### 样例三：(使用加密后的模型对单张图片进行预测)
+
+如果未对模型进行加密，请参考[加密PaddleX模型](../encryption.html#paddlex)对模型进行加密。例如加密后的模型所在目录为`D:\projects\encrypted_inference_model`。
+
+```
+.\paddlex_inference\detector.exe --model_dir=D:\projects\encrypted_inference_model --image=D:\images\xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
+```
+
+`--key`传入加密工具输出的密钥，例如`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`, 图片文件可视化预测结果会保存在`save_dir`参数设置的目录下。
--- a/docs/tutorials/deploy/deploy_server/encryption.md
+++ b/docs/tutorials/deploy/deploy_server/encryption.md
@@ -2,7 +2,7 @@

 PaddleX提供一个轻量级的模型加密部署方案，通过PaddleX内置的模型加密工具对推理模型进行加密，预测部署SDK支持直接加载密文模型并完成推理，提升AI模型部署的安全性。

-**注意：目前加密方案仅支持Linux系统**
+**目前加密方案已支持Windows，Linux系统**

 ## 1. 方案简介

@@ -20,7 +20,7 @@ PaddleX提供一个轻量级的模型加密部署方案，通过PaddleX内置的

 ![](../images/encryption_process.png)

-下面是对提供的C/C++加解密库内部实现的中文描述，参考以下步骤可以实现 一套加解密库 来适应自己的场景并通过内存数据load到paddlepaddle中（c/c++预测服务）
+下面是对提供的C/C++加解密库内部实现的中文描述，参考以下步骤可以实现一套加解密库来适应自己的场景并通过内存数据加载到Paddle Inference预测库中

 > 1）考虑到加密的模型文件解密后需要从内存加载数据，使用conbine的模式生成模型文件和参数文件。
 >
@@ -34,15 +34,17 @@ PaddleX提供一个轻量级的模型加密部署方案，通过PaddleX内置的
 >
 > 6）在模型解密环节根据加密后的文件读取相关的加密数据到内存中，对内存数据使用AES算法进行解密，注意解密时需要采用与加密时一致的加密算法和加密的模式，以及密钥的数据和长度，否则会导致解密后数据错误。
 >
-> 7）集成模型预测的C/C++库，在具体使用paddlepaddle预测时一般涉及paddle::AnalysisConfig和paddle:Predictor，为了能够从内存数据中直接load解密后的模型明文数据（避免模型解密后创建临时文件），这里需要将AnalysisConfig的模型加载函数从SetModel替换为SetModelBuffer来实现从内存中加载模型数据。
+> 7）集成模型预测的C/C++库，在具体使用预测时一般涉及paddle::AnalysisConfig和paddle::Predictor，为了能够从内存数据中直接load解密后的模型明文数据（避免模型解密后创建临时文件），这里需要将AnalysisConfig的模型加载函数从SetModel替换为SetModelBuffer来实现从内存中加载模型数据。

 需要注意的是，在本方案中，密钥集成在上层预测服务的代码中。故模型的安全强度等同于代码抵御逆向调试的强度。为了保护密钥和模型的安全，开发者还需对自己的应用进行加固保护。常见的应用加固手段有：代码混淆，二进制文件加壳 等等，亦或将加密机制更改为AES白盒加密技术来保护密钥。这类技术领域内有大量商业和开源产品可供选择，此处不一一赘述。

 ### 1.2 加密工具

-[PaddleX模型加密工具](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip)。在编译部署代码时，编译脚本会自动下载加密工具，您也可以选择手动下载。
+[Linux版本 PaddleX模型加密工具](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip)，编译脚本会自动下载该版本加密工具，您也可以选择手动下载。

-加密工具包含内容为：
+[Windows版本 PaddleX模型加密工具](https://bj.bcebos.com/paddlex/tools/win/paddlex-encryption.zip)，该版本加密工具需手动下载，如果您在使用Visual Studio 2019编译C++预测代码的过程中已经下载过该工具，此处可不必重复下载。
+
+Linux加密工具包含内容为：
 ```
 paddlex-encryption
 ├── include # 头文件：paddle_model_decrypt.h（解密）和paddle_model_encrypt.h（加密）
@@ -52,22 +54,40 @@ paddlex-encryption
 └── tool # paddlex_encrypt_tool
 ```

+Windows加密工具包含内容为：
+```
+paddlex-encryption
+├── include # 头文件：paddle_model_decrypt.h（解密）和paddle_model_encrypt.h（加密）
+|
+├── lib # pmodel-encrypt.dll和pmodel-decrypt.dll动态库 pmodel-encrypt.lib和pmodel-decrypt.lib静态库
+|
+└── tool # paddlex_encrypt_tool.exe 模型加密工具
+```
 ### 1.3 加密PaddleX模型

 对模型完成加密后，加密工具会产生随机密钥信息(用于AES加解密使用），需要在后续加密部署时传入该密钥来用于解密。
 > 密钥由32字节key + 16字节iv组成， 注意这里产生的key是经过base64编码后的，这样可以扩充key的选取范围

+Linux平台:
 ```
-./paddlex-encryption/tool/paddlex_encrypt_tool -model_dir /path/to/paddlex_inference_model -save_dir /path/to/paddlex_encrypted_model
+# 假设模型在/root/projects下
+./paddlex-encryption/tool/paddlex_encrypt_tool -model_dir /root/projects/paddlex_inference_model -save_dir /root/projects/paddlex_encrypted_model
 ```

-`-model_dir`用于指定inference模型路径（参考[导出inference模型](deploy_python.html#inference)将模型导出为inference格式模型），可使用[导出小度熊识别模型](deploy_python.html#inference)中导出的`inference_model`（**注意**：由于PaddleX代码的持续更新，版本低于1.0.0的模型暂时无法直接用于预测部署，参考[模型版本升级](../upgrade_version.md)对模型版本进行升级。)。加密完成后，加密过的模型会保存至指定的`-save_dir`下，包含`__model__.encrypted`、`__params__.encrypted`和`model.yml`三个文件，同时生成密钥信息，命令输出如下图所示，密钥为`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`
+Windows平台:
+```
+# 假设模型在D:/projects下
+.\paddlex-encryption\tool\paddlex_encrypt_tool.exe -model_dir D:\projects\paddlex_inference_model -save_dir D:\projects\paddlex_encrypted_model
+```
+
+`-model_dir`用于指定inference模型路径（参考[导出inference模型](deploy_python.html#inference)将模型导出为inference格式模型），可使用[导出小度熊识别模型](deploy_python.html#inference)中导出的`inference_model`。加密完成后，加密过的模型会保存至指定的`-save_dir`下，包含`__model__.encrypted`、`__params__.encrypted`和`model.yml`三个文件，同时生成密钥信息，命令输出如下图所示，密钥为`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`

 ![](../images/encrypt.png)

 ## 2. PaddleX C++加密部署

-参考[Linux平台编译指南](deploy_cpp/deploy_cpp_linux.html#linux)编译C++部署代码。编译成功后，预测demo的可执行程序分别为`build/demo/detector`，`build/demo/classifer`，`build/demo/segmenter`，用户可根据自己的模型类型选择，其主要命令参数说明如下：
+### 2.1 Linux平台使用
+参考[Linux平台编译指南](deploy_cpp/deploy_cpp_linux.md)编译C++部署代码。编译成功后，预测demo的可执行程序分别为`build/demo/detector`，`build/demo/classifier`，`build/demo/segmenter`，用户可根据自己的模型类型选择，其主要命令参数说明如下：

 |  参数   | 说明  |
 |  ----  | ----  |
@@ -75,36 +95,72 @@ paddlex-encryption
 | image  | 要预测的图片文件路径 |
 | image_list  | 按行存储图片路径的.txt文件 |
 | use_gpu  | 是否使用 GPU 预测, 支持值为0或1(默认值为0) |
-| use_trt  | 是否使用 TensorTr 预测, 支持值为0或1(默认值为0) |
+| use_trt  | 是否使用 TensorRT 预测, 支持值为0或1(默认值为0) |
 | gpu_id  | GPU 设备ID, 默认值为0 |
 | save_dir | 保存可视化结果的路径, 默认值为"output"，classifier无该参数 |
 | key | 加密过程中产生的密钥信息，默认值为""表示加载的是未加密的模型 |
+| batch_size | 预测的批量大小，默认为1 |
+| thread_num | 预测的线程数，默认为cpu处理器个数 |


-## 样例
+### 样例

-可使用[导出小度熊识别模型](deploy_python.html#inference)中的测试图片进行预测。
+可使用[导出小度熊识别模型](deploy_python.md#inference)中的测试图片进行预测。

-`样例一`：
+#### 样例一：

-不使用`GPU`测试图片 `/path/to/xiaoduxiong.jpeg`  
+不使用`GPU`测试图片 `/root/projects/images/xiaoduxiong.jpeg`  

 ```shell
-./build/demo/detector --model_dir=/path/to/inference_model --image=/path/to/xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
+./build/demo/detector --model_dir=/root/projects/paddlex_encrypted_model --image=/root/projects/images/xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
 ```
 `--key`传入加密工具输出的密钥，例如`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`, 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。


-`样例二`:
+#### 样例二:

-使用`GPU`预测多个图片`/path/to/image_list.txt`，image_list.txt内容的格式如下：
+使用`GPU`预测多个图片`/root/projects/image_list.txt`，image_list.txt内容的格式如下：
 ```
-/path/to/images/xiaoduxiong1.jpeg
-/path/to/images/xiaoduxiong2.jpeg
+/root/projects/images/xiaoduxiong1.jpeg
+/root/projects/images/xiaoduxiong2.jpeg
 ...
-/path/to/images/xiaoduxiongn.jpeg
+/root/projects/images/xiaoduxiongn.jpeg
+```
+```shell
+./build/demo/detector --model_dir=/root/projects/paddlex_encrypted_model --image_list=/root/projects/image_list.txt --use_gpu=1 --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
 ```
+`--key`传入加密工具输出的密钥，例如`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`, 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
+
+### 2.2 Windows平台使用
+参考[Windows平台编译指南](deploy_cpp/deploy_cpp_win_vs2019.md)。需自行下载Windows版PaddleX加密工具压缩包，解压，在编译指南的编译流程基础上，在CMake设置中勾选WITH_ENCRYPTION，ENCRYPTION_DIR填写为加密工具包解压后的目录，再进行编译。参数与Linux版本预测部署一致。预测demo的入口程序为paddlex_inference\detector.exe，paddlex_inference\classifier.exe，paddlex_inference\segmenter.exe。
+
+### 样例
+
+可使用[导出小度熊识别模型](deploy_python.md#inference)中的测试图片进行预测。
+
+#### 样例一：
+
+不使用`GPU`测试单张图片，例如图片为`D:\images\xiaoduxiong.jpeg`，加密后的模型目录为`D:\projects\paddlex_encrypted_model`
+
 ```shell
-./build/demo/detector --model_dir=/path/to/models/inference_model --image_list=/root/projects/images_list.txt --use_gpu=1 --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
+.\paddlex_inference\detector.exe --model_dir=D:\projects\paddlex_encrypted_model --image=D:\images\xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
+```
+`--key`传入加密工具输出的密钥，例如`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`, 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
+
+
+#### 样例二:
+
+使用`GPU`预测图片列表，例如图片列表为`D:\projects\image_list.txt`，`image_list.txt`的内容如下：
+```
+D:\projects\images\xiaoduxiong1.jpeg
+D:\projects\images\xiaoduxiong2.jpeg
+...
+D:\projects\images\xiaoduxiongn.jpeg
+```
+
+加密后的模型目录例如为`D:\projects\paddlex_encrypted_model`
+
+```
+.\paddlex_inference\detector.exe --model_dir=D:\projects\paddlex_encrypted_model --image_list=D:\projects\image_list.txt --use_gpu=1 --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
 ```
 `--key`传入加密工具输出的密钥，例如`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`, 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
--- a/docs/tutorials/deploy/images/vs2019_step4.png
+++ b/docs/tutorials/deploy/images/vs2019_step4.png
--- a/docs/tutorials/deploy/images/vs2019_step5.png
+++ b/docs/tutorials/deploy/images/vs2019_step5.png
--- a/docs/tutorials/deploy/images/vs2019_step6.png
+++ b/docs/tutorials/deploy/images/vs2019_step6.png
--- a/docs/tutorials/deploy/images/vs2019_step7.png
+++ b/docs/tutorials/deploy/images/vs2019_step7.png
--- a/docs/tutorials/deploy/images/vs2019_step_encryption.png
+++ b/docs/tutorials/deploy/images/vs2019_step_encryption.png
--- a/examples/human_segmentation/README.md
+++ b/examples/human_segmentation/README.md
+# HumanSeg人像分割模型
+
+本教程基于PaddleX核心分割网络，提供针对人像分割场景从预训练模型、Fine-tune、视频分割预测部署的全流程应用指南。
+
+## 安装
+
+**前置依赖**
+* paddlepaddle >= 1.8.0
+* python >= 3.5
+
+```
+pip install paddlex -i https://mirror.baidu.com/pypi/simple
+```
+安装的相关问题参考[PaddleX安装](https://paddlex.readthedocs.io/zh_CN/latest/install.html)
+
+## 预训练模型
+HumanSeg开放了在大规模人像数据上训练的两个预训练模型，满足多种使用场景的需求
+
+| 模型类型 | Checkpoint Parameter | Inference Model | Quant Inference Model | 备注 |
+| --- | --- | --- | ---| --- |
+| HumanSeg-server  | [humanseg_server_params](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_server.pdparams) | [humanseg_server_inference](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_server_inference.zip) | -- | 高精度模型，适用于服务端GPU且背景复杂的人像场景， 模型结构为Deeplabv3+/Xception65, 输入大小（512， 512） |
+| HumanSeg-mobile | [humanseg_mobile_params](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_mobile.pdparams) | [humanseg_mobile_inference](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_mobile_inference.zip) | [humanseg_mobile_quant](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_mobile_quant.zip) | 轻量级模型, 适用于移动端或服务端CPU的前置摄像头场景，模型结构为HRNet_w18_small_v1，输入大小（192， 192）  |
+
+
+模型性能
+
+| 模型 | 模型大小 | 计算耗时 |
+| --- | --- | --- |
+|humanseg_server_inference| 158M | - |
+|humanseg_mobile_inference | 5.8 M | 42.35ms |
+|humanseg_mobile_quant | 1.6M | 24.93ms |
+
+计算耗时运行环境： 小米，cpu：骁龙855， 内存：6GB， 图片大小：192*192
+
+
+**NOTE:**
+其中Checkpoint Parameter为模型权重，用于Fine-tuning场景。
+
+* Inference Model和Quant Inference Model为预测部署模型，包含`__model__`计算图结构、`__params__`模型参数和`model.yml`基础的模型配置信息。
+
+* 其中Inference Model适用于服务端的CPU和GPU预测部署，Quant Inference Model为量化版本，适用于通过Paddle Lite进行移动端等端侧设备部署。
+
+执行以下脚本进行HumanSeg预训练模型的下载
+```bash
+python pretrain_weights/download_pretrain_weights.py
+```
+
+## 下载测试数据
+我们提供了[supervise.ly](https://supervise.ly/)发布人像分割数据集**Supervisely Persons**, 从中随机抽取一小部分并转化成PaddleX可直接加载数据格式。通过运行以下代码进行快速下载，其中包含手机前置摄像头的人像测试视频`video_test.mp4`.
+
+```bash
+python data/download_data.py
+```
+
+## 快速体验视频流人像分割
+结合DIS（Dense Inverse Search-based method）光流算法预测结果与分割结果，改善视频流人像分割
+```bash
+# 通过电脑摄像头进行实时分割处理
+python video_infer.py --model_dir pretrain_weights/humanseg_mobile_inference
+
+# 对人像视频进行分割处理
+python video_infer.py --model_dir pretrain_weights/humanseg_mobile_inference --video_path data/video_test.mp4
+```
+
+视频分割结果如下：
+
+<img src="https://paddleseg.bj.bcebos.com/humanseg/data/video_test.gif" width="20%" height="20%"><img src="https://paddleseg.bj.bcebos.com/humanseg/data/result.gif" width="20%" height="20%">
+
+根据所选背景进行背景替换，背景可以是一张图片，也可以是一段视频。
+```bash
+# 通过电脑摄像头进行实时背景替换处理, 也可通过'--background_video_path'传入背景视频
+python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --background_image_path data/background.jpg
+
+# 对人像视频进行背景替换处理, 也可通过'--background_video_path'传入背景视频
+python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --video_path data/video_test.mp4 --background_image_path data/background.jpg
+
+# 对单张图像进行背景替换
+python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --image_path data/human_image.jpg --background_image_path data/background.jpg
+
+```
+
+背景替换结果如下：
+
+<img src="https://paddleseg.bj.bcebos.com/humanseg/data/video_test.gif" width="20%" height="20%"><img src="https://paddleseg.bj.bcebos.com/humanseg/data/bg_replace.gif" width="20%" height="20%">
+
+
+**NOTE**:
+
+视频分割处理时间需要几分钟，请耐心等待。
+
+提供的模型适用于手机摄像头竖屏拍摄场景，宽屏效果会略差一些。
+
+## 训练
+使用下述命令基于预训练模型进行Fine-tuning，请确保选用的模型结构`model_type`与模型参数`pretrain_weights`匹配。
+```bash
+# 指定GPU卡号（以0号卡为例）
+export CUDA_VISIBLE_DEVICES=0
+# 若不使用GPU，则将CUDA_VISIBLE_DEVICES指定为空
+# export CUDA_VISIBLE_DEVICES=
+python train.py --model_type HumanSegMobile \
+--save_dir output/ \
+--data_dir data/mini_supervisely \
+--train_list data/mini_supervisely/train.txt \
+--val_list data/mini_supervisely/val.txt \
+--pretrain_weights pretrain_weights/humanseg_mobile_params \
+--batch_size 8 \
+--learning_rate 0.001 \
+--num_epochs 10 \
+--image_shape 192 192
+```
+其中参数含义如下：
+* `--model_type`: 模型类型，可选项为：HumanSegServer和HumanSegMobile
+* `--save_dir`: 模型保存路径
+* `--data_dir`: 数据集路径
+* `--train_list`: 训练集列表路径
+* `--val_list`: 验证集列表路径
+* `--pretrain_weights`: 预训练模型路径
+* `--batch_size`: 批大小
+* `--learning_rate`: 初始学习率
+* `--num_epochs`: 训练轮数
+* `--image_shape`: 网络输入图像大小（w, h）
+
+更多命令行帮助可运行下述命令进行查看：
+```bash
+python train.py --help
+```
+**NOTE**
+可通过更换`--model_type`变量与对应的`--pretrain_weights`使用不同的模型快速尝试。
+
+## 评估
+使用下述命令进行评估
+```bash
+python eval.py --model_dir output/best_model \
+--data_dir data/mini_supervisely \
+--val_list data/mini_supervisely/val.txt \
+--image_shape 192 192
+```
+其中参数含义如下：
+* `--model_dir`: 模型路径
+* `--data_dir`: 数据集路径
+* `--val_list`: 验证集列表路径
+* `--image_shape`: 网络输入图像大小（w, h）
+
+## 预测
+使用下述命令进行预测， 预测结果默认保存在`./output/result/`文件夹中。
+```bash
+python infer.py --model_dir output/best_model \
+--data_dir data/mini_supervisely \
+--test_list data/mini_supervisely/test.txt \
+--save_dir output/result \
+--image_shape 192 192
+```
+其中参数含义如下：
+* `--model_dir`: 模型路径
+* `--data_dir`: 数据集路径
+* `--test_list`: 测试集列表路径
+* `--save_dir`: 预测结果保存路径
+* `--image_shape`: 网络输入图像大小（w, h）
+
+## 模型导出
+```bash
+paddlex --export_inference --model_dir output/best_model \
+--save_dir output/export
+```
+其中参数含义如下：
+* `--model_dir`: 模型路径
+* `--save_dir`: 导出模型保存路径
+
+## 离线量化
+```bash
+python quant_offline.py --model_dir output/best_model \
+--data_dir data/mini_supervisely \
+--quant_list data/mini_supervisely/val.txt \
+--save_dir output/quant_offline \
+--image_shape 192 192
+```
+其中参数含义如下：
+* `--model_dir`: 待量化模型路径
+* `--data_dir`: 数据集路径
+* `--quant_list`: 量化数据集列表路径，一般直接选择训练集或验证集
+* `--save_dir`: 量化模型保存路径
+* `--image_shape`: 网络输入图像大小（w, h）
--- a/examples/human_segmentation/bg_replace.py
+++ b/examples/human_segmentation/bg_replace.py
+# coding: utf8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import os.path as osp
+import cv2
+import numpy as np
+
+from postprocess import postprocess, threshold_mask
+import paddlex as pdx
+import paddlex.utils.logging as logging
+from paddlex.seg import transforms
+
+
+def parse_args():
+    """Parse the command-line options of the background-replacement script.
+
+    Returns:
+        argparse.Namespace with the attributes ``model_dir``, ``image_path``,
+        ``background_image_path``, ``video_path``, ``background_video_path``,
+        ``save_dir`` and ``image_shape`` (a list of two ints).
+    """
+    parser = argparse.ArgumentParser(
+        description='HumanSeg inference for video')
+
+    # String-valued options as (flag, help text, extra keyword arguments).
+    # The destination attribute is the flag name without its leading dashes.
+    string_options = [
+        ('--model_dir', 'Model path for inference', {}),
+        ('--image_path', 'Image including human', {'default': None}),
+        ('--background_image_path', 'Background image for replacing',
+         {'default': None}),
+        ('--video_path', 'Video path for inference', {'default': None}),
+        ('--background_video_path', 'Background video path for replacing',
+         {'default': None}),
+        ('--save_dir', 'The directory for saving the inference results',
+         {'default': './output'}),
+    ]
+    for flag, help_text, extra in string_options:
+        parser.add_argument(
+            flag, dest=flag.lstrip('-'), help=help_text, type=str, **extra)
+
+    # The network input size is given as two integers.
+    parser.add_argument(
+        "--image_shape",
+        dest="image_shape",
+        help="The image shape for net inputs.",
+        nargs=2,
+        default=[192, 192],
+        type=int)
+
+    return parser.parse_args()
+
+
+def bg_replace(label_map, img, bg):
+    """Composite ``img`` over ``bg`` using ``label_map`` as per-pixel weight.
+
+    Args:
+        label_map: 2-D weight map; it is indexed as ``[:, :, np.newaxis]``
+            and multiplied element-wise with ``img``.
+        img: 3-D image array whose first two axes give the output size.
+        bg: Background image; it is resized to the size of ``img``.
+
+    Returns:
+        ``label_map * img + (1 - label_map) * bg`` cast to ``np.uint8``.
+    """
+    height, width, _ = img.shape
+    background = cv2.resize(bg, (width, height))
+    # Replicate the single-channel weight across three channels.
+    weight = np.repeat(label_map[:, :, np.newaxis], 3, axis=2)
+    foreground_part = weight * img
+    background_part = (1 - weight) * background
+    return (foreground_part + background_part).astype(np.uint8)
+
+
+def recover(img, im_info):
+    """Undo one preprocessing step so ``img`` regains its earlier size.
+
+    Args:
+        img: Array to restore; the first two axes are treated as height
+            and width.
+        im_info: Pair ``(op, size)``. ``op`` is ``'resize'`` or ``'padding'``;
+            any other value returns ``img`` unchanged. ``size`` is read as
+            ``(height, width)``.
+
+    Returns:
+        The resized (``'resize'``) or cropped (``'padding'``) array.
+    """
+    if im_info[0] == 'resize':
+        w, h = im_info[1][1], im_info[1][0]
+        # The third positional parameter of cv2.resize is ``dst``, so the
+        # interpolation flag has to be passed by keyword.
+        img = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)
+    elif im_info[0] == 'padding':
+        # NOTE(review): assumes the size is stored as (h, w), matching the
+        # 'resize' branch; previously both w and h read index 0.
+        w, h = im_info[1][1], im_info[1][0]
+        # Slice only the two leading axes so 2-D maps are cropped as well.
+        img = img[0:h, 0:w]
+    return img
+
+
+def infer(args):
+    resize_h = args.image_shape[1]
+    resize_w = args.image_shape[0]
+
+    test_transforms = transforms.Compose([transforms.Normalize()])
+    model = pdx.load_model(args.model_dir)
+
+    if not osp.exists(args.save_dir):
+        os.makedirs(args.save_dir)
+
+    # 图像背景替换
+    if args.image_path is not None:
+        if not osp.exists(args.image_path):
+            raise Exception('The --image_path is not existed: {}'.format(
+                args.image_path))
+        if args.background_image_path is None:
+            raise Exception(
+                'The --background_image_path is not set. Please set it')
+        else:
+            if not osp.exists(args.background_image_path):
+                raise Exception(
+                    'The --background_image_path is not existed: {}'.format(
+                        args.background_image_path))
+
+        img = cv2.imread(args.image_path)
+        im_shape = img.shape
+        im_scale_x = float(resize_w) / float(im_shape[1])
+        im_scale_y = float(resize_h) / float(im_shape[0])
+        im = cv2.resize(
+            img,
+            None,
+            None,
+            fx=im_scale_x,
+            fy=im_scale_y,
+            interpolation=cv2.INTER_LINEAR)
+        image = im.astype('float32')
+        im_info = ('resize', im_shape[0:2])
+        pred = model.predict(image, test_transforms)
+        label_map = pred['label_map']
+        label_map = recover(label_map, im_info)
+        bg = cv2.imread(args.background_image_path)
+        save_name = osp.basename(args.image_path)
+        save_path = osp.join(args.save_dir, save_name)
+        result = bg_replace(label_map, img, bg)
+        cv2.imwrite(save_path, result)
+
+    # 视频背景替换，如果提供背景视频则以背景视频作为背景，否则采用提供的背景图片
+    else:
+        is_video_bg = False
+        if args.background_video_path is not None:
+            if not osp.exists(args.background_video_path):
+                raise Exception(
+                    'The --background_video_path is not existed: {}'.format(
+                        args.background_video_path))
+            is_video_bg = True
+        elif args.background_image_path is not None:
+            if not osp.exists(args.background_image_path):
+                raise Exception(
+                    'The --background_image_path is not existed: {}'.format(
+                        args.background_image_path))
+        else:
+            raise Exception(
+                'Please offer backgound image or video. You should set --backbground_iamge_paht or --background_video_path'
+            )
+
+        disflow = cv2.DISOpticalFlow_create(
+            cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
+        prev_gray = np.zeros((resize_h, resize_w), np.uint8)
+        prev_cfd = np.zeros((resize_h, resize_w), np.float32)
+        is_init = True
+        if args.video_path is not None:
+            logging.info('Please wait. It is computing......')
+            if not osp.exists(args.video_path):
+                raise Exception('The --video_path is not existed: {}'.format(
+                    args.video_path))
+
+            cap_video = cv2.VideoCapture(args.video_path)
+            fps = cap_video.get(cv2.CAP_PROP_FPS)
+            width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            save_name = osp.basename(args.video_path)
+            save_name = save_name.split('.')[0]
+            save_path = osp.join(args.save_dir, save_name + '.avi')
+
+            cap_out = cv2.VideoWriter(
+                save_path,
+                cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps,
+                (width, height))
+
+            if is_video_bg:
+                cap_bg = cv2.VideoCapture(args.background_video_path)
+                frames_bg = cap_bg.get(cv2.CAP_PROP_FRAME_COUNT)
+                current_frame_bg = 1
+            else:
+                img_bg = cv2.imread(args.background_image_path)
+            while cap_video.isOpened():
+                ret, frame = cap_video.read()
+                if ret:
+                    im_shape = frame.shape
+                    im_scale_x = float(resize_w) / float(im_shape[1])
+                    im_scale_y = float(resize_h) / float(im_shape[0])
+                    im = cv2.resize(
+                        frame,
+                        None,
+                        None,
+                        fx=im_scale_x,
+                        fy=im_scale_y,
+                        interpolation=cv2.INTER_LINEAR)
+                    image = im.astype('float32')
+                    im_info = ('resize', im_shape[0:2])
+                    pred = model.predict(image, test_transforms)
+                    score_map = pred['score_map']
+                    cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
+                    cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
+                    score_map = 255 * score_map[:, :, 1]
+                    optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
+                                              disflow, is_init)
+                    prev_gray = cur_gray.copy()
+                    prev_cfd = optflow_map.copy()
+                    is_init = False
+                    optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
+                    optflow_map = threshold_mask(
+                        optflow_map, thresh_bg=0.2, thresh_fg=0.8)
+                    score_map = recover(optflow_map, im_info)
+
+                    #循环读取背景帧
+                    if is_video_bg:
+                        ret_bg, frame_bg = cap_bg.read()
+                        if ret_bg:
+                            if current_frame_bg == frames_bg:
+                                current_frame_bg = 1
+                                cap_bg.set(cv2.CAP_PROP_POS_FRAMES, 0)
+                        else:
+                            break
+                        current_frame_bg += 1
+                        comb = bg_replace(score_map, frame, frame_bg)
+                    else:
+                        comb = bg_replace(score_map, frame, img_bg)
+
+                    cap_out.write(comb)
+                else:
+                    break
+
+            if is_video_bg:
+                cap_bg.release()
+            cap_video.release()
+            cap_out.release()
+
+        # 当没有输入预测图像和视频的时候，则打开摄像头
+        else:
+            cap_video = cv2.VideoCapture(0)
+            if not cap_video.isOpened():
+                raise IOError("Error opening video stream or file, "
+                              "--video_path whether existing: {}"
+                              " or camera whether working".format(
+                                  args.video_path))
+                return
+
+            if is_video_bg:
+                cap_bg = cv2.VideoCapture(args.background_video_path)
+                frames_bg = cap_bg.get(cv2.CAP_PROP_FRAME_COUNT)
+                current_frame_bg = 1
+            else:
+                img_bg = cv2.imread(args.background_image_path)
+            while cap_video.isOpened():
+                ret, frame = cap_video.read()
+                if ret:
+                    im_shape = frame.shape
+                    im_scale_x = float(resize_w) / float(im_shape[1])
+                    im_scale_y = float(resize_h) / float(im_shape[0])
+                    im = cv2.resize(
+                        frame,
+                        None,
+                        None,
+                        fx=im_scale_x,
+                        fy=im_scale_y,
+                        interpolation=cv2.INTER_LINEAR)
+                    image = im.astype('float32')
+                    im_info = ('resize', im_shape[0:2])
+                    pred = model.predict(image, test_transforms)
+                    score_map = pred['score_map']
+                    cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
+                    cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
+                    score_map = 255 * score_map[:, :, 1]
+                    optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
+                                              disflow, is_init)
+                    prev_gray = cur_gray.copy()
+                    prev_cfd = optflow_map.copy()
+                    is_init = False
+                    optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
+                    optflow_map = threshold_mask(
+                        optflow_map, thresh_bg=0.2, thresh_fg=0.8)
+                    score_map = recover(optflow_map, im_info)
+
+                    #循环读取背景帧
+                    if is_video_bg:
+                        ret_bg, frame_bg = cap_bg.read()
+                        if ret_bg:
+                            if current_frame_bg == frames_bg:
+                                current_frame_bg = 1
+                                cap_bg.set(cv2.CAP_PROP_POS_FRAMES, 0)
+                        else:
+                            break
+                        current_frame_bg += 1
+                        comb = bg_replace(score_map, frame, frame_bg)
+                    else:
+                        comb = bg_replace(score_map, frame, img_bg)
+                    cv2.imshow('HumanSegmentation', comb)
+                    if cv2.waitKey(1) & 0xFF == ord('q'):
+                        break
+                else:
+                    break
+            if is_video_bg:
+                cap_bg.release()
+            cap_video.release()
+
+
+# Script entry point: parse the command-line arguments and hand them to infer().
+if __name__ == "__main__":
+    args = parse_args()
+    infer(args)
--- a/examples/human_segmentation/data/download_data.py
+++ b/examples/human_segmentation/data/download_data.py
+# Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os
+
+LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
+
+import paddlex as pdx
+
+
+def download_data(savepath):
+    url = "https://paddleseg.bj.bcebos.com/humanseg/data/mini_supervisely.zip"
+    pdx.utils.download_and_decompress(url=url, path=savepath)
+
+    url = "https://paddleseg.bj.bcebos.com/humanseg/data/video_test.zip"
+    pdx.utils.download_and_decompress(url=url, path=savepath)
+
+
+# Script entry point: download the archives next to this file (LOCAL_PATH).
+if __name__ == "__main__":
+    download_data(LOCAL_PATH)
+    print("Data download finish!")
--- a/examples/human_segmentation/eval.py
+++ b/examples/human_segmentation/eval.py
+# coding: utf8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import paddlex as pdx
+import paddlex.utils.logging as logging
+from paddlex.seg import transforms
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='HumanSeg training')
+    parser.add_argument(
+        '--model_dir',
+        dest='model_dir',
+        help='Model path for evaluating',
+        type=str,
+        default='output/best_model')
+    parser.add_argument(
+        '--data_dir',
+        dest='data_dir',
+        help='The root directory of dataset',
+        type=str)
+    parser.add_argument(
+        '--val_list',
+        dest='val_list',
+        help='Val list file of dataset',
+        type=str,
+        default=None)
+    parser.add_argument(
+        '--batch_size',
+        dest='batch_size',
+        help='Mini batch size',
+        type=int,
+        default=128)
+    parser.add_argument(
+        "--image_shape",
+        dest="image_shape",
+        help="The image shape for net inputs.",
+        nargs=2,
+        default=[192, 192],
+        type=int)
+    return parser.parse_args()
+
+
+def dict2str(dict_input):
+    out = ''
+    for k, v in dict_input.items():
+        try:
+            v = round(float(v), 6)
+        except:
+            pass
+        out = out + '{}={}, '.format(k, v)
+    return out.strip(', ')
+
+
+def evaluate(args):
+    eval_transforms = transforms.Compose(
+        [transforms.Resize(args.image_shape), transforms.Normalize()])
+
+    eval_dataset = pdx.datasets.SegDataset(
+        data_dir=args.data_dir,
+        file_list=args.val_list,
+        transforms=eval_transforms)
+
+    model = pdx.load_model(args.model_dir)
+    metrics = model.evaluate(eval_dataset, args.batch_size)
+    logging.info('[EVAL] Finished, {} .'.format(dict2str(metrics)))
+
+
+# Script entry point: parse the command-line arguments and run evaluate().
+if __name__ == '__main__':
+    args = parse_args()
+
+    evaluate(args)
--- a/examples/human_segmentation/infer.py
+++ b/examples/human_segmentation/infer.py
+# coding: utf8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import os.path as osp
+import cv2
+import numpy as np
+import tqdm
+
+import paddlex as pdx
+from paddlex.seg import transforms
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='HumanSeg prediction and visualization')
+    parser.add_argument(
+        '--model_dir',
+        dest='model_dir',
+        help='Model path for prediction',
+        type=str)
+    parser.add_argument(
+        '--data_dir',
+        dest='data_dir',
+        help='The root directory of dataset',
+        type=str)
+    parser.add_argument(
+        '--test_list',
+        dest='test_list',
+        help='Test list file of dataset',
+        type=str)
+    parser.add_argument(
+        '--save_dir',
+        dest='save_dir',
+        help='The directory for saving the inference results',
+        type=str,
+        default='./output/result')
+    parser.add_argument(
+        "--image_shape",
+        dest="image_shape",
+        help="The image shape for net inputs.",
+        nargs=2,
+        default=[192, 192],
+        type=int)
+    return parser.parse_args()
+
+
+def infer(args):
+    def makedir(path):
+        sub_dir = osp.dirname(path)
+        if not osp.exists(sub_dir):
+            os.makedirs(sub_dir)
+
+    test_transforms = transforms.Compose(
+        [transforms.Resize(args.image_shape), transforms.Normalize()])
+    model = pdx.load_model(args.model_dir)
+    added_saved_path = osp.join(args.save_dir, 'added')
+    mat_saved_path = osp.join(args.save_dir, 'mat')
+    scoremap_saved_path = osp.join(args.save_dir, 'scoremap')
+
+    with open(args.test_list, 'r') as f:
+        files = f.readlines()
+
+    for file in tqdm.tqdm(files):
+        file = file.strip()
+        im_file = osp.join(args.data_dir, file)
+        im = cv2.imread(im_file)
+        result = model.predict(im_file, transforms=test_transforms)
+
+        # save added image
+        added_image = pdx.seg.visualize(
+            im_file, result, weight=0.6, save_dir=None)
+        added_image_file = osp.join(added_saved_path, file)
+        makedir(added_image_file)
+        cv2.imwrite(added_image_file, added_image)
+
+        # save score map
+        score_map = result['score_map'][:, :, 1]
+        score_map = (score_map * 255).astype(np.uint8)
+        score_map_file = osp.join(scoremap_saved_path, file)
+        makedir(score_map_file)
+        cv2.imwrite(score_map_file, score_map)
+
+        # save mat image
+        score_map = np.expand_dims(score_map, axis=-1)
+        mat_image = np.concatenate([im, score_map], axis=2)
+        mat_file = osp.join(mat_saved_path, file)
+        ext = osp.splitext(mat_file)[-1]
+        mat_file = mat_file.replace(ext, '.png')
+        makedir(mat_file)
+        cv2.imwrite(mat_file, mat_image)
+
+
+# Script entry point: parse the command-line arguments and hand them to infer().
+if __name__ == '__main__':
+    args = parse_args()
+    infer(args)
--- a/examples/human_segmentation/postprocess.py
+++ b/examples/human_segmentation/postprocess.py
+# coding: utf8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+
+
+def cal_optical_flow_tracking(pre_gray, cur_gray, prev_cfd, dl_weights,
+                              disflow):
+    """计算光流跟踪匹配点和光流图
+    输入参数:
+        pre_gray: 上一帧灰度图
+        cur_gray: 当前帧灰度图
+        prev_cfd: 上一帧光流图
+        dl_weights: 融合权重图
+        disflow: 光流数据结构
+    返回值:
+        is_track: 光流点跟踪二值图，即是否具有光流点匹配
+        track_cfd: 光流跟踪图
+    """
+    check_thres = 8
+    h, w = pre_gray.shape[:2]
+    track_cfd = np.zeros_like(prev_cfd)
+    is_track = np.zeros_like(pre_gray)
+    flow_fw = disflow.calc(pre_gray, cur_gray, None)
+    flow_bw = disflow.calc(cur_gray, pre_gray, None)
+    flow_fw = np.round(flow_fw).astype(np.int)
+    flow_bw = np.round(flow_bw).astype(np.int)
+    y_list = np.array(range(h))
+    x_list = np.array(range(w))
+    yv, xv = np.meshgrid(y_list, x_list)
+    yv, xv = yv.T, xv.T
+    cur_x = xv + flow_fw[:, :, 0]
+    cur_y = yv + flow_fw[:, :, 1]
+
+    # 超出边界不跟踪
+    not_track = (cur_x < 0) + (cur_x >= w) + (cur_y < 0) + (cur_y >= h)
+    flow_bw[~not_track] = flow_bw[cur_y[~not_track], cur_x[~not_track]]
+    not_track += (np.square(flow_fw[:, :, 0] + flow_bw[:, :, 0]) +
+                  np.square(flow_fw[:, :, 1] + flow_bw[:, :, 1])
+                  ) >= check_thres
+    track_cfd[cur_y[~not_track], cur_x[~not_track]] = prev_cfd[~not_track]
+
+    is_track[cur_y[~not_track], cur_x[~not_track]] = 1
+
+    not_flow = np.all(np.abs(flow_fw) == 0,
+                      axis=-1) * np.all(np.abs(flow_bw) == 0, axis=-1)
+    dl_weights[cur_y[not_flow], cur_x[not_flow]] = 0.05
+    return track_cfd, is_track, dl_weights
+
+
+def fuse_optical_flow_tracking(track_cfd, dl_cfd, dl_weights, is_track):
+    """光流追踪图和人像分割结构融合
+    输入参数:
+        track_cfd: 光流追踪图
+        dl_cfd: 当前帧分割结果
+        dl_weights: 融合权重图
+        is_track: 光流点匹配二值图
+    返回
+        cur_cfd: 光流跟踪图和人像分割结果融合图
+    """
+    fusion_cfd = dl_cfd.copy()
+    is_track = is_track.astype(np.bool)
+    fusion_cfd[is_track] = dl_weights[is_track] * dl_cfd[is_track] + (
+        1 - dl_weights[is_track]) * track_cfd[is_track]
+    # 确定区域
+    index_certain = ((dl_cfd > 0.9) + (dl_cfd < 0.1)) * is_track
+    index_less01 = (dl_weights < 0.1) * index_certain
+    fusion_cfd[index_less01] = 0.3 * dl_cfd[index_less01] + 0.7 * track_cfd[
+        index_less01]
+    index_larger09 = (dl_weights >= 0.1) * index_certain
+    fusion_cfd[index_larger09] = 0.4 * dl_cfd[
+        index_larger09] + 0.6 * track_cfd[index_larger09]
+    return fusion_cfd
+
+
+def threshold_mask(img, thresh_bg, thresh_fg):
+    dst = (img / 255.0 - thresh_bg) / (thresh_fg - thresh_bg)
+    dst[np.where(dst > 1)] = 1
+    dst[np.where(dst < 0)] = 0
+    return dst.astype(np.float32)
+
+
+def postprocess(cur_gray, scoremap, prev_gray, pre_cfd, disflow, is_init):
+    """光流优化
+    Args:
+        cur_gray : 当前帧灰度图
+        pre_gray : 前一帧灰度图
+        pre_cfd  ：前一帧融合结果
+        scoremap : 当前帧分割结果
+        difflow  : 光流
+        is_init : 是否第一帧
+    Returns:
+        fusion_cfd : 光流追踪图和预测结果融合图
+    """
+    h, w = scoremap.shape
+    cur_cfd = scoremap.copy()
+
+    if is_init:
+        if h <= 64 or w <= 64:
+            disflow.setFinestScale(1)
+        elif h <= 160 or w <= 160:
+            disflow.setFinestScale(2)
+        else:
+            disflow.setFinestScale(3)
+        fusion_cfd = cur_cfd
+    else:
+        weights = np.ones((h, w), np.float32) * 0.3
+        track_cfd, is_track, weights = cal_optical_flow_tracking(
+            prev_gray, cur_gray, pre_cfd, weights, disflow)
+        fusion_cfd = fuse_optical_flow_tracking(track_cfd, cur_cfd, weights,
+                                                is_track)
+
+    return fusion_cfd
--- a/examples/human_segmentation/pretrain_weights/download_pretrain_weights.py
+++ b/examples/human_segmentation/pretrain_weights/download_pretrain_weights.py
+# coding: utf8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os
+
+LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
+
+import paddlex as pdx
+import paddlehub as hub
+
+# Archive URL for each downloadable model, keyed by model name.
+model_urls = {
+    "PaddleX_HumanSeg_Server_Params":
+    "https://bj.bcebos.com/paddlex/models/humanseg/humanseg_server_params.tar",
+    "PaddleX_HumanSeg_Server_Inference":
+    "https://bj.bcebos.com/paddlex/models/humanseg/humanseg_server_inference.tar",
+    "PaddleX_HumanSeg_Mobile_Params":
+    "https://bj.bcebos.com/paddlex/models/humanseg/humanseg_mobile_params.tar",
+    "PaddleX_HumanSeg_Mobile_Inference":
+    "https://bj.bcebos.com/paddlex/models/humanseg/humanseg_mobile_inference.tar",
+    "PaddleX_HumanSeg_Mobile_Quant":
+    "https://bj.bcebos.com/paddlex/models/humanseg/humanseg_mobile_quant.tar"
+}
+
+# Script entry point: download and unpack every archive next to this file.
+if __name__ == "__main__":
+    # Only the URL is used; the model name is not passed on.
+    for model_name, url in model_urls.items():
+        pdx.utils.download_and_decompress(url=url, path=LOCAL_PATH)
+    print("Pretrained Model download success!")
--- a/examples/human_segmentation/quant_offline.py
+++ b/examples/human_segmentation/quant_offline.py
+# coding: utf8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import paddlex as pdx
+from paddlex.seg import transforms
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='HumanSeg training')
+    parser.add_argument(
+        '--model_dir',
+        dest='model_dir',
+        help='Model path for quant',
+        type=str,
+        default='output/best_model')
+    parser.add_argument(
+        '--batch_size',
+        dest='batch_size',
+        help='Mini batch size',
+        type=int,
+        default=1)
+    parser.add_argument(
+        '--batch_nums',
+        dest='batch_nums',
+        help='Batch number for quant',
+        type=int,
+        default=10)
+    parser.add_argument(
+        '--data_dir',
+        dest='data_dir',
+        help='the root directory of dataset',
+        type=str)
+    parser.add_argument(
+        '--quant_list',
+        dest='quant_list',
+        help='Image file list for model quantization, it can be vat.txt or train.txt',
+        type=str,
+        default=None)
+    parser.add_argument(
+        '--save_dir',
+        dest='save_dir',
+        help='The directory for saving the quant model',
+        type=str,
+        default='./output/quant_offline')
+    parser.add_argument(
+        "--image_shape",
+        dest="image_shape",
+        help="The image shape for net inputs.",
+        nargs=2,
+        default=[192, 192],
+        type=int)
+    return parser.parse_args()
+
+
+def evaluate(args):
+    eval_transforms = transforms.Compose(
+        [transforms.Resize(args.image_shape), transforms.Normalize()])
+
+    eval_dataset = pdx.datasets.SegDataset(
+        data_dir=args.data_dir,
+        file_list=args.quant_list,
+        transforms=eval_transforms)
+
+    model = pdx.load_model(args.model_dir)
+    pdx.slim.export_quant_model(model, eval_dataset, args.batch_size,
+                                args.batch_nums, args.save_dir)
+
+
+# Script entry point: parse the command-line arguments and run evaluate().
+if __name__ == '__main__':
+    args = parse_args()
+
+    evaluate(args)
--- a/examples/human_segmentation/train.py
+++ b/examples/human_segmentation/train.py
+# coding: utf8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+
+import paddlex as pdx
+from paddlex.seg import transforms
+
+MODEL_TYPE = ['HumanSegMobile', 'HumanSegServer']
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='HumanSeg training')
+    parser.add_argument(
+        '--model_type',
+        dest='model_type',
+        help="Model type for traing, which is one of ('HumanSegMobile', 'HumanSegServer')",
+        type=str,
+        default='HumanSegMobile')
+    parser.add_argument(
+        '--data_dir',
+        dest='data_dir',
+        help='The root directory of dataset',
+        type=str)
+    parser.add_argument(
+        '--train_list',
+        dest='train_list',
+        help='Train list file of dataset',
+        type=str)
+    parser.add_argument(
+        '--val_list',
+        dest='val_list',
+        help='Val list file of dataset',
+        type=str,
+        default=None)
+    parser.add_argument(
+        '--save_dir',
+        dest='save_dir',
+        help='The directory for saving the model snapshot',
+        type=str,
+        default='./output')
+    parser.add_argument(
+        '--num_classes',
+        dest='num_classes',
+        help='Number of classes',
+        type=int,
+        default=2)
+    parser.add_argument(
+        "--image_shape",
+        dest="image_shape",
+        help="The image shape for net inputs.",
+        nargs=2,
+        default=[192, 192],
+        type=int)
+    parser.add_argument(
+        '--num_epochs',
+        dest='num_epochs',
+        help='Number epochs for training',
+        type=int,
+        default=100)
+    parser.add_argument(
+        '--batch_size',
+        dest='batch_size',
+        help='Mini batch size',
+        type=int,
+        default=128)
+    parser.add_argument(
+        '--learning_rate',
+        dest='learning_rate',
+        help='Learning rate',
+        type=float,
+        default=0.01)
+    parser.add_argument(
+        '--pretrain_weights',
+        dest='pretrain_weights',
+        help='The path of pretrianed weight',
+        type=str,
+        default=None)
+    parser.add_argument(
+        '--resume_checkpoint',
+        dest='resume_checkpoint',
+        help='The path of resume checkpoint',
+        type=str,
+        default=None)
+    parser.add_argument(
+        '--use_vdl',
+        dest='use_vdl',
+        help='Whether to use visualdl',
+        action='store_true')
+    parser.add_argument(
+        '--save_interval_epochs',
+        dest='save_interval_epochs',
+        help='The interval epochs for save a model snapshot',
+        type=int,
+        default=5)
+
+    return parser.parse_args()
+
+
+def train(args):
+    train_transforms = transforms.Compose([
+        transforms.Resize(args.image_shape), transforms.RandomHorizontalFlip(),
+        transforms.Normalize()
+    ])
+
+    eval_transforms = transforms.Compose(
+        [transforms.Resize(args.image_shape), transforms.Normalize()])
+
+    train_dataset = pdx.datasets.SegDataset(
+        data_dir=args.data_dir,
+        file_list=args.train_list,
+        transforms=train_transforms,
+        shuffle=True)
+    eval_dataset = pdx.datasets.SegDataset(
+        data_dir=args.data_dir,
+        file_list=args.val_list,
+        transforms=eval_transforms)
+
+    if args.model_type == 'HumanSegMobile':
+        model = pdx.seg.HRNet(
+            num_classes=args.num_classes, width='18_small_v1')
+    elif args.model_type == 'HumanSegServer':
+        model = pdx.seg.DeepLabv3p(
+            num_classes=args.num_classes, backbone='Xception65')
+    else:
+        raise ValueError(
+            "--model_type: {} is set wrong, it shold be one of ('HumanSegMobile', "
+            "'HumanSegLite', 'HumanSegServer')".format(args.model_type))
+    model.train(
+        num_epochs=args.num_epochs,
+        train_dataset=train_dataset,
+        train_batch_size=args.batch_size,
+        eval_dataset=eval_dataset,
+        save_interval_epochs=args.save_interval_epochs,
+        learning_rate=args.learning_rate,
+        pretrain_weights=args.pretrain_weights,
+        resume_checkpoint=args.resume_checkpoint,
+        save_dir=args.save_dir,
+        use_vdl=args.use_vdl)
+
+
+# Script entry point: parse the command-line arguments and start training.
+if __name__ == '__main__':
+    args = parse_args()
+    train(args)
--- a/examples/human_segmentation/video_infer.py
+++ b/examples/human_segmentation/video_infer.py
+# coding: utf8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import os.path as osp
+import cv2
+import numpy as np
+
+from postprocess import postprocess, threshold_mask
+import paddlex as pdx
+import paddlex.utils.logging as logging
+from paddlex.seg import transforms
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='HumanSeg inference for video')
+    parser.add_argument(
+        '--model_dir',
+        dest='model_dir',
+        help='Model path for inference',
+        type=str)
+    parser.add_argument(
+        '--video_path',
+        dest='video_path',
+        help='Video path for inference, camera will be used if the path not existing',
+        type=str,
+        default=None)
+    parser.add_argument(
+        '--save_dir',
+        dest='save_dir',
+        help='The directory for saving the inference results',
+        type=str,
+        default='./output')
+    parser.add_argument(
+        "--image_shape",
+        dest="image_shape",
+        help="The image shape for net inputs.",
+        nargs=2,
+        default=[192, 192],
+        type=int)
+
+    return parser.parse_args()
+
+
+def recover(img, im_info):
+    if im_info[0] == 'resize':
+        w, h = im_info[1][1], im_info[1][0]
+        img = cv2.resize(img, (w, h), cv2.INTER_LINEAR)
+    elif im_info[0] == 'padding':
+        w, h = im_info[1][0], im_info[1][0]
+        img = img[0:h, 0:w, :]
+    return img
+
+
+def video_infer(args):
+    resize_h = args.image_shape[1]
+    resize_w = args.image_shape[0]
+
+    model = pdx.load_model(args.model_dir)
+    test_transforms = transforms.Compose([transforms.Normalize()])
+    if not args.video_path:
+        cap = cv2.VideoCapture(0)
+    else:
+        cap = cv2.VideoCapture(args.video_path)
+    if not cap.isOpened():
+        raise IOError("Error opening video stream or file, "
+                      "--video_path whether existing: {}"
+                      " or camera whether working".format(args.video_path))
+        return
+
+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+    disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
+    prev_gray = np.zeros((resize_h, resize_w), np.uint8)
+    prev_cfd = np.zeros((resize_h, resize_w), np.float32)
+    is_init = True
+
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    if args.video_path:
+        logging.info("Please wait. It is computing......")
+        # 用于保存预测结果视频
+        if not osp.exists(args.save_dir):
+            os.makedirs(args.save_dir)
+        out = cv2.VideoWriter(
+            osp.join(args.save_dir, 'result.avi'),
+            cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height))
+        # 开始获取视频帧
+        while cap.isOpened():
+            ret, frame = cap.read()
+            if ret:
+                im_shape = frame.shape
+                im_scale_x = float(resize_w) / float(im_shape[1])
+                im_scale_y = float(resize_h) / float(im_shape[0])
+                im = cv2.resize(
+                    frame,
+                    None,
+                    None,
+                    fx=im_scale_x,
+                    fy=im_scale_y,
+                    interpolation=cv2.INTER_LINEAR)
+                image = im.astype('float32')
+                im_info = ('resize', im_shape[0:2])
+                pred = model.predict(image, test_transforms)
+                score_map = pred['score_map']
+                cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
+                score_map = 255 * score_map[:, :, 1]
+                optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
+                        disflow, is_init)
+                prev_gray = cur_gray.copy()
+                prev_cfd = optflow_map.copy()
+                is_init = False
+                optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
+                optflow_map = threshold_mask(
+                    optflow_map, thresh_bg=0.2, thresh_fg=0.8)
+                img_matting = np.repeat(
+                    optflow_map[:, :, np.newaxis], 3, axis=2)
+                img_matting = recover(img_matting, im_info)
+                bg_im = np.ones_like(img_matting) * 255
+                comb = (img_matting * frame +
+                        (1 - img_matting) * bg_im).astype(np.uint8)
+                out.write(comb)
+            else:
+                break
+        cap.release()
+        out.release()
+
+    else:
+        while cap.isOpened():
+            ret, frame = cap.read()
+            if ret:
+                im_shape = frame.shape
+                im_scale_x = float(resize_w) / float(im_shape[1])
+                im_scale_y = float(resize_h) / float(im_shape[0])
+                im = cv2.resize(
+                    frame,
+                    None,
+                    None,
+                    fx=im_scale_x,
+                    fy=im_scale_y,
+                    interpolation=cv2.INTER_LINEAR)
+                image = im.astype('float32')
+                im_info = ('resize', im_shape[0:2])
+                pred = model.predict(image, test_transforms)
+                score_map = pred['score_map']
+                cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
+                cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
+                score_map = 255 * score_map[:, :, 1]
+                optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
+                                          disflow, is_init)
+                prev_gray = cur_gray.copy()
+                prev_cfd = optflow_map.copy()
+                is_init = False
+                optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
+                optflow_map = threshold_mask(
+                    optflow_map, thresh_bg=0.2, thresh_fg=0.8)
+                img_matting = np.repeat(
+                    optflow_map[:, :, np.newaxis], 3, axis=2)
+                img_matting = recover(img_matting, im_info)
+                bg_im = np.ones_like(img_matting) * 255
+                comb = (img_matting * frame +
+                        (1 - img_matting) * bg_im).astype(np.uint8)
+                cv2.imshow('HumanSegmentation', comb)
+                if cv2.waitKey(1) & 0xFF == ord('q'):
+                    break
+            else:
+                break
+        cap.release()
+
+
+# Script entry point: parse the command-line options and hand them to
+# video_infer, which runs the per-frame segmentation loop above.
+if __name__ == "__main__":
+    args = parse_args()
+    video_infer(args)
--- a/new_tutorials/train/README.md
+++ b/new_tutorials/train/README.md
-# 使用教程——训练模型
-
-本目录下整理了使用PaddleX训练模型的示例代码，代码中均提供了示例数据的自动下载，并均使用单张GPU卡进行训练。
-
-|代码 | 模型任务 | 数据 |
-|------|--------|---------|
-|classification/mobilenetv2.py | 图像分类MobileNetV2 | 蔬菜分类 |
-|classification/resnet50.py | 图像分类ResNet50 | 蔬菜分类 |
-|detection/faster_rcnn_r50_fpn.py | 目标检测FasterRCNN | 昆虫检测 |
-|detection/mask_rcnn_f50_fpn.py | 实例分割MaskRCNN | 垃圾分拣 |
-|segmentation/deeplabv3p.py | 语义分割DeepLabV3| 视盘分割 |
-|segmentation/unet.py | 语义分割UNet | 视盘分割 |
-
-## 开始训练
-在安装PaddleX后，使用如下命令开始训练
-```
-python classification/mobilenetv2.py
-```
--- a/new_tutorials/train/classification/mobilenetv2.py
+++ b/new_tutorials/train/classification/mobilenetv2.py
-import os
-# 选择使用0号卡
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
-from paddlex.cls import transforms
-import paddlex as pdx
-
-# 下载和解压蔬菜分类数据集
-veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
-pdx.utils.download_and_decompress(veg_dataset, path='./')
-
-# 定义训练和验证时的transforms
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms
-train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224])
-eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224])
-
-# 定义训练和验证所用的数据集
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet
-train_dataset = pdx.datasets.ImageNet(
-    data_dir='vegetables_cls',
-    file_list='vegetables_cls/train_list.txt',
-    label_list='vegetables_cls/labels.txt',
-    transforms=train_transforms,
-    shuffle=True)
-eval_dataset = pdx.datasets.ImageNet(
-    data_dir='vegetables_cls',
-    file_list='vegetables_cls/val_list.txt',
-    label_list='vegetables_cls/labels.txt',
-    transforms=eval_transforms)
-
-# 初始化模型，并进行训练
-# 可使用VisualDL查看训练指标
-# VisualDL启动方式: visualdl --logdir output/mobilenetv2/vdl_log --port 8001
-# 浏览器打开 https://0.0.0.0:8001即可
-# 其中0.0.0.0为本机访问，如为远程服务, 改成相应机器IP
-
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50
-model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels))
-model.train(
-    num_epochs=10,
-    train_dataset=train_dataset,
-    train_batch_size=32,
-    eval_dataset=eval_dataset,
-    lr_decay_epochs=[4, 6, 8],
-    learning_rate=0.025,
-    save_dir='output/mobilenetv2',
-    use_vdl=True)
--- a/new_tutorials/train/classification/resnet50.py
+++ b/new_tutorials/train/classification/resnet50.py
-import os
-# 选择使用0号卡
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
-import paddle.fluid as fluid
-from paddlex.cls import transforms
-import paddlex as pdx
-
-# 下载和解压蔬菜分类数据集
-veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
-pdx.utils.download_and_decompress(veg_dataset, path='./')
-
-# 定义训练和验证时的transforms
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms
-train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224])
-eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224])
-
-# 定义训练和验证所用的数据集
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet
-train_dataset = pdx.datasets.ImageNet(
-    data_dir='vegetables_cls',
-    file_list='vegetables_cls/train_list.txt',
-    label_list='vegetables_cls/labels.txt',
-    transforms=train_transforms,
-    shuffle=True)
-eval_dataset = pdx.datasets.ImageNet(
-    data_dir='vegetables_cls',
-    file_list='vegetables_cls/val_list.txt',
-    label_list='vegetables_cls/labels.txt',
-    transforms=eval_transforms)
-
-# PaddleX支持自定义构建优化器
-step_each_epoch = train_dataset.num_samples // 32
-learning_rate = fluid.layers.cosine_decay(
-    learning_rate=0.025, step_each_epoch=step_each_epoch, epochs=10)
-optimizer = fluid.optimizer.Momentum(
-    learning_rate=learning_rate,
-    momentum=0.9,
-    regularization=fluid.regularizer.L2Decay(4e-5))
-
-# 初始化模型，并进行训练
-# 可使用VisualDL查看训练指标
-# VisualDL启动方式: visualdl --logdir output/resnet50/vdl_log --port 8001
-# 浏览器打开 https://0.0.0.0:8001即可
-# 其中0.0.0.0为本机访问，如为远程服务, 改成相应机器IP
-
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50
-model = pdx.cls.ResNet50(num_classes=len(train_dataset.labels))
-model.train(
-    num_epochs=10,
-    train_dataset=train_dataset,
-    train_batch_size=32,
-    eval_dataset=eval_dataset,
-    optimizer=optimizer,
-    save_dir='output/resnet50',
-    use_vdl=True)
--- a/new_tutorials/train/detection/faster_rcnn_r50_fpn.py
+++ b/new_tutorials/train/detection/faster_rcnn_r50_fpn.py
-import os
-# 选择使用0号卡
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
-from paddlex.det import transforms
-import paddlex as pdx
-
-# 下载和解压昆虫检测数据集
-insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
-pdx.utils.download_and_decompress(insect_dataset, path='./')
-
-# 定义训练和验证时的transforms
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms
-train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
-eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])
-
-# 定义训练和验证所用的数据集
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
-train_dataset = pdx.datasets.VOCDetection(
-    data_dir='insect_det',
-    file_list='insect_det/train_list.txt',
-    label_list='insect_det/labels.txt',
-    transforms=train_transforms,
-    shuffle=True)
-eval_dataset = pdx.datasets.VOCDetection(
-    data_dir='insect_det',
-    file_list='insect_det/val_list.txt',
-    label_list='insect_det/labels.txt',
-    transforms=eval_transforms)
-
-# 初始化模型，并进行训练
-# 可使用VisualDL查看训练指标
-# VisualDL启动方式: visualdl --logdir output/faster_rcnn_r50_fpn/vdl_log --port 8001
-# 浏览器打开 https://0.0.0.0:8001即可
-# 其中0.0.0.0为本机访问，如为远程服务, 改成相应机器IP
-# num_classes 需要设置为包含背景类的类别数，即: 目标类别数量 + 1
-
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn
-num_classes = len(train_dataset.labels) + 1
-model = pdx.det.FasterRCNN(num_classes=num_classes)
-model.train(
-    num_epochs=12,
-    train_dataset=train_dataset,
-    train_batch_size=2,
-    eval_dataset=eval_dataset,
-    learning_rate=0.0025,
-    lr_decay_epochs=[8, 11],
-    save_dir='output/faster_rcnn_r50_fpn',
-    use_vdl=True)
--- a/new_tutorials/train/detection/mask_rcnn_r50_fpn.py
+++ b/new_tutorials/train/detection/mask_rcnn_r50_fpn.py
-import os
-# 选择使用0号卡
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
-from paddlex.det import transforms
-import paddlex as pdx
-
-# 下载和解压小度熊分拣数据集
-xiaoduxiong_dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_det.tar.gz'
-pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./')
-
-# 定义训练和验证时的transforms
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms
-train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
-eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])
-
-# 定义训练和验证所用的数据集
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#cocodetection
-train_dataset = pdx.datasets.CocoDetection(
-    data_dir='xiaoduxiong_ins_det/JPEGImages',
-    ann_file='xiaoduxiong_ins_det/train.json',
-    transforms=train_transforms,
-    shuffle=True)
-eval_dataset = pdx.datasets.CocoDetection(
-    data_dir='xiaoduxiong_ins_det/JPEGImages',
-    ann_file='xiaoduxiong_ins_det/val.json',
-    transforms=eval_transforms)
-
-# 初始化模型，并进行训练
-# 可使用VisualDL查看训练指标
-# VisualDL启动方式: visualdl --logdir output/mask_rcnn_r50_fpn/vdl_log --port 8001
-# 浏览器打开 https://0.0.0.0:8001即可
-# 其中0.0.0.0为本机访问，如为远程服务, 改成相应机器IP
-# num_classes 需要设置为包含背景类的类别数，即: 目标类别数量 + 1
-
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn
-num_classes = len(train_dataset.labels) + 1
-model = pdx.det.MaskRCNN(num_classes=num_classes)
-model.train(
-    num_epochs=12,
-    train_dataset=train_dataset,
-    train_batch_size=1,
-    eval_dataset=eval_dataset,
-    learning_rate=0.00125,
-    warmup_steps=10,
-    lr_decay_epochs=[8, 11],
-    save_dir='output/mask_rcnn_r50_fpn',
-    use_vdl=True)
--- a/new_tutorials/train/detection/yolov3_darknet53.py
+++ b/new_tutorials/train/detection/yolov3_darknet53.py
-import os
-# 选择使用0号卡
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
-from paddlex.det import transforms
-import paddlex as pdx
-
-# 下载和解压昆虫检测数据集
-insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
-pdx.utils.download_and_decompress(insect_dataset, path='./')
-
-# 定义训练和验证时的transforms
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedyolotransforms
-train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[608, 608])
-eval_transforms = transforms.ComposedYOLOv3Transforms(mode='eva', shape=[608, 608])
-
-# 定义训练和验证所用的数据集
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
-train_dataset = pdx.datasets.VOCDetection(
-    data_dir='insect_det',
-    file_list='insect_det/train_list.txt',
-    label_list='insect_det/labels.txt',
-    transforms=train_transforms,
-    shuffle=True)
-eval_dataset = pdx.datasets.VOCDetection(
-    data_dir='insect_det',
-    file_list='insect_det/val_list.txt',
-    label_list='insect_det/labels.txt',
-    transforms=eval_transforms)
-
-# 初始化模型，并进行训练
-# 可使用VisualDL查看训练指标
-# VisualDL启动方式: visualdl --logdir output/yolov3_darknet/vdl_log --port 8001
-# 浏览器打开 https://0.0.0.0:8001即可
-# 其中0.0.0.0为本机访问，如为远程服务, 改成相应机器IP
-
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3
-num_classes = len(train_dataset.labels)
-model = pdx.det.YOLOv3(num_classes=num_classes, backbone='DarkNet53')
-model.train(
-    num_epochs=270,
-    train_dataset=train_dataset,
-    train_batch_size=8,
-    eval_dataset=eval_dataset,
-    learning_rate=0.000125,
-    lr_decay_epochs=[210, 240],
-    save_dir='output/yolov3_darknet53',
-    use_vdl=True)
--- a/new_tutorials/train/segmentation/deeplabv3p.py
+++ b/new_tutorials/train/segmentation/deeplabv3p.py
-import os
-# 选择使用0号卡
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
-import paddlex as pdx
-from paddlex.seg import transforms
-
-# 下载和解压视盘分割数据集
-optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
-pdx.utils.download_and_decompress(optic_dataset, path='./')
-
-# 定义训练和验证时的transforms
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
-train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
-eval_transforms = transforms.ComposedSegTransforms(mode='eval')
-
-train_transforms.add_augmenters([
-    transforms.RandomRotate()
-])
-
-# 定义训练和验证所用的数据集
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
-train_dataset = pdx.datasets.SegDataset(
-    data_dir='optic_disc_seg',
-    file_list='optic_disc_seg/train_list.txt',
-    label_list='optic_disc_seg/labels.txt',
-    transforms=train_transforms,
-    shuffle=True)
-eval_dataset = pdx.datasets.SegDataset(
-    data_dir='optic_disc_seg',
-    file_list='optic_disc_seg/val_list.txt',
-    label_list='optic_disc_seg/labels.txt',
-    transforms=eval_transforms)
-
-# 初始化模型，并进行训练
-# 可使用VisualDL查看训练指标
-# VisualDL启动方式: visualdl --logdir output/deeplab/vdl_log --port 8001
-# 浏览器打开 https://0.0.0.0:8001即可
-# 其中0.0.0.0为本机访问，如为远程服务, 改成相应机器IP
-
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p
-num_classes = len(train_dataset.labels)
-model = pdx.seg.DeepLabv3p(num_classes=num_classes)
-model.train(
-    num_epochs=40,
-    train_dataset=train_dataset,
-    train_batch_size=4,
-    eval_dataset=eval_dataset,
-    learning_rate=0.01,
-    save_dir='output/deeplab',
-    use_vdl=True)
--- a/new_tutorials/train/segmentation/unet.py
+++ b/new_tutorials/train/segmentation/unet.py
-import os
-# 选择使用0号卡
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
-import paddlex as pdx
-from paddlex.seg import transforms
-
-# 下载和解压视盘分割数据集
-optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
-pdx.utils.download_and_decompress(optic_dataset, path='./')
-
-# 定义训练和验证时的transforms
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
-train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
-eval_transforms = transforms.ComposedSegTransforms(mode='eval')
-
-# 定义训练和验证所用的数据集
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
-train_dataset = pdx.datasets.SegDataset(
-    data_dir='optic_disc_seg',
-    file_list='optic_disc_seg/train_list.txt',
-    label_list='optic_disc_seg/labels.txt',
-    transforms=train_transforms,
-    shuffle=True)
-eval_dataset = pdx.datasets.SegDataset(
-    data_dir='optic_disc_seg',
-    file_list='optic_disc_seg/val_list.txt',
-    label_list='optic_disc_seg/labels.txt',
-    transforms=eval_transforms)
-
-# 初始化模型，并进行训练
-# 可使用VisualDL查看训练指标
-# VisualDL启动方式: visualdl --logdir output/unet/vdl_log --port 8001
-# 浏览器打开 https://0.0.0.0:8001即可
-# 其中0.0.0.0为本机访问，如为远程服务, 改成相应机器IP
-
-# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet
-num_classes = len(train_dataset.labels)
-model = pdx.seg.UNet(num_classes=num_classes)
-model.train(
-    num_epochs=20,
-    train_dataset=train_dataset,
-    train_batch_size=4,
-    eval_dataset=eval_dataset,
-    learning_rate=0.01,
-    save_dir='output/unet',
-    use_vdl=True)
--- a/paddlex/__init__.py
+++ b/paddlex/__init__.py
@@ -48,9 +48,10 @@ if hub.version.hub_version < '1.6.2':
 env_info = get_environ_info()
 load_model = cv.models.load_model
 datasets = cv.datasets
+transforms = cv.transforms

 log_level = 2

 from . import interpret

-__version__ = '1.0.6'
+__version__ = '1.0.7'
--- a/paddlex/command.py
+++ b/paddlex/command.py
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -15,6 +15,7 @@
 from six import text_type as _text_type
 import argparse
 import sys
+import paddlex.utils.logging as logging


 def arg_parser():
@@ -94,15 +95,15 @@ def main():
    if args.export_onnx:
        assert args.model_dir is not None, "--model_dir should be defined while exporting onnx model"
        assert args.save_dir is not None, "--save_dir should be defined to create onnx model"
-        assert args.fixed_input_shape is not None, "--fixed_input_shape should be defined [w,h] to create onnx model, such as [224,224]"

-        fixed_input_shape = []
-        if args.fixed_input_shape is not None:
-            fixed_input_shape = eval(args.fixed_input_shape)
-            assert len(
-                fixed_input_shape
-            ) == 2, "len of fixed input shape must == 2, such as [224,224]"
-        model = pdx.load_model(args.model_dir, fixed_input_shape)
+        model = pdx.load_model(args.model_dir)
+        if model.status == "Normal" or model.status == "Prune":
+            logging.error(
+                "Only support inference model, try to export model first as below,",
+                exit=False)
+            logging.error(
+                "paddlex --export_inference --model_dir model_path --save_dir infer_model"
+            )
        pdx.convertor.export_onnx_model(model, args.save_dir)



--- a/paddlex/convertor.py
+++ b/paddlex/convertor.py
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -30,119 +30,17 @@ def export_onnx(model_dir, save_dir, fixed_input_shape):


 def export_onnx_model(model, save_dir):
-    support_list = [
-        'ResNet18', 'ResNet34', 'ResNet50', 'ResNet101', 'ResNet50_vd',
-        'ResNet101_vd', 'ResNet50_vd_ssld', 'ResNet101_vd_ssld', 'DarkNet53',
-        'MobileNetV1', 'MobileNetV2', 'DenseNet121', 'DenseNet161',
-        'DenseNet201'
-    ]
-    if model.__class__.__name__ not in support_list:
-        raise Exception("Model: {} unsupport export to ONNX".format(
-            model.__class__.__name__))
-    try:
-        from fluid.utils import op_io_info, init_name_prefix
-        from onnx import helper, checker
-        import fluid_onnx.ops as ops
-        from fluid_onnx.variables import paddle_variable_to_onnx_tensor, paddle_onnx_weight
-        from debug.model_check import debug_model, Tracker
-    except Exception as e:
+    if model.model_type == "detector" or model.__class__.__name__ == "FastSCNN":
        logging.error(
-            "Import Module Failed! Please install paddle2onnx. Related requirements see https://github.com/PaddlePaddle/paddle2onnx."
+            "Only image classifier models and semantic segmentation models(except FastSCNN) are supported to export to ONNX"
        )
-        raise e
-    place = fluid.CPUPlace()
-    exe = fluid.Executor(place)
-    inference_scope = fluid.global_scope()
-    with fluid.scope_guard(inference_scope):
-        test_input_names = [
-            var.name for var in list(model.test_inputs.values())
-        ]
-        inputs_outputs_list = ["fetch", "feed"]
-        weights, weights_value_info = [], []
-        global_block = model.test_prog.global_block()
-        for var_name in global_block.vars:
-            var = global_block.var(var_name)
-            if var_name not in test_input_names\
-                and var.persistable:
-                weight, val_info = paddle_onnx_weight(
-                    var=var, scope=inference_scope)
-                weights.append(weight)
-                weights_value_info.append(val_info)
-
-        # Create inputs
-        inputs = [
-            paddle_variable_to_onnx_tensor(v, global_block)
-            for v in test_input_names
-        ]
-        logging.INFO("load the model parameter done.")
-        onnx_nodes = []
-        op_check_list = []
-        op_trackers = []
-        nms_first_index = -1
-        nms_outputs = []
-        for block in model.test_prog.blocks:
-            for op in block.ops:
-                if op.type in ops.node_maker:
-                    # TODO: deal with the corner case that vars in
-                    #     different blocks have the same name
-                    node_proto = ops.node_maker[str(op.type)](
-                        operator=op, block=block)
-                    op_outputs = []
-                    last_node = None
-                    if isinstance(node_proto, tuple):
-                        onnx_nodes.extend(list(node_proto))
-                        last_node = list(node_proto)
-                    else:
-                        onnx_nodes.append(node_proto)
-                        last_node = [node_proto]
-                    tracker = Tracker(str(op.type), last_node)
-                    op_trackers.append(tracker)
-                    op_check_list.append(str(op.type))
-                    if op.type == "multiclass_nms" and nms_first_index < 0:
-                        nms_first_index = 0
-                    if nms_first_index >= 0:
-                        _, _, output_op = op_io_info(op)
-                        for output in output_op:
-                            nms_outputs.extend(output_op[output])
-                else:
-                    if op.type not in ['feed', 'fetch']:
-                        op_check_list.append(op.type)
-        logging.info('The operator sets to run test case.')
-        logging.info(set(op_check_list))
-
-        # Create outputs
-        # Get the new names for outputs if they've been renamed in nodes' making
-        renamed_outputs = op_io_info.get_all_renamed_outputs()
-        test_outputs = list(model.test_outputs.values())
-        test_outputs_names = [var.name for var in model.test_outputs.values()]
-        test_outputs_names = [
-            name if name not in renamed_outputs else renamed_outputs[name]
-            for name in test_outputs_names
-        ]
-        outputs = [
-            paddle_variable_to_onnx_tensor(v, global_block)
-            for v in test_outputs_names
-        ]
-
-        # Make graph
-        onnx_name = 'paddlex.onnx'
-        onnx_graph = helper.make_graph(
-            nodes=onnx_nodes,
-            name=onnx_name,
-            initializer=weights,
-            inputs=inputs + weights_value_info,
-            outputs=outputs)
-
-        # Make model
-        onnx_model = helper.make_model(
-            onnx_graph, producer_name='PaddlePaddle')
-
-        # Model check
-        checker.check_model(onnx_model)
-        if onnx_model is not None:
-            onnx_model_file = os.path.join(save_dir, onnx_name)
-            if not os.path.exists(save_dir):
-                os.mkdir(save_dir)
-            with open(onnx_model_file, 'wb') as f:
-                f.write(onnx_model.SerializeToString())
-            logging.info("Saved converted model to path: %s" % onnx_model_file)
+    # x2paddle performs the Paddle -> ONNX conversion; it is imported lazily
+    # here so that it is only required when an export is actually requested.
+    try:
+        import x2paddle
+    except ImportError:
+        # Catch only ImportError. A bare `except:` would also trap the
+        # SystemExit that logging.error appears to raise by default
+        # (NOTE(review): inferred from the explicit `exit=False` passed in
+        # paddlex/command.py -- confirm), so an outdated install would be
+        # misreported as a missing one.
+        logging.error(
+            "You need to install x2paddle first, pip install x2paddle>=0.7.4")
+    else:
+        import re
+        # Compare the release numbers numerically: a plain string comparison
+        # orders '0.10.0' before '0.7.4' and would reject newer releases.
+        # Only the leading digits of each component are used, so suffixes
+        # such as 'rc1' are ignored; a missing component counts as older.
+        installed = []
+        for component in getattr(x2paddle, '__version__', '').split('.')[:3]:
+            leading_digits = re.match(r'\d+', component)
+            installed.append(
+                int(leading_digits.group()) if leading_digits else 0)
+        if installed < [0, 7, 4]:
+            logging.error("You need to upgrade x2paddle >= 0.7.4")
+    from x2paddle.op_mapper.paddle_op_mapper import PaddleOpMapper
+    mapper = PaddleOpMapper()
+    mapper.convert(model.test_prog, save_dir)
--- a/paddlex/cv/datasets/coco.py
+++ b/paddlex/cv/datasets/coco.py
@@ -100,7 +100,7 @@ class CocoDetection(VOCDetection):
            gt_score = np.ones((num_bbox, 1), dtype=np.float32)
            is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
            difficult = np.zeros((num_bbox, 1), dtype=np.int32)
-            gt_poly = None
+            gt_poly = [None] * num_bbox

            for i, box in enumerate(bboxes):
                catid = box['category_id']
@@ -108,8 +108,6 @@ class CocoDetection(VOCDetection):
                gt_bbox[i, :] = box['clean_bbox']
                is_crowd[i][0] = box['iscrowd']
                if 'segmentation' in box:
-                    if gt_poly is None:
-                        gt_poly = [None] * num_bbox
                    gt_poly[i] = box['segmentation']

            im_info = {
@@ -121,14 +119,12 @@ class CocoDetection(VOCDetection):
                'gt_class': gt_class,
                'gt_bbox': gt_bbox,
                'gt_score': gt_score,
+                'gt_poly': gt_poly,
                'difficult': difficult
            }
-            if gt_poly is not None:
-                label_info['gt_poly'] = gt_poly

            coco_rec = (im_info, label_info)
            self.file_list.append([im_fname, coco_rec])
-
        if not len(self.file_list) > 0:
            raise Exception('not found any coco record in %s' % (ann_file))
        logging.info("{} samples in file {}".format(

--- a/paddlex/cv/datasets/easydata_cls.py
+++ b/paddlex/cv/datasets/easydata_cls.py
@@ -39,14 +39,14 @@ class EasyDataCls(ImageNet):
            线程和'process'进程两种方式。默认为'process'（Windows和Mac下会强制使用thread，该参数无效）。
        shuffle (bool): 是否需要对数据集中样本打乱顺序。默认为False。
    """
-    
+
    def __init__(self,
                 data_dir,
                 file_list,
                 label_list,
                 transforms=None,
                 num_workers='auto',
-                 buffer_size=100,
+                 buffer_size=8,
                 parallel_method='process',
                 shuffle=False):
        super(ImageNet, self).__init__(
@@ -58,7 +58,7 @@ class EasyDataCls(ImageNet):
        self.file_list = list()
        self.labels = list()
        self._epoch = 0
-        
+
        with open(label_list, encoding=get_encoding(label_list)) as f:
            for line in f:
                item = line.strip()
@@ -73,8 +73,8 @@ class EasyDataCls(ImageNet):
                if not osp.isfile(json_file):
                    continue
                if not osp.exists(img_file):
-                    raise IOError(
-                        'The image file {} is not exist!'.format(img_file))
+                    raise IOError('The image file {} is not exist!'.format(
+                        img_file))
                with open(json_file, mode='r', \
                          encoding=get_encoding(json_file)) as j:
                    json_info = json.load(j)
@@ -83,4 +83,3 @@ class EasyDataCls(ImageNet):
        self.num_samples = len(self.file_list)
        logging.info("{} samples in file {}".format(
            len(self.file_list), file_list))
-    
\ No newline at end of file
--- a/paddlex/cv/datasets/imagenet.py
+++ b/paddlex/cv/datasets/imagenet.py
@@ -45,7 +45,7 @@ class ImageNet(Dataset):
                 label_list,
                 transforms=None,
                 num_workers='auto',
-                 buffer_size=100,
+                 buffer_size=8,
                 parallel_method='process',
                 shuffle=False):
        super(ImageNet, self).__init__(
@@ -70,8 +70,8 @@ class ImageNet(Dataset):
                    continue
                full_path = osp.join(data_dir, items[0])
                if not osp.exists(full_path):
-                    raise IOError(
-                        'The image file {} is not exist!'.format(full_path))
+                    raise IOError('The image file {} is not exist!'.format(
+                        full_path))
                self.file_list.append([full_path, int(items[1])])
        self.num_samples = len(self.file_list)
        logging.info("{} samples in file {}".format(

--- a/paddlex/cv/datasets/seg_dataset.py
+++ b/paddlex/cv/datasets/seg_dataset.py
--- a/paddlex/cv/datasets/voc.py
+++ b/paddlex/cv/datasets/voc.py
--- a/paddlex/cv/models/__init__.py
+++ b/paddlex/cv/models/__init__.py
@@ -43,5 +43,6 @@ from .mask_rcnn import MaskRCNN
 from .unet import UNet
 from .deeplabv3p import DeepLabv3p
 from .hrnet import HRNet
+from .fast_scnn import FastSCNN
 from .load_model import load_model
 from .slim import prune
--- a/paddlex/cv/models/base.py
+++ b/paddlex/cv/models/base.py
--- a/paddlex/cv/models/classifier.py
+++ b/paddlex/cv/models/classifier.py
--- a/paddlex/cv/models/deeplabv3p.py
+++ b/paddlex/cv/models/deeplabv3p.py
--- a/paddlex/cv/models/fast_scnn.py
+++ b/paddlex/cv/models/fast_scnn.py
--- a/paddlex/cv/models/faster_rcnn.py
+++ b/paddlex/cv/models/faster_rcnn.py
--- a/paddlex/cv/models/hrnet.py
+++ b/paddlex/cv/models/hrnet.py
--- a/paddlex/cv/models/load_model.py
+++ b/paddlex/cv/models/load_model.py
--- a/paddlex/cv/models/mask_rcnn.py
+++ b/paddlex/cv/models/mask_rcnn.py
--- a/paddlex/cv/models/slim/prune.py
+++ b/paddlex/cv/models/slim/prune.py
--- a/paddlex/cv/models/slim/prune_config.py
+++ b/paddlex/cv/models/slim/prune_config.py
--- a/paddlex/cv/models/unet.py
+++ b/paddlex/cv/models/unet.py
--- a/paddlex/cv/models/utils/detection_eval.py
+++ b/paddlex/cv/models/utils/detection_eval.py
--- a/paddlex/cv/models/utils/pretrain_weights.py
+++ b/paddlex/cv/models/utils/pretrain_weights.py
--- a/paddlex/cv/models/utils/visualize.py
+++ b/paddlex/cv/models/utils/visualize.py
--- a/paddlex/cv/models/yolo_v3.py
+++ b/paddlex/cv/models/yolo_v3.py
--- a/paddlex/cv/nets/__init__.py
+++ b/paddlex/cv/nets/__init__.py
--- a/paddlex/cv/nets/densenet.py
+++ b/paddlex/cv/nets/densenet.py
--- a/paddlex/cv/nets/hrnet.py
+++ b/paddlex/cv/nets/hrnet.py
--- a/paddlex/cv/nets/segmentation/__init__.py
+++ b/paddlex/cv/nets/segmentation/__init__.py
@@ -15,5 +15,6 @@
 from .unet import UNet
 from .deeplabv3p import DeepLabv3p
 from .hrnet import HRNet
+from .fast_scnn import FastSCNN
 from .model_utils import libs
 from .model_utils import loss
--- a/paddlex/cv/nets/segmentation/deeplabv3p.py
+++ b/paddlex/cv/nets/segmentation/deeplabv3p.py
--- a/paddlex/cv/nets/segmentation/fast_scnn.py
+++ b/paddlex/cv/nets/segmentation/fast_scnn.py
--- a/paddlex/cv/nets/segmentation/hrnet.py
+++ b/paddlex/cv/nets/segmentation/hrnet.py
--- a/paddlex/cv/nets/segmentation/unet.py
+++ b/paddlex/cv/nets/segmentation/unet.py
--- a/paddlex/cv/nets/shufflenet_v2.py
+++ b/paddlex/cv/nets/shufflenet_v2.py
--- a/paddlex/cv/transforms/__init__.py
+++ b/paddlex/cv/transforms/__init__.py
--- a/paddlex/cv/transforms/cls_transforms.py
+++ b/paddlex/cv/transforms/cls_transforms.py
--- a/paddlex/cv/transforms/det_transforms.py
+++ b/paddlex/cv/transforms/det_transforms.py
--- a/paddlex/cv/transforms/seg_transforms.py
+++ b/paddlex/cv/transforms/seg_transforms.py
--- a/paddlex/cv/transforms/visualize.py
+++ b/paddlex/cv/transforms/visualize.py
--- a/paddlex/interpret/as_data_reader/data_path_utils.py
+++ b/paddlex/interpret/as_data_reader/data_path_utils.py
--- a/paddlex/interpret/as_data_reader/readers.py
+++ b/paddlex/interpret/as_data_reader/readers.py
--- a/paddlex/interpret/core/_session_preparation.py
+++ b/paddlex/interpret/core/_session_preparation.py
--- a/paddlex/interpret/core/interpretation.py
+++ b/paddlex/interpret/core/interpretation.py
--- a/paddlex/interpret/core/interpretation_algorithms.py
+++ b/paddlex/interpret/core/interpretation_algorithms.py
--- a/paddlex/interpret/core/normlime_base.py
+++ b/paddlex/interpret/core/normlime_base.py
--- a/paddlex/interpret/visualize.py
+++ b/paddlex/interpret/visualize.py
--- a/paddlex/seg.py
+++ b/paddlex/seg.py
--- a/paddlex/utils/__init__.py
+++ b/paddlex/utils/__init__.py
--- a/paddlex/utils/utils.py
+++ b/paddlex/utils/utils.py
--- a/setup.py
+++ b/setup.py
--- a/tools/codestyle/clang_format.hook
+++ b/tools/codestyle/clang_format.hook
--- a/new_tutorials/train/segmentation/hrnet.py
+++ b/new_tutorials/train/segmentation/hrnet.py