Commit 63e22dbe authored by FlyingQianMM

fix conflicts

New file added
@@ -2,5 +2,4 @@
 name: 1. Data issues
 about: Data annotation, format conversion, and other data-related questions
 ---
-Data type: please state the type of your data, e.g. image classification, object detection, instance segmentation, or semantic segmentation
+State the data type (image classification, object detection, instance segmentation, or semantic segmentation)

@@ -3,4 +3,8 @@
 name: 2. Model training
 about: Issues encountered during model training
 ---
-If model training fails, please paste your training code so that developers can analyze it and respond quickly
+Issue type: model training
+**Issue description**
+====================
+Please describe the problem you ran into here. If model training fails, please paste your training code so that developers can analyze it and respond quickly.

@@ -3,4 +3,9 @@
 name: 3. Model deployment
 about: Deployment-related issues, covering C++, Python, Paddle Lite, and more
 ---
-Describe your deployment environment, deployment requirements, model type, application scenario, and so on, so that developers can respond quickly.
+Issue type: model deployment
+**Issue description**
+========================
+Please describe the problem you ran into here, including your deployment environment, deployment requirements, model type, and application scenario, so that developers can respond quickly.

@@ -2,5 +2,8 @@
 name: 4. PaddleX GUI usage
 about: Questions about using the Paddle GUI client
 ---
+Issue type: PaddleX GUI
+**Issue description**
-PaddleX GUI: https://www.paddlepaddle.org.cn/paddle/paddleX (please keep this line in the issue body)
+===================================
+Please describe the problem you ran into while using the GUI here

@@ -2,3 +2,10 @@
 name: 5. Other issues
 about: Any other question can be filed here
 ---
+Issue type: other
+**Issue description**
+========================
+Please describe your problem here

@@ -14,10 +14,13 @@
 ![support os](https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-yellow.svg)
 ![QQGroup](https://img.shields.io/badge/QQ_Group-1045148026-52B6EF?style=social&logo=tencent-qq&logoColor=000&logoWidth=20)
+[Complete PaddleX online documentation](https://paddlex.readthedocs.io/zh_CN/develop/index.html)
 PaddleX integrates PaddlePaddle's intelligent-vision capabilities for **image classification**, **object detection**, **semantic segmentation**, and **instance segmentation**. It connects the whole deep-learning workflow end to end, from **data preparation** through **model training and optimization** to **multi-platform deployment**, and provides a **unified task API** together with a **graphical development interface demo**, so developers no longer need to install separate toolkits and can complete the full PaddlePaddle workflow in a **low-code** fashion.
 **PaddleX** has been validated in more than ten real-world industry scenarios, including **quality inspection**, **security**, **patrol inspection**, **remote sensing**, **retail**, and **healthcare**. It distills hands-on industry experience and **ships rich practical case tutorials** to help developers bring their projects to production.
+![](./docs/gui/images/paddlexoverview.png)
 ## Installation
@@ -29,7 +32,7 @@
 Through a concise, easy-to-understand Python API, PaddleX offers the smoothest possible deep-learning development experience while balancing feature completeness, development flexibility, and ease of integration.<br>
 **Prerequisites**
-> - paddlepaddle >= 1.8.0
+> - paddlepaddle >= 1.8.4
 > - python >= 3.6
 > - cython
 > - pycocotools
@@ -44,10 +47,11 @@ pip install paddlex -i https://mirror.baidu.com/pypi/simple
 A code-free visual client built on the PaddleX API. It lets developers quickly validate industrial projects and serves as a reference for building their own deep-learning software and applications.
-- Go to the [PaddleX official website](https://www.paddlepaddle.org.cn/paddle/paddlex) to request and download the Paddle X GUI one-click portable installer.
+- Go to the [PaddleX official website](https://www.paddlepaddle.org.cn/paddle/paddlex) to request and download the PaddleX GUI one-click portable installer.
 - See the [PaddleX GUI tutorial](./docs/gui/how_to_use.md) for usage details.
+- [PaddleX GUI installation environment notes](./docs/gui/download.md)
 ## Product modules
@@ -104,15 +108,15 @@ pip install paddlex -i https://mirror.baidu.com/pypi/simple
 ## Communication and feedback
 - Project website: https://www.paddlepaddle.org.cn/paddle/paddlex
-- PaddleX user group: 1045148026 (scan the QR code below with mobile QQ to join quickly)
+- PaddleX user group: 957286141 (scan the QR code below with mobile QQ to join quickly)
-![](./docs/gui/images/QR.jpg)
+![](./docs/gui/images/QR2.jpg)
 ## Changelog
 > [Release history and changes](https://paddlex.readthedocs.io/zh_CN/develop/change_log.html)
+- 2020.09.05 v1.2.0
 - 2020.07.13 v1.1.0
 - 2020.07.12 v1.0.8
 - 2020.05.20 v1.0.0

@@ -14,3 +14,5 @@
 - [Model quantization](../docs/deploy/paddlelite/slim/quant.md)
 - [Model pruning](../docs/deploy/paddlelite/slim/prune.md)
 - [Android platform](../docs/deploy/paddlelite/android.md)
+- [OpenVINO deployment](../docs/deploy/openvino/introduction.md)
+- [Raspberry Pi deployment](../docs/deploy/raspberry/Raspberry.md)
\ No newline at end of file

@@ -320,46 +320,34 @@ target_link_libraries(video_segmenter ${DEPS})
 if (WIN32 AND WITH_MKL)
   add_custom_command(TARGET classifier POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./paddlex_inference/Release/mklml.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./paddlex_inference/Release/libiomp5md.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./paddlex_inference/Release/mkldnn.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./paddlex_inference/mklml.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./paddlex_inference/libiomp5md.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./paddlex_inference/mkldnn.dll
   )
   add_custom_command(TARGET detector POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./paddlex_inference/Release/mklml.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./paddlex_inference/Release/libiomp5md.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./paddlex_inference/Release/mkldnn.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./paddlex_inference/mklml.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./paddlex_inference/libiomp5md.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./paddlex_inference/mkldnn.dll
   )
   add_custom_command(TARGET segmenter POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./paddlex_inference/Release/mklml.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./paddlex_inference/Release/libiomp5md.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./paddlex_inference/Release/mkldnn.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./paddlex_inference/mklml.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./paddlex_inference/libiomp5md.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./paddlex_inference/mkldnn.dll
   )
   add_custom_command(TARGET video_classifier POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./paddlex_inference/Release/mklml.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./paddlex_inference/Release/libiomp5md.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./paddlex_inference/Release/mkldnn.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./paddlex_inference/mklml.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./paddlex_inference/libiomp5md.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./paddlex_inference/mkldnn.dll
   )
   add_custom_command(TARGET video_detector POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./paddlex_inference/Release/mklml.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./paddlex_inference/Release/libiomp5md.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./paddlex_inference/Release/mkldnn.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./paddlex_inference/mklml.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./paddlex_inference/libiomp5md.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./paddlex_inference/mkldnn.dll
   )
   add_custom_command(TARGET video_segmenter POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./paddlex_inference/Release/mklml.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./paddlex_inference/Release/libiomp5md.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./paddlex_inference/Release/mkldnn.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./paddlex_inference/mklml.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./paddlex_inference/libiomp5md.dll
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./paddlex_inference/mkldnn.dll
   )
   # for encryption
   if (EXISTS "${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll")

@@ -29,6 +29,10 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
+DEFINE_int32(mkl_thread_num,
+             omp_get_num_procs(),
+             "Number of mkl threads");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
@@ -56,6 +60,8 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
+             FLAGS_mkl_thread_num,
              FLAGS_gpu_id,
              FLAGS_key);

@@ -31,6 +31,10 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
+DEFINE_int32(mkl_thread_num,
+             omp_get_num_procs(),
+             "Number of mkl threads");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
@@ -61,6 +65,8 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
+             FLAGS_mkl_thread_num,
              FLAGS_gpu_id,
              FLAGS_key);
   int imgs = 1;

@@ -30,6 +30,10 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
+DEFINE_int32(mkl_thread_num,
+             omp_get_num_procs(),
+             "Number of mkl threads");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
@@ -58,6 +62,8 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
+             FLAGS_mkl_thread_num,
              FLAGS_gpu_id,
              FLAGS_key);
   int imgs = 1;

@@ -35,8 +35,12 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
+DEFINE_int32(mkl_thread_num,
+             omp_get_num_procs(),
+             "Number of mkl threads");
 DEFINE_bool(use_camera, false, "Infering with Camera");
 DEFINE_int32(camera_id, 0, "Camera id");
 DEFINE_string(video_path, "", "Path of input video");
@@ -62,6 +66,8 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
+             FLAGS_mkl_thread_num,
              FLAGS_gpu_id,
              FLAGS_key);

@@ -35,6 +35,7 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_bool(use_camera, false, "Infering with Camera");
 DEFINE_int32(camera_id, 0, "Camera id");
@@ -42,6 +43,9 @@ DEFINE_string(video_path, "", "Path of input video");
 DEFINE_bool(show_result, false, "show the result of each frame with a window");
 DEFINE_bool(save_result, true, "save the result of each frame to a video");
 DEFINE_string(key, "", "key of encryption");
+DEFINE_int32(mkl_thread_num,
+             omp_get_num_procs(),
+             "Number of mkl threads");
 DEFINE_string(save_dir, "output", "Path to save visualized image");
 DEFINE_double(threshold,
               0.5,
@@ -64,6 +68,8 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
+             FLAGS_mkl_thread_num,
              FLAGS_gpu_id,
              FLAGS_key);
   // Open video

@@ -35,8 +35,12 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
+DEFINE_int32(mkl_thread_num,
+             omp_get_num_procs(),
+             "Number of mkl threads");
 DEFINE_bool(use_camera, false, "Infering with Camera");
 DEFINE_int32(camera_id, 0, "Camera id");
 DEFINE_string(video_path, "", "Path of input video");
@@ -62,6 +66,8 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
+             FLAGS_mkl_thread_num,
              FLAGS_gpu_id,
              FLAGS_key);
   // Open video

@@ -70,6 +70,8 @@ class Model {
  * @param model_dir: the directory which contains model.yml
  * @param use_gpu: use gpu or not when inferring
  * @param use_trt: use TensorRT or not when inferring
+ * @param use_mkl: use mkl or not when inferring
+ * @param mkl_thread_num: number of threads for mkldnn when inferring
  * @param gpu_id: the id of the gpu when inferring with gpu
  * @param key: the key of encryption when using an encrypted model
  * @param use_ir_optim: use ir optimization when inferring
@@ -77,15 +79,26 @@ class Model {
   void Init(const std::string& model_dir,
             bool use_gpu = false,
             bool use_trt = false,
+            bool use_mkl = true,
+            int mkl_thread_num = 4,
             int gpu_id = 0,
             std::string key = "",
             bool use_ir_optim = true) {
-    create_predictor(model_dir, use_gpu, use_trt, gpu_id, key, use_ir_optim);
+    create_predictor(
+        model_dir,
+        use_gpu,
+        use_trt,
+        use_mkl,
+        mkl_thread_num,
+        gpu_id,
+        key,
+        use_ir_optim);
   }
   void create_predictor(const std::string& model_dir,
                         bool use_gpu = false,
                         bool use_trt = false,
+                        bool use_mkl = true,
+                        int mkl_thread_num = 4,
                         int gpu_id = 0,
                         std::string key = "",
                         bool use_ir_optim = true);
@@ -219,5 +232,7 @@ class Model {
   std::vector<float> outputs_;
   // a predictor which runs the model prediction
   std::unique_ptr<paddle::PaddlePredictor> predictor_;
+  // input channel
+  int input_channel_;
 };
 }  // namespace PaddleX
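
The two new parameters are positional and sit between use_trt and gpu_id, so every existing caller has to be updated, as the demo programs above were. Below is a minimal sketch of a CPU-only caller against the Init signature shown here; the model directory and thread count are illustrative placeholders, not values from this commit.

#include "include/paddlex/paddlex.h"

int main() {
  PaddleX::Model model;
  // Argument order after this change:
  // use_gpu, use_trt, use_mkl, mkl_thread_num, gpu_id, key, use_ir_optim.
  model.Init("./inference_model",  // directory that contains model.yml
             false,                // use_gpu: stay on CPU
             false,                // use_trt: TensorRT off
             true,                 // use_mkl: enable MKL-DNN for CPU inference
             8,                    // mkl_thread_num: CPU math library threads
             0,                    // gpu_id: ignored when use_gpu is false
             "");                  // key: empty for an unencrypted model
  return 0;
}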
...@@ -37,7 +37,7 @@ struct Mask { ...@@ -37,7 +37,7 @@ struct Mask {
}; };
/* /*
* @brief * @brief
* This class represents target box in detection or instance segmentation tasks. * This class represents target box in detection or instance segmentation tasks.
* */ * */
struct Box { struct Box {
...@@ -47,7 +47,7 @@ struct Box { ...@@ -47,7 +47,7 @@ struct Box {
// confidence score // confidence score
float score; float score;
std::vector<float> coordinate; std::vector<float> coordinate;
Mask<float> mask; Mask<int> mask;
}; };
/* /*
......
@@ -21,6 +21,7 @@
 #include <unordered_map>
 #include <utility>
 #include <vector>
+#include <iostream>
 #include <opencv2/core/core.hpp>
 #include <opencv2/highgui/highgui.hpp>
@@ -81,6 +82,16 @@ class Normalize : public Transform {
   virtual void Init(const YAML::Node& item) {
     mean_ = item["mean"].as<std::vector<float>>();
     std_ = item["std"].as<std::vector<float>>();
+    if (item["min_val"].IsDefined()) {
+      min_val_ = item["min_val"].as<std::vector<float>>();
+    } else {
+      min_val_ = std::vector<float>(mean_.size(), 0.);
+    }
+    if (item["max_val"].IsDefined()) {
+      max_val_ = item["max_val"].as<std::vector<float>>();
+    } else {
+      max_val_ = std::vector<float>(mean_.size(), 255.);
+    }
   }
   virtual bool Run(cv::Mat* im, ImageBlob* data);
@@ -88,6 +99,8 @@ class Normalize : public Transform {
  private:
   std::vector<float> mean_;
   std::vector<float> std_;
+  std::vector<float> min_val_;
+  std::vector<float> max_val_;
 };
 /*
@@ -216,8 +229,7 @@ class Padding : public Transform {
   }
   if (item["im_padding_value"].IsDefined()) {
     im_value_ = item["im_padding_value"].as<std::vector<float>>();
-  }
-  else {
+  } else {
     im_value_ = {0, 0, 0};
   }
 }
@@ -229,6 +241,25 @@ class Padding : public Transform {
   int height_ = 0;
   std::vector<float> im_value_;
 };
+/*
+ * @brief
+ * This class executes the clip operation on an image matrix
+ * */
+class Clip : public Transform {
+ public:
+  virtual void Init(const YAML::Node& item) {
+    min_val_ = item["min_val"].as<std::vector<float>>();
+    max_val_ = item["max_val"].as<std::vector<float>>();
+  }
+
+  virtual bool Run(cv::Mat* im, ImageBlob* data);
+
+ private:
+  std::vector<float> min_val_;
+  std::vector<float> max_val_;
+};
 /*
  * @brief
  * This class is the transform operations manager. It stores all necessary
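
Taken together, the new fields mean Normalize now min-max-rescales each channel before standardizing; with the defaults (min_val = 0, max_val = 255) this reproduces the previous divide-by-255 behavior. For channel c, the operation implemented in transforms.cpp below is:

\[ x'_c = \left( \frac{x_c - \mathrm{min\_val}_c}{\mathrm{max\_val}_c - \mathrm{min\_val}_c} - \mathrm{mean}_c \right) \Big/ \mathrm{std}_c \]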
@@ -8,10 +8,37 @@ fi
 # download pre-compiled opencv lib
 OPENCV_URL=https://bj.bcebos.com/paddleseg/deploy/opencv3.4.6gcc4.8ffmpeg.tar.gz2
+{
+    system_name=`awk -F= '/^NAME/{print $2}' /etc/os-release `
+} || {
+    echo "[ERROR] Something went wrong, possibly because your system is not Ubuntu. Refer to this doc for more information: https://github.com/PaddlePaddle/PaddleX/tree/develop/docs/deploy/opencv.md"
+    exit -1
+}
+# download pre-compiled opencv lib
+OPENCV_URL=https://bj.bcebos.com/paddleseg/deploy/opencv3.4.6gcc4.8ffmpeg.tar.gz2
+if [ $system_name == '"Ubuntu"' ]
+then
+    system_version=`awk -F= '/^VERSION_ID/{print $2}' /etc/os-release `
+    if [ $system_version == '"18.04"' ]
+    then
+        OPENCV_URL=https://bj.bcebos.com/paddlex/deploy/opencv3.4.6gcc4.8ffmpeg_ubuntu_18.04.tar.gz2
+    elif [ $system_version == '"16.04"' ]
+    then
+        OPENCV_URL=https://bj.bcebos.com/paddleseg/deploy/opencv3.4.6gcc4.8ffmpeg.tar.gz2
+    else
+        echo "[ERROR] Cannot find a pre-compiled opencv lib for your system environment. Refer to this doc for more information: https://github.com/PaddlePaddle/PaddleX/tree/develop/docs/deploy/opencv.md"
+        exit -1
+    fi
+else
+    echo "[ERROR] Cannot find a pre-compiled opencv lib for your system environment. Refer to this doc for more information: https://github.com/PaddlePaddle/PaddleX/tree/develop/docs/deploy/opencv.md"
+    exit -1
+fi
 if [ ! -d "./deps/opencv3.4.6gcc4.8ffmpeg/" ]; then
     mkdir -p deps
     cd deps
-    wget -c ${OPENCV_URL}
+    wget -c ${OPENCV_URL} -O opencv3.4.6gcc4.8ffmpeg.tar.gz2
     tar xvfj opencv3.4.6gcc4.8ffmpeg.tar.gz2
     rm -rf opencv3.4.6gcc4.8ffmpeg.tar.gz2
     cd ..

@@ -5,9 +5,9 @@ WITH_MKL=ON
 # whether to integrate TensorRT (only effective when WITH_GPU=ON)
 WITH_TENSORRT=OFF
 # path to TensorRT; if you integrate TensorRT, change this to your actual install path
-TENSORRT_DIR=/root/projects/TensorRT/
+TENSORRT_DIR=$(pwd)/TensorRT/
 # path to the Paddle inference library; change this to your actual install path
-PADDLE_DIR=/root/projects/fluid_inference
+PADDLE_DIR=$(pwd)/fluid_inference
 # whether the Paddle inference library is built as a static library
 # when using TensorRT, the Paddle inference library is usually dynamic
 WITH_STATIC_LIB=OFF
@@ -16,14 +16,18 @@ CUDA_LIB=/usr/local/cuda/lib64
 # CUDNN lib path
 CUDNN_LIB=/usr/local/cuda/lib64
+{
+    bash $(pwd)/scripts/bootstrap.sh # download the pre-compiled encryption tool and opencv dependencies
+} || {
+    echo "Failed to execute scripts/bootstrap.sh"
+    exit -1
+}
 # whether to load an encrypted model
 WITH_ENCRYPTION=ON
 # path to the encryption tool; no change needed when using the bundled pre-compiled version
-sh $(pwd)/scripts/bootstrap.sh # download the pre-compiled encryption tool
 ENCRYPTION_DIR=$(pwd)/paddlex-encryption
 # OPENCV path; no change needed when using the bundled pre-compiled version
-sh $(pwd)/scripts/bootstrap.sh # download the pre-compiled opencv
 OPENCV_DIR=$(pwd)/deps/opencv3.4.6gcc4.8ffmpeg/
 # no changes needed below this line

@@ -11,16 +11,25 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include <math.h>
 #include <omp.h>
 #include <algorithm>
 #include <fstream>
 #include <cstring>
 #include "include/paddlex/paddlex.h"
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
 namespace PaddleX {
 void Model::create_predictor(const std::string& model_dir,
                              bool use_gpu,
                              bool use_trt,
+                             bool use_mkl,
+                             int mkl_thread_num,
                              int gpu_id,
                              std::string key,
                              bool use_ir_optim) {
@@ -40,7 +49,7 @@ void Model::create_predictor(const std::string& model_dir,
   }
 #endif
   if (yaml_input == "") {
     // read yaml file
     std::ifstream yaml_fin(yaml_file);
     yaml_fin.seekg(0, std::ios::end);
     size_t yaml_file_size = yaml_fin.tellg();
@@ -48,7 +57,7 @@ void Model::create_predictor(const std::string& model_dir,
     yaml_fin.seekg(0);
     yaml_fin.read(&yaml_input[0], yaml_file_size);
   }
   // load yaml file
   if (!load_config(yaml_input)) {
     std::cerr << "Parse file 'model.yml' failed!" << std::endl;
     exit(-1);
@@ -57,6 +66,15 @@ void Model::create_predictor(const std::string& model_dir,
   if (key == "") {
     config.SetModel(model_file, params_file);
   }
+  if (use_mkl && !use_gpu) {
+    if (name != "HRNet" && name != "DeepLabv3p" && name != "PPYOLO") {
+      config.EnableMKLDNN();
+      config.SetCpuMathLibraryNumThreads(mkl_thread_num);
+    } else {
+      std::cerr << "HRNet/DeepLabv3p/PPYOLO are not supported "
+                << "for the use of mkldnn" << std::endl;
+    }
+  }
   if (use_gpu) {
     config.EnableUseGpu(100, gpu_id);
   } else {
@@ -64,15 +82,15 @@ void Model::create_predictor(const std::string& model_dir,
   }
   config.SwitchUseFeedFetchOps(false);
   config.SwitchSpecifyInputNames(true);
   // enable graph Optim
 #if defined(__arm__) || defined(__aarch64__)
   config.SwitchIrOptim(false);
 #else
   config.SwitchIrOptim(use_ir_optim);
 #endif
   // enable Memory Optim
   config.EnableMemoryOptim();
-  if (use_trt) {
+  if (use_trt && use_gpu) {
     config.EnableTensorRtEngine(
         1 << 20 /* workspace_size*/,
         32 /* max_batch_size*/,
@@ -108,14 +126,19 @@ bool Model::load_config(const std::string& yaml_input) {
       return false;
     }
   }
   // build data preprocess stream
   transforms_.Init(config["Transforms"], to_rgb);
   // read label list
   labels.clear();
   for (const auto& item : config["_Attributes"]["labels"]) {
     int index = labels.size();
     labels[index] = item.as<std::string>();
   }
+  if (config["_init_params"]["input_channel"].IsDefined()) {
+    input_channel_ = config["_init_params"]["input_channel"].as<int>();
+  } else {
+    input_channel_ = 3;
+  }
   return true;
 }
@@ -152,19 +175,19 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
                  "to function predict()!" << std::endl;
     return false;
   }
   // im preprocess
   if (!preprocess(im, &inputs_)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
   }
   // predict
   auto in_tensor = predictor_->GetInputTensor("image");
   int h = inputs_.new_im_size_[0];
   int w = inputs_.new_im_size_[1];
-  in_tensor->Reshape({1, 3, h, w});
+  in_tensor->Reshape({1, input_channel_, h, w});
   in_tensor->copy_from_cpu(inputs_.im_data_.data());
   predictor_->ZeroCopyRun();
   // get result
   auto output_names = predictor_->GetOutputNames();
   auto output_tensor = predictor_->GetOutputTensor(output_names[0]);
   std::vector<int> output_shape = output_tensor->shape();
@@ -174,7 +197,7 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
   }
   outputs_.resize(size);
   output_tensor->copy_to_cpu(outputs_.data());
   // postprocess
   auto ptr = std::max_element(std::begin(outputs_), std::end(outputs_));
   result->category_id = std::distance(std::begin(outputs_), ptr);
   result->score = *ptr;
@@ -198,27 +221,27 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
     return false;
   }
   inputs_batch_.assign(im_batch.size(), ImageBlob());
   // preprocess
   if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
   }
   // predict
   int batch_size = im_batch.size();
   auto in_tensor = predictor_->GetInputTensor("image");
   int h = inputs_batch_[0].new_im_size_[0];
   int w = inputs_batch_[0].new_im_size_[1];
-  in_tensor->Reshape({batch_size, 3, h, w});
-  std::vector<float> inputs_data(batch_size * 3 * h * w);
+  in_tensor->Reshape({batch_size, input_channel_, h, w});
+  std::vector<float> inputs_data(batch_size * input_channel_ * h * w);
   for (int i = 0; i < batch_size; ++i) {
     std::copy(inputs_batch_[i].im_data_.begin(),
               inputs_batch_[i].im_data_.end(),
-              inputs_data.begin() + i * 3 * h * w);
+              inputs_data.begin() + i * input_channel_ * h * w);
   }
   in_tensor->copy_from_cpu(inputs_data.data());
   // in_tensor->copy_from_cpu(inputs_.im_data_.data());
   predictor_->ZeroCopyRun();
   // get result
   auto output_names = predictor_->GetOutputNames();
   auto output_tensor = predictor_->GetOutputTensor(output_names[0]);
   std::vector<int> output_shape = output_tensor->shape();
@@ -228,7 +251,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
   }
   outputs_.resize(size);
   output_tensor->copy_to_cpu(outputs_.data());
   // postprocess
   (*results).clear();
   (*results).resize(batch_size);
   int single_batch_size = size / batch_size;
@@ -258,7 +281,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
     return false;
   }
   // preprocess
   if (!preprocess(im, &inputs_)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
@@ -267,10 +290,10 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
   int h = inputs_.new_im_size_[0];
   int w = inputs_.new_im_size_[1];
   auto im_tensor = predictor_->GetInputTensor("image");
-  im_tensor->Reshape({1, 3, h, w});
+  im_tensor->Reshape({1, input_channel_, h, w});
   im_tensor->copy_from_cpu(inputs_.im_data_.data());
-  if (name == "YOLOv3") {
+  if (name == "YOLOv3" || name == "PPYOLO") {
     auto im_size_tensor = predictor_->GetInputTensor("im_size");
     im_size_tensor->Reshape({1, 2});
     im_size_tensor->copy_from_cpu(inputs_.ori_im_size_.data());
@@ -288,7 +311,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
     im_info_tensor->copy_from_cpu(im_info);
     im_shape_tensor->copy_from_cpu(im_shape);
   }
   // predict
   predictor_->ZeroCopyRun();
   std::vector<float> output_box;
@@ -306,7 +329,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
     return true;
   }
   int num_boxes = size / 6;
   // box postprocess
   for (int i = 0; i < num_boxes; ++i) {
     Box box;
     box.category_id = static_cast<int>(round(output_box[i * 6]));
@@ -321,7 +344,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
     box.coordinate = {xmin, ymin, w, h};
     result->boxes.push_back(std::move(box));
   }
   // mask postprocess
   if (name == "MaskRCNN") {
     std::vector<float> output_mask;
     auto output_mask_tensor = predictor_->GetOutputTensor(output_names[1]);
@@ -337,12 +360,22 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
     result->mask_resolution = output_mask_shape[2];
     for (int i = 0; i < result->boxes.size(); ++i) {
       Box* box = &result->boxes[i];
-      auto begin_mask =
-          output_mask.begin() + (i * classes + box->category_id) * mask_pixels;
-      auto end_mask = begin_mask + mask_pixels;
-      box->mask.data.assign(begin_mask, end_mask);
       box->mask.shape = {static_cast<int>(box->coordinate[2]),
                          static_cast<int>(box->coordinate[3])};
+      auto begin_mask =
+          output_mask.data() + (i * classes + box->category_id) * mask_pixels;
+      cv::Mat bin_mask(result->mask_resolution,
+                       result->mask_resolution,
+                       CV_32FC1,
+                       begin_mask);
+      cv::resize(bin_mask,
+                 bin_mask,
+                 cv::Size(box->mask.shape[0], box->mask.shape[1]));
+      cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY);
+      auto mask_int_begin = reinterpret_cast<float*>(bin_mask.data);
+      auto mask_int_end =
+          mask_int_begin + box->mask.shape[0] * box->mask.shape[1];
+      box->mask.data.assign(mask_int_begin, mask_int_end);
     }
   }
   return true;
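
For each detected box, the Mask R-CNN head returns a classes x R x R float probability map, where R is mask_resolution. The reworked postprocessing above keeps only the R x R slice for the predicted category c, resizes it to the box's w x h, and binarizes it at 0.5, which is why Box::mask changed from Mask<float> to Mask<int> in results.h:

\[ m(u,v) = \mathbb{1}\left[ \mathrm{resize}_{w \times h}(p_c)(u,v) > 0.5 \right] \in \{0, 1\} \]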
@@ -366,12 +399,12 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
   inputs_batch_.assign(im_batch.size(), ImageBlob());
   int batch_size = im_batch.size();
   // preprocess
   if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
   }
   // RCNN model padding
   if (batch_size > 1) {
     if (name == "FasterRCNN" || name == "MaskRCNN") {
       int max_h = -1;
@@ -411,15 +444,15 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
   int h = inputs_batch_[0].new_im_size_[0];
   int w = inputs_batch_[0].new_im_size_[1];
   auto im_tensor = predictor_->GetInputTensor("image");
-  im_tensor->Reshape({batch_size, 3, h, w});
-  std::vector<float> inputs_data(batch_size * 3 * h * w);
+  im_tensor->Reshape({batch_size, input_channel_, h, w});
+  std::vector<float> inputs_data(batch_size * input_channel_ * h * w);
   for (int i = 0; i < batch_size; ++i) {
     std::copy(inputs_batch_[i].im_data_.begin(),
               inputs_batch_[i].im_data_.end(),
-              inputs_data.begin() + i * 3 * h * w);
+              inputs_data.begin() + i * input_channel_ * h * w);
   }
   im_tensor->copy_from_cpu(inputs_data.data());
-  if (name == "YOLOv3") {
+  if (name == "YOLOv3" || name == "PPYOLO") {
     auto im_size_tensor = predictor_->GetInputTensor("im_size");
     im_size_tensor->Reshape({batch_size, 2});
     std::vector<int> inputs_data_size(batch_size * 2);
@@ -452,10 +485,10 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
     im_info_tensor->copy_from_cpu(im_info.data());
     im_shape_tensor->copy_from_cpu(im_shape.data());
   }
   // predict
   predictor_->ZeroCopyRun();
   // get all box
   std::vector<float> output_box;
   auto output_names = predictor_->GetOutputNames();
   auto output_box_tensor = predictor_->GetOutputTensor(output_names[0]);
@@ -472,7 +505,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
   }
   auto lod_vector = output_box_tensor->lod();
   int num_boxes = size / 6;
   // box postprocess
   (*results).clear();
   (*results).resize(batch_size);
   for (int i = 0; i < lod_vector[0].size() - 1; ++i) {
@@ -492,7 +525,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
     }
   }
   // mask postprocess
   if (name == "MaskRCNN") {
     std::vector<float> output_mask;
     auto output_mask_tensor = predictor_->GetOutputTensor(output_names[1]);
@@ -509,14 +542,24 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
     for (int i = 0; i < lod_vector[0].size() - 1; ++i) {
       (*results)[i].mask_resolution = output_mask_shape[2];
       for (int j = 0; j < (*results)[i].boxes.size(); ++j) {
         Box* box = &(*results)[i].boxes[j];
         int category_id = box->category_id;
-        auto begin_mask = output_mask.begin() +
-                          (mask_idx * classes + category_id) * mask_pixels;
-        auto end_mask = begin_mask + mask_pixels;
-        box->mask.data.assign(begin_mask, end_mask);
         box->mask.shape = {static_cast<int>(box->coordinate[2]),
                            static_cast<int>(box->coordinate[3])};
+        auto begin_mask = output_mask.data() +
+                          (mask_idx * classes + category_id) * mask_pixels;
+        cv::Mat bin_mask(output_mask_shape[2],
+                         output_mask_shape[2],
+                         CV_32FC1,
+                         begin_mask);
+        cv::resize(bin_mask,
+                   bin_mask,
+                   cv::Size(box->mask.shape[0], box->mask.shape[1]));
+        cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY);
+        auto mask_int_begin = reinterpret_cast<float*>(bin_mask.data);
+        auto mask_int_end =
+            mask_int_begin + box->mask.shape[0] * box->mask.shape[1];
+        box->mask.data.assign(mask_int_begin, mask_int_end);
         mask_idx++;
       }
     }
return false; return false;
} }
// 处理输入图像 // preprocess
if (!preprocess(im, &inputs_)) { if (!preprocess(im, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl; std::cerr << "Preprocess failed!" << std::endl;
return false; return false;
...@@ -546,13 +589,13 @@ bool Model::predict(const cv::Mat& im, SegResult* result) { ...@@ -546,13 +589,13 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
int h = inputs_.new_im_size_[0]; int h = inputs_.new_im_size_[0];
int w = inputs_.new_im_size_[1]; int w = inputs_.new_im_size_[1];
auto im_tensor = predictor_->GetInputTensor("image"); auto im_tensor = predictor_->GetInputTensor("image");
im_tensor->Reshape({1, 3, h, w}); im_tensor->Reshape({1, input_channel_, h, w});
im_tensor->copy_from_cpu(inputs_.im_data_.data()); im_tensor->copy_from_cpu(inputs_.im_data_.data());
// 使用加载的模型进行预测 // predict
predictor_->ZeroCopyRun(); predictor_->ZeroCopyRun();
// 获取预测置信度,经过argmax后的labelmap // get labelmap
auto output_names = predictor_->GetOutputNames(); auto output_names = predictor_->GetOutputNames();
auto output_label_tensor = predictor_->GetOutputTensor(output_names[0]); auto output_label_tensor = predictor_->GetOutputTensor(output_names[0]);
std::vector<int> output_label_shape = output_label_tensor->shape(); std::vector<int> output_label_shape = output_label_tensor->shape();
...@@ -565,7 +608,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) { ...@@ -565,7 +608,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
result->label_map.data.resize(size); result->label_map.data.resize(size);
output_label_tensor->copy_to_cpu(result->label_map.data.data()); output_label_tensor->copy_to_cpu(result->label_map.data.data());
// 获取预测置信度scoremap // get scoremap
auto output_score_tensor = predictor_->GetOutputTensor(output_names[1]); auto output_score_tensor = predictor_->GetOutputTensor(output_names[1]);
std::vector<int> output_score_shape = output_score_tensor->shape(); std::vector<int> output_score_shape = output_score_tensor->shape();
size = 1; size = 1;
...@@ -577,7 +620,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) { ...@@ -577,7 +620,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
result->score_map.data.resize(size); result->score_map.data.resize(size);
output_score_tensor->copy_to_cpu(result->score_map.data.data()); output_score_tensor->copy_to_cpu(result->score_map.data.data());
// 解析输出结果到原图大小 // get origin image result
std::vector<uint8_t> label_map(result->label_map.data.begin(), std::vector<uint8_t> label_map(result->label_map.data.begin(),
result->label_map.data.end()); result->label_map.data.end());
cv::Mat mask_label(result->label_map.shape[1], cv::Mat mask_label(result->label_map.shape[1],
...@@ -647,7 +690,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch, ...@@ -647,7 +690,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
return false; return false;
} }
// 处理输入图像 // preprocess
inputs_batch_.assign(im_batch.size(), ImageBlob()); inputs_batch_.assign(im_batch.size(), ImageBlob());
if (!preprocess(im_batch, &inputs_batch_, thread_num)) { if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
std::cerr << "Preprocess failed!" << std::endl; std::cerr << "Preprocess failed!" << std::endl;
...@@ -660,20 +703,20 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch, ...@@ -660,20 +703,20 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
int h = inputs_batch_[0].new_im_size_[0]; int h = inputs_batch_[0].new_im_size_[0];
int w = inputs_batch_[0].new_im_size_[1]; int w = inputs_batch_[0].new_im_size_[1];
auto im_tensor = predictor_->GetInputTensor("image"); auto im_tensor = predictor_->GetInputTensor("image");
im_tensor->Reshape({batch_size, 3, h, w}); im_tensor->Reshape({batch_size, input_channel_, h, w});
std::vector<float> inputs_data(batch_size * 3 * h * w); std::vector<float> inputs_data(batch_size * input_channel_ * h * w);
for (int i = 0; i < batch_size; ++i) { for (int i = 0; i < batch_size; ++i) {
std::copy(inputs_batch_[i].im_data_.begin(), std::copy(inputs_batch_[i].im_data_.begin(),
inputs_batch_[i].im_data_.end(), inputs_batch_[i].im_data_.end(),
inputs_data.begin() + i * 3 * h * w); inputs_data.begin() + i * input_channel_ * h * w);
} }
im_tensor->copy_from_cpu(inputs_data.data()); im_tensor->copy_from_cpu(inputs_data.data());
// im_tensor->copy_from_cpu(inputs_.im_data_.data()); // im_tensor->copy_from_cpu(inputs_.im_data_.data());
// 使用加载的模型进行预测 // predict
predictor_->ZeroCopyRun(); predictor_->ZeroCopyRun();
// 获取预测置信度,经过argmax后的labelmap // get labelmap
auto output_names = predictor_->GetOutputNames(); auto output_names = predictor_->GetOutputNames();
auto output_label_tensor = predictor_->GetOutputTensor(output_names[0]); auto output_label_tensor = predictor_->GetOutputTensor(output_names[0]);
std::vector<int> output_label_shape = output_label_tensor->shape(); std::vector<int> output_label_shape = output_label_tensor->shape();
...@@ -698,7 +741,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch, ...@@ -698,7 +741,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
(*results)[i].label_map.data.data()); (*results)[i].label_map.data.data());
} }
// 获取预测置信度scoremap // get scoremap
auto output_score_tensor = predictor_->GetOutputTensor(output_names[1]); auto output_score_tensor = predictor_->GetOutputTensor(output_names[1]);
std::vector<int> output_score_shape = output_score_tensor->shape(); std::vector<int> output_score_shape = output_score_tensor->shape();
size = 1; size = 1;
...@@ -722,7 +765,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch, ...@@ -722,7 +765,7 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
(*results)[i].score_map.data.data()); (*results)[i].score_map.data.data());
} }
// 解析输出结果到原图大小 // get origin image result
for (int i = 0; i < batch_size; ++i) { for (int i = 0; i < batch_size; ++i) {
std::vector<uint8_t> label_map((*results)[i].label_map.data.begin(), std::vector<uint8_t> label_map((*results)[i].label_map.data.begin(),
(*results)[i].label_map.data.end()); (*results)[i].label_map.data.end());
......
@@ -12,12 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "include/paddlex/transforms.h"
+#include <math.h>
 #include <iostream>
 #include <string>
 #include <vector>
-#include <math.h>
-#include "include/paddlex/transforms.h"
 namespace PaddleX {
@@ -28,16 +29,20 @@ std::map<std::string, int> interpolations = {{"LINEAR", cv::INTER_LINEAR},
                                              {"LANCZOS4", cv::INTER_LANCZOS4}};
 bool Normalize::Run(cv::Mat* im, ImageBlob* data) {
-  for (int h = 0; h < im->rows; h++) {
-    for (int w = 0; w < im->cols; w++) {
-      im->at<cv::Vec3f>(h, w)[0] =
-          (im->at<cv::Vec3f>(h, w)[0] / 255.0 - mean_[0]) / std_[0];
-      im->at<cv::Vec3f>(h, w)[1] =
-          (im->at<cv::Vec3f>(h, w)[1] / 255.0 - mean_[1]) / std_[1];
-      im->at<cv::Vec3f>(h, w)[2] =
-          (im->at<cv::Vec3f>(h, w)[2] / 255.0 - mean_[2]) / std_[2];
-    }
-  }
+  std::vector<float> range_val;
+  for (int c = 0; c < im->channels(); c++) {
+    range_val.push_back(max_val_[c] - min_val_[c]);
+  }
+  std::vector<cv::Mat> split_im;
+  cv::split(*im, split_im);
+  for (int c = 0; c < im->channels(); c++) {
+    cv::subtract(split_im[c], cv::Scalar(min_val_[c]), split_im[c]);
+    cv::divide(split_im[c], cv::Scalar(range_val[c]), split_im[c]);
+    cv::subtract(split_im[c], cv::Scalar(mean_[c]), split_im[c]);
+    cv::divide(split_im[c], cv::Scalar(std_[c]), split_im[c]);
+  }
+  cv::merge(split_im, *im);
   return true;
 }
...@@ -111,11 +116,22 @@ bool Padding::Run(cv::Mat* im, ImageBlob* data) { ...@@ -111,11 +116,22 @@ bool Padding::Run(cv::Mat* im, ImageBlob* data) {
<< ", but they should be greater than 0." << std::endl; << ", but they should be greater than 0." << std::endl;
return false; return false;
} }
  cv::Scalar value = cv::Scalar(im_value_[0], im_value_[1], im_value_[2]);
  cv::copyMakeBorder(
      *im, *im, 0, padding_h, 0, padding_w, cv::BORDER_CONSTANT, value);

  std::vector<cv::Mat> padded_im_per_channel;
  for (size_t i = 0; i < im->channels(); i++) {
    const cv::Mat per_channel = cv::Mat(im->rows + padding_h,
                                        im->cols + padding_w,
                                        CV_32FC1,
                                        cv::Scalar(im_value_[i]));
    padded_im_per_channel.push_back(per_channel);
  }
  cv::Mat padded_im;
  cv::merge(padded_im_per_channel, padded_im);
  cv::Rect im_roi = cv::Rect(0, 0, im->cols, im->rows);
  im->copyTo(padded_im(im_roi));
  *im = padded_im;
data->new_im_size_[0] = im->rows; data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols; data->new_im_size_[1] = im->cols;
return true; return true;
} }
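The new Padding::Run supports a per-channel padding value: it builds one constant-filled single-channel Mat at the padded size for each channel, merges them, and copies the original image into the top-left ROI. The same idea in NumPy, as a sketch (padding_h/padding_w extend the bottom and right edges, matching the C++ code):

import numpy as np

def pad_bottom_right(im, padding_h, padding_w, im_value):
    # Constant-fill each channel with its own value, then place the image top-left.
    h, w, c = im.shape
    padded = np.empty((h + padding_h, w + padding_w, c), dtype=np.float32)
    for ch in range(c):
        padded[:, :, ch] = im_value[ch]
    padded[:h, :w, :] = im
    return padded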
...@@ -161,12 +177,26 @@ bool Resize::Run(cv::Mat* im, ImageBlob* data) { ...@@ -161,12 +177,26 @@ bool Resize::Run(cv::Mat* im, ImageBlob* data) {
return true; return true;
} }
bool Clip::Run(cv::Mat* im, ImageBlob* data) {
std::vector<cv::Mat> split_im;
cv::split(*im, split_im);
for (int c = 0; c < im->channels(); c++) {
cv::threshold(split_im[c], split_im[c], max_val_[c], max_val_[c],
cv::THRESH_TRUNC);
cv::subtract(cv::Scalar(0), split_im[c], split_im[c]);
cv::threshold(split_im[c], split_im[c], min_val_[c], min_val_[c],
cv::THRESH_TRUNC);
cv::divide(split_im[c], cv::Scalar(-1), split_im[c]);
}
cv::merge(split_im, *im);
return true;
}
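Clip::Run clamps each channel into a per-channel range. cv::threshold with THRESH_TRUNC computes min(src, thresh), so the upper bound is applied directly; for the lower bound the data is negated, truncated at min_val_, and divided by -1, which bounds values from below at -min_val_ (identical to min_val_ when the minimum is 0). A direct NumPy clamp expresses the same intent, as a sketch:

import numpy as np

def clip(im, min_val, max_val):
    # Per-channel clamp equivalent to the two THRESH_TRUNC passes in Clip::Run.
    im = im.astype(np.float32)
    for c in range(im.shape[2]):
        im[:, :, c] = np.clip(im[:, :, c], min_val[c], max_val[c])
    return im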
void Transforms::Init(const YAML::Node& transforms_node, bool to_rgb) { void Transforms::Init(const YAML::Node& transforms_node, bool to_rgb) {
transforms_.clear(); transforms_.clear();
to_rgb_ = to_rgb; to_rgb_ = to_rgb;
for (const auto& item : transforms_node) { for (const auto& item : transforms_node) {
std::string name = item.begin()->first.as<std::string>(); std::string name = item.begin()->first.as<std::string>();
std::cout << "trans name: " << name << std::endl;
std::shared_ptr<Transform> transform = CreateTransform(name); std::shared_ptr<Transform> transform = CreateTransform(name);
transform->Init(item.begin()->second); transform->Init(item.begin()->second);
transforms_.push_back(transform); transforms_.push_back(transform);
...@@ -187,6 +217,8 @@ std::shared_ptr<Transform> Transforms::CreateTransform( ...@@ -187,6 +217,8 @@ std::shared_ptr<Transform> Transforms::CreateTransform(
return std::make_shared<Padding>(); return std::make_shared<Padding>();
} else if (transform_name == "ResizeByLong") { } else if (transform_name == "ResizeByLong") {
return std::make_shared<ResizeByLong>(); return std::make_shared<ResizeByLong>();
} else if (transform_name == "Clip") {
return std::make_shared<Clip>();
} else { } else {
std::cerr << "There's unexpected transform(name='" << transform_name std::cerr << "There's unexpected transform(name='" << transform_name
<< "')." << std::endl; << "')." << std::endl;
...@@ -195,11 +227,11 @@ std::shared_ptr<Transform> Transforms::CreateTransform( ...@@ -195,11 +227,11 @@ std::shared_ptr<Transform> Transforms::CreateTransform(
} }
bool Transforms::Run(cv::Mat* im, ImageBlob* data) { bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
// 按照transforms中预处理算子顺序处理图像 // do all preprocess ops by order
if (to_rgb_) { if (to_rgb_) {
cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB); cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
} }
(*im).convertTo(*im, CV_32FC3); (*im).convertTo(*im, CV_32FC(im->channels()));
data->ori_im_size_[0] = im->rows; data->ori_im_size_[0] = im->rows;
data->ori_im_size_[1] = im->cols; data->ori_im_size_[1] = im->cols;
data->new_im_size_[0] = im->rows; data->new_im_size_[0] = im->rows;
...@@ -211,8 +243,8 @@ bool Transforms::Run(cv::Mat* im, ImageBlob* data) { ...@@ -211,8 +243,8 @@ bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
} }
} }
// 将图像由NHWC转为NCHW格式 // data format NHWC to NCHW
// 同时转为连续的内存块存储到ImageBlob // img data save to ImageBlob
int h = im->rows; int h = im->rows;
int w = im->cols; int w = im->cols;
int c = im->channels(); int c = im->channels();
......
...@@ -47,7 +47,7 @@ cv::Mat Visualize(const cv::Mat& img, ...@@ -47,7 +47,7 @@ cv::Mat Visualize(const cv::Mat& img,
boxes[i].coordinate[2], boxes[i].coordinate[2],
boxes[i].coordinate[3]); boxes[i].coordinate[3]);
// 生成预测框和标题 // draw box and title
std::string text = boxes[i].category; std::string text = boxes[i].category;
int c1 = colormap[3 * boxes[i].category_id + 0]; int c1 = colormap[3 * boxes[i].category_id + 0];
int c2 = colormap[3 * boxes[i].category_id + 1]; int c2 = colormap[3 * boxes[i].category_id + 1];
...@@ -63,13 +63,13 @@ cv::Mat Visualize(const cv::Mat& img, ...@@ -63,13 +63,13 @@ cv::Mat Visualize(const cv::Mat& img,
origin.x = roi.x; origin.x = roi.x;
origin.y = roi.y; origin.y = roi.y;
// 生成预测框标题的背景 // background
cv::Rect text_back = cv::Rect(boxes[i].coordinate[0], cv::Rect text_back = cv::Rect(boxes[i].coordinate[0],
boxes[i].coordinate[1] - text_size.height, boxes[i].coordinate[1] - text_size.height,
text_size.width, text_size.width,
text_size.height); text_size.height);
// 绘图和文字 // draw
cv::rectangle(vis_img, roi, roi_color, 2); cv::rectangle(vis_img, roi, roi_color, 2);
cv::rectangle(vis_img, text_back, roi_color, -1); cv::rectangle(vis_img, text_back, roi_color, -1);
cv::putText(vis_img, cv::putText(vis_img,
...@@ -80,18 +80,16 @@ cv::Mat Visualize(const cv::Mat& img, ...@@ -80,18 +80,16 @@ cv::Mat Visualize(const cv::Mat& img,
cv::Scalar(255, 255, 255), cv::Scalar(255, 255, 255),
thickness); thickness);
// 生成实例分割mask // mask
if (boxes[i].mask.data.size() == 0) { if (boxes[i].mask.data.size() == 0) {
continue; continue;
} }
      cv::Mat bin_mask(result.mask_resolution,
                       result.mask_resolution,
                       CV_32FC1,
                       boxes[i].mask.data.data());
      cv::resize(bin_mask,
                 bin_mask,
                 cv::Size(boxes[i].mask.shape[0], boxes[i].mask.shape[1]));
      cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY);

      std::vector<float> mask_data;
      mask_data.assign(boxes[i].mask.data.begin(), boxes[i].mask.data.end());
      cv::Mat bin_mask(boxes[i].mask.shape[1],
                       boxes[i].mask.shape[0],
                       CV_32FC1,
                       mask_data.data());
cv::Mat full_mask = cv::Mat::zeros(vis_img.size(), CV_8UC1); cv::Mat full_mask = cv::Mat::zeros(vis_img.size(), CV_8UC1);
bin_mask.copyTo(full_mask(roi)); bin_mask.copyTo(full_mask(roi));
cv::Mat mask_ch[3]; cv::Mat mask_ch[3];
......
...@@ -23,6 +23,7 @@ import org.opencv.core.Scalar; ...@@ -23,6 +23,7 @@ import org.opencv.core.Scalar;
import org.opencv.core.Size; import org.opencv.core.Size;
import org.opencv.imgproc.Imgproc; import org.opencv.imgproc.Imgproc;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
...@@ -101,6 +102,15 @@ public class Transforms { ...@@ -101,6 +102,15 @@ public class Transforms {
if (info.containsKey("coarsest_stride")) { if (info.containsKey("coarsest_stride")) {
padding.coarsest_stride = (int) info.get("coarsest_stride"); padding.coarsest_stride = (int) info.get("coarsest_stride");
} }
        if (info.containsKey("im_padding_value")) {
            List<Double> im_padding_value = (List<Double>) info.get("im_padding_value");
            if (im_padding_value.size() != 3) {
                Log.e(TAG, "len of im_padding_value in padding must == 3.");
            }
            for (int k = 0; k < im_padding_value.size(); k++) {
                padding.padding_value[k] = im_padding_value.get(k);
            }
        }
if (info.containsKey("target_size")) { if (info.containsKey("target_size")) {
if (info.get("target_size") instanceof Integer) { if (info.get("target_size") instanceof Integer) {
padding.width = (int) info.get("target_size"); padding.width = (int) info.get("target_size");
...@@ -124,7 +134,7 @@ public class Transforms { ...@@ -124,7 +134,7 @@ public class Transforms {
if(transformsMode.equalsIgnoreCase("RGB")){ if(transformsMode.equalsIgnoreCase("RGB")){
Imgproc.cvtColor(inputMat, inputMat, Imgproc.COLOR_BGR2RGB); Imgproc.cvtColor(inputMat, inputMat, Imgproc.COLOR_BGR2RGB);
}else if(!transformsMode.equalsIgnoreCase("BGR")){ }else if(!transformsMode.equalsIgnoreCase("BGR")){
Log.e(TAG, "transformsMode only support RGB or BGR"); Log.e(TAG, "transformsMode only support RGB or BGR.");
} }
inputMat.convertTo(inputMat, CvType.CV_32FC(3)); inputMat.convertTo(inputMat, CvType.CV_32FC(3));
...@@ -136,16 +146,15 @@ public class Transforms { ...@@ -136,16 +146,15 @@ public class Transforms {
int h = inputMat.height(); int h = inputMat.height();
int c = inputMat.channels(); int c = inputMat.channels();
imageBlob.setImageData(new float[w * h * c]); imageBlob.setImageData(new float[w * h * c]);
        int[] channelStride = new int[]{w * h, w * h * 2};
        for (int y = 0; y < h; y++) {
            for (int x = 0; x < w; x++) {
                double[] color = inputMat.get(y, x);
                imageBlob.getImageData()[y * w + x] = (float) (color[0]);
                imageBlob.getImageData()[y * w + x + channelStride[0]] = (float) (color[1]);
                imageBlob.getImageData()[y * w + x + channelStride[1]] = (float) (color[2]);
            }
        }

        Mat singleChannelMat = new Mat(h, w, CvType.CV_32FC(1));
        float[] singleChannelImageData = new float[w * h];
        for (int i = 0; i < c; i++) {
            Core.extractChannel(inputMat, singleChannelMat, i);
            singleChannelMat.get(0, 0, singleChannelImageData);
            System.arraycopy(singleChannelImageData, 0, imageBlob.getImageData(), i * w * h, w * h);
        }
return imageBlob; return imageBlob;
} }
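The rewritten Java loop converts the packed HWC image into the planar CHW layout the network expects, using Core.extractChannel plus one bulk System.arraycopy per channel instead of a per-pixel get(). The same layout change in NumPy, as a sketch:

import numpy as np

def hwc_to_chw(im):
    # HWC -> flat CHW buffer, matching what setImageData receives.
    return np.ascontiguousarray(im.transpose(2, 0, 1)).ravel()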
...@@ -248,6 +257,7 @@ public class Transforms { ...@@ -248,6 +257,7 @@ public class Transforms {
private double width; private double width;
private double height; private double height;
private double coarsest_stride; private double coarsest_stride;
        private double[] padding_value = {0.0, 0.0, 0.0};
public Mat run(Mat inputMat, ImageBlob imageBlob) { public Mat run(Mat inputMat, ImageBlob imageBlob) {
int origin_w = inputMat.width(); int origin_w = inputMat.width();
...@@ -264,7 +274,7 @@ public class Transforms { ...@@ -264,7 +274,7 @@ public class Transforms {
} }
imageBlob.setNewImageSize(inputMat.height(),2); imageBlob.setNewImageSize(inputMat.height(),2);
imageBlob.setNewImageSize(inputMat.width(),3); imageBlob.setNewImageSize(inputMat.width(),3);
Core.copyMakeBorder(inputMat, inputMat, 0, (int)padding_h, 0, (int)padding_w, Core.BORDER_CONSTANT, new Scalar(0)); Core.copyMakeBorder(inputMat, inputMat, 0, (int)padding_h, 0, (int)padding_w, Core.BORDER_CONSTANT, new Scalar(padding_value));
return inputMat; return inputMat;
} }
} }
......
...@@ -31,8 +31,11 @@ import org.opencv.core.Scalar; ...@@ -31,8 +31,11 @@ import org.opencv.core.Scalar;
import org.opencv.core.Size; import org.opencv.core.Size;
import org.opencv.imgproc.Imgproc; import org.opencv.imgproc.Imgproc;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Date;
import java.util.List; import java.util.List;
import java.util.ListIterator; import java.util.ListIterator;
import java.util.Map; import java.util.Map;
...@@ -120,13 +123,11 @@ public class Visualize { ...@@ -120,13 +123,11 @@ public class Visualize {
int new_w = (int)imageBlob.getNewImageSize()[3]; int new_w = (int)imageBlob.getNewImageSize()[3];
Mat mask = new Mat(new_h, new_w, CvType.CV_32FC(1)); Mat mask = new Mat(new_h, new_w, CvType.CV_32FC(1));
float[] scoreData = new float[new_h*new_w]; float[] scoreData = new float[new_h*new_w];
        for (int h = 0; h < new_h; h++) {
            for (int w = 0; w < new_w; w++) {
                scoreData[new_h * h + w] = (1 - result.getMask().getScoreData()[cutoutClass + h * new_h + w]) * 255;
            }
        }

        System.arraycopy(result.getMask().getScoreData(), cutoutClass * new_h * new_w, scoreData, 0, new_h * new_w);
mask.put(0,0, scoreData); mask.put(0,0, scoreData);
Core.multiply(mask, new Scalar(255), mask);
mask.convertTo(mask,CvType.CV_8UC(1)); mask.convertTo(mask,CvType.CV_8UC(1));
ListIterator<Map.Entry<String, int[]>> reverseReshapeInfo = new ArrayList<Map.Entry<String, int[]>>(imageBlob.getReshapeInfo().entrySet()).listIterator(imageBlob.getReshapeInfo().size()); ListIterator<Map.Entry<String, int[]>> reverseReshapeInfo = new ArrayList<Map.Entry<String, int[]>>(imageBlob.getReshapeInfo().entrySet()).listIterator(imageBlob.getReshapeInfo().size());
while (reverseReshapeInfo.hasPrevious()) { while (reverseReshapeInfo.hasPrevious()) {
Map.Entry<String, int[]> entry = reverseReshapeInfo.previous(); Map.Entry<String, int[]> entry = reverseReshapeInfo.previous();
......
...@@ -8,7 +8,9 @@ SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) ...@@ -8,7 +8,9 @@ SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
SET(OPENVINO_DIR "" CACHE PATH "Location of libraries") SET(OPENVINO_DIR "" CACHE PATH "Location of libraries")
SET(OPENCV_DIR "" CACHE PATH "Location of libraries") SET(OPENCV_DIR "" CACHE PATH "Location of libraries")
SET(GFLAGS_DIR "" CACHE PATH "Location of libraries") SET(GFLAGS_DIR "" CACHE PATH "Location of libraries")
SET(GLOG_DIR "" CACHE PATH "Location of libraries")
SET(NGRAPH_LIB "" CACHE PATH "Location of libraries") SET(NGRAPH_LIB "" CACHE PATH "Location of libraries")
SET(ARCH "" CACHE PATH "Location of libraries")
include(cmake/yaml-cpp.cmake) include(cmake/yaml-cpp.cmake)
...@@ -27,6 +29,12 @@ macro(safe_set_static_flag) ...@@ -27,6 +29,12 @@ macro(safe_set_static_flag)
endforeach(flag_var) endforeach(flag_var)
endmacro() endmacro()
if(NOT WIN32)
if (NOT DEFINED ARCH OR ${ARCH} STREQUAL "")
message(FATAL_ERROR "please set ARCH with -DARCH=x86 or -DARCH=armv7")
endif()
endif()
if (NOT DEFINED OPENVINO_DIR OR ${OPENVINO_DIR} STREQUAL "") if (NOT DEFINED OPENVINO_DIR OR ${OPENVINO_DIR} STREQUAL "")
message(FATAL_ERROR "please set OPENVINO_DIR with -DOPENVINO_DIR=/path/inference_engine") message(FATAL_ERROR "please set OPENVINO_DIR with -DOPENVINO_DIR=/path/inference_engine")
endif() endif()
...@@ -39,19 +47,32 @@ if (NOT DEFINED GFLAGS_DIR OR ${GFLAGS_DIR} STREQUAL "") ...@@ -39,19 +47,32 @@ if (NOT DEFINED GFLAGS_DIR OR ${GFLAGS_DIR} STREQUAL "")
message(FATAL_ERROR "please set GFLAGS_DIR with -DGFLAGS_DIR=/path/gflags") message(FATAL_ERROR "please set GFLAGS_DIR with -DGFLAGS_DIR=/path/gflags")
endif() endif()
if (NOT DEFINED GLOG_DIR OR ${GLOG_DIR} STREQUAL "")
message(FATAL_ERROR "please set GLOG_DIR with -DGLOG_DIR=/path/glog")
endif()
if (NOT DEFINED NGRAPH_LIB OR ${NGRAPH_LIB} STREQUAL "") if (NOT DEFINED NGRAPH_LIB OR ${NGRAPH_LIB} STREQUAL "")
message(FATAL_ERROR "please set NGRAPH_LIB with -DNGRAPH_LIB=/path/ngraph") message(FATAL_ERROR "please set NGRAPH_LIB with -DNGRAPH_LIB=/path/ngraph")
endif() endif()
include_directories("${OPENVINO_DIR}") include_directories("${OPENVINO_DIR}")
link_directories("${OPENVINO_DIR}/lib")
include_directories("${OPENVINO_DIR}/include") include_directories("${OPENVINO_DIR}/include")
link_directories("${OPENVINO_DIR}/external/tbb/lib")
include_directories("${OPENVINO_DIR}/external/tbb/include/tbb") include_directories("${OPENVINO_DIR}/external/tbb/include/tbb")
link_directories("${OPENVINO_DIR}/lib")
link_directories("${OPENVINO_DIR}/external/tbb/lib")
if(WIN32)
link_directories("${OPENVINO_DIR}/lib/intel64/Release")
link_directories("${OPENVINO_DIR}/bin/intel64/Release")
endif()
link_directories("${GFLAGS_DIR}/lib") link_directories("${GFLAGS_DIR}/lib")
include_directories("${GFLAGS_DIR}/include") include_directories("${GFLAGS_DIR}/include")
link_directories("${GLOG_DIR}/lib")
include_directories("${GLOG_DIR}/include")
link_directories("${NGRAPH_LIB}") link_directories("${NGRAPH_LIB}")
link_directories("${NGRAPH_LIB}/lib") link_directories("${NGRAPH_LIB}/lib")
...@@ -79,14 +100,29 @@ else() ...@@ -79,14 +100,29 @@ else()
set(CMAKE_STATIC_LIBRARY_PREFIX "") set(CMAKE_STATIC_LIBRARY_PREFIX "")
endif() endif()
if(WITH_STATIC_LIB)
  set(DEPS ${OPENVINO_DIR}/lib/intel64/libinference_engine${CMAKE_STATIC_LIBRARY_SUFFIX})
  set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/intel64/libinference_engine_legacy${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
  set(DEPS ${OPENVINO_DIR}/lib/intel64/libinference_engine${CMAKE_SHARED_LIBRARY_SUFFIX})
  set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/intel64/libinference_engine_legacy${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()

if(WIN32)
  set(DEPS ${OPENVINO_DIR}/lib/intel64/Release/inference_engine${CMAKE_STATIC_LIBRARY_SUFFIX})
  set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/intel64/Release/inference_engine_legacy${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
  if (ARCH STREQUAL "armv7")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv7-a")
    if(WITH_STATIC_LIB)
      set(DEPS ${OPENVINO_DIR}/lib/armv7l/libinference_engine${CMAKE_STATIC_LIBRARY_SUFFIX})
      set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/armv7l/libinference_engine_legacy${CMAKE_STATIC_LIBRARY_SUFFIX})
    else()
      set(DEPS ${OPENVINO_DIR}/lib/armv7l/libinference_engine${CMAKE_SHARED_LIBRARY_SUFFIX})
      set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/armv7l/libinference_engine_legacy${CMAKE_SHARED_LIBRARY_SUFFIX})
    endif()
  else()
    if(WITH_STATIC_LIB)
      set(DEPS ${OPENVINO_DIR}/lib/intel64/libinference_engine${CMAKE_STATIC_LIBRARY_SUFFIX})
      set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/intel64/libinference_engine_legacy${CMAKE_STATIC_LIBRARY_SUFFIX})
    else()
      set(DEPS ${OPENVINO_DIR}/lib/intel64/libinference_engine${CMAKE_SHARED_LIBRARY_SUFFIX})
      set(DEPS ${DEPS} ${OPENVINO_DIR}/lib/intel64/libinference_engine_legacy${CMAKE_SHARED_LIBRARY_SUFFIX})
    endif()
  endif()
endif(WIN32)
if (NOT WIN32) if (NOT WIN32)
set(DEPS ${DEPS} set(DEPS ${DEPS}
...@@ -94,7 +130,7 @@ if (NOT WIN32) ...@@ -94,7 +130,7 @@ if (NOT WIN32)
) )
else() else()
set(DEPS ${DEPS} set(DEPS ${DEPS}
glog gflags_static libprotobuf zlibstatic xxhash libyaml-cppmt) glog gflags_static libyaml-cppmt)
set(DEPS ${DEPS} libcmt shlwapi) set(DEPS ${DEPS} libcmt shlwapi)
endif(NOT WIN32) endif(NOT WIN32)
...@@ -105,7 +141,14 @@ if (NOT WIN32) ...@@ -105,7 +141,14 @@ if (NOT WIN32)
endif() endif()
set(DEPS ${DEPS} ${OpenCV_LIBS}) set(DEPS ${DEPS} ${OpenCV_LIBS})
add_executable(classifier src/classifier.cpp src/transforms.cpp src/paddlex.cpp) add_executable(classifier demo/classifier.cpp src/transforms.cpp src/paddlex.cpp)
ADD_DEPENDENCIES(classifier ext-yaml-cpp) ADD_DEPENDENCIES(classifier ext-yaml-cpp)
target_link_libraries(classifier ${DEPS}) target_link_libraries(classifier ${DEPS})
add_executable(segmenter demo/segmenter.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
ADD_DEPENDENCIES(segmenter ext-yaml-cpp)
target_link_libraries(segmenter ${DEPS})
add_executable(detector demo/detector.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
ADD_DEPENDENCIES(detector ext-yaml-cpp)
target_link_libraries(detector ${DEPS})
{
  "configurations": [
    {
      "name": "x64-Release",
      "generator": "Ninja",
      "configurationType": "RelWithDebInfo",
      "inheritEnvironments": [ "msvc_x64_x64" ],
      "buildRoot": "${projectDir}\\out\\build\\${name}",
      "installRoot": "${projectDir}\\out\\install\\${name}",
      "cmakeCommandArgs": "",
      "buildCommandArgs": "-v",
      "ctestCommandArgs": "",
      "variables": [
        {
          "name": "OPENCV_DIR",
          "value": "C:/projects/opencv",
          "type": "PATH"
        },
        {
          "name": "OPENVINO_LIB",
          "value": "C:/projetcs/inference_engine",
          "type": "PATH"
        }
      ]
    }
  ]
}

{
  "configurations": [
    {
      "name": "x64-Release",
      "generator": "Ninja",
      "configurationType": "RelWithDebInfo",
      "inheritEnvironments": [ "msvc_x64_x64" ],
      "buildRoot": "${projectDir}\\out\\build\\${name}",
      "installRoot": "${projectDir}\\out\\install\\${name}",
      "cmakeCommandArgs": "",
      "buildCommandArgs": "-v",
      "ctestCommandArgs": "",
      "variables": [
        {
          "name": "OPENCV_DIR",
          "value": "/path/to/opencv",
          "type": "PATH"
        },
        {
          "name": "OPENVINO_DIR",
          "value": "C:/Program Files (x86)/IntelSWTools/openvino/deployment_tools/inference_engine",
          "type": "PATH"
        },
        {
          "name": "NGRAPH_LIB",
          "value": "C:/Program Files (x86)/IntelSWTools/openvino/deployment_tools/ngraph/lib",
          "type": "PATH"
        },
        {
          "name": "GFLAGS_DIR",
          "value": "/path/to/gflags",
          "type": "PATH"
        },
        {
          "name": "WITH_STATIC_LIB",
          "value": "True",
          "type": "BOOL"
        },
        {
          "name": "GLOG_DIR",
          "value": "/path/to/glog",
          "type": "PATH"
        }
      ]
    }
  ]
}
\ No newline at end of file
find_package(Git REQUIRED)
include(ExternalProject) include(ExternalProject)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#include "include/paddlex/paddlex.h" #include "include/paddlex/paddlex.h"
DEFINE_string(model_dir, "", "Path of inference model"); DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_string(cfg_dir, "", "Path of inference model"); DEFINE_string(cfg_file, "", "Path of PaddleX model yml file");
DEFINE_string(device, "CPU", "Device name"); DEFINE_string(device, "CPU", "Device name");
DEFINE_string(image, "", "Path of test image file"); DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file"); DEFINE_string(image_list, "", "Path of test image list file");
...@@ -35,8 +35,8 @@ int main(int argc, char** argv) { ...@@ -35,8 +35,8 @@ int main(int argc, char** argv) {
std::cerr << "--model_dir need to be defined" << std::endl; std::cerr << "--model_dir need to be defined" << std::endl;
return -1; return -1;
} }
if (FLAGS_cfg_dir == "") { if (FLAGS_cfg_file == "") {
std::cerr << "--cfg_dir need to be defined" << std::endl; std::cerr << "--cfg_file need to be defined" << std::endl;
return -1; return -1;
} }
if (FLAGS_image == "" & FLAGS_image_list == "") { if (FLAGS_image == "" & FLAGS_image_list == "") {
...@@ -44,11 +44,11 @@ int main(int argc, char** argv) { ...@@ -44,11 +44,11 @@ int main(int argc, char** argv) {
return -1; return -1;
} }
// 加载模型 // load model
PaddleX::Model model; PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_cfg_dir, FLAGS_device); model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_device);
// 进行预测 // predict
if (FLAGS_image_list != "") { if (FLAGS_image_list != "") {
std::ifstream inf(FLAGS_image_list); std::ifstream inf(FLAGS_image_list);
if (!inf) { if (!inf) {
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <omp.h>
#include <algorithm>
#include <chrono> // NOLINT
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>
#include "include/paddlex/paddlex.h"
#include "include/paddlex/visualize.h"
using namespace std::chrono; // NOLINT
DEFINE_string(model_dir, "", "Path of openvino model xml file");
DEFINE_string(cfg_file, "", "Path of PaddleX model yaml file");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_string(device, "CPU", "Device name");
DEFINE_string(save_dir, "", "Path to save visualized image");
DEFINE_int32(batch_size, 1, "Batch size of infering");
DEFINE_double(threshold,
0.5,
"The minimum scores of target boxes which are shown");
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir == "") {
std::cerr << "--model_dir need to be defined" << std::endl;
return -1;
}
if (FLAGS_cfg_file == "") {
std::cerr << "--cfg_file need to be defined" << std::endl;
return -1;
}
  if (FLAGS_image == "" && FLAGS_image_list == "") {
std::cerr << "--image or --image_list need to be defined" << std::endl;
return -1;
}
// load model
PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_device);
int imgs = 1;
auto colormap = PaddleX::GenerateColorMap(model.labels.size());
// predict
if (FLAGS_image_list != "") {
std::ifstream inf(FLAGS_image_list);
if (!inf) {
std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
return -1;
}
std::string image_path;
while (getline(inf, image_path)) {
PaddleX::DetResult result;
cv::Mat im = cv::imread(image_path, 1);
model.predict(im, &result);
if (FLAGS_save_dir != "") {
cv::Mat vis_img = PaddleX::Visualize(
im, result, model.labels, colormap, FLAGS_threshold);
std::string save_path =
            PaddleX::generate_save_path(FLAGS_save_dir, image_path);
cv::imwrite(save_path, vis_img);
std::cout << "Visualized output saved as " << save_path << std::endl;
}
}
} else {
PaddleX::DetResult result;
cv::Mat im = cv::imread(FLAGS_image, 1);
model.predict(im, &result);
for (int i = 0; i < result.boxes.size(); ++i) {
      std::cout << "image file: " << FLAGS_image
                << ", predict label: " << result.boxes[i].category
<< ", label_id:" << result.boxes[i].category_id
<< ", score: " << result.boxes[i].score
<< ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0]
<< ", " << result.boxes[i].coordinate[1] << ", "
<< result.boxes[i].coordinate[2] << ", "
<< result.boxes[i].coordinate[3] << ")" << std::endl;
}
if (FLAGS_save_dir != "") {
// visualize
cv::Mat vis_img = PaddleX::Visualize(
im, result, model.labels, colormap, FLAGS_threshold);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
cv::imwrite(save_path, vis_img);
result.clear();
std::cout << "Visualized output saved as " << save_path << std::endl;
}
}
return 0;
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>
#include "include/paddlex/paddlex.h"
#include "include/paddlex/visualize.h"
DEFINE_string(model_dir, "", "Path of openvino model xml file");
DEFINE_string(cfg_file, "", "Path of PaddleX model yaml file");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_string(device, "CPU", "Device name");
DEFINE_string(save_dir, "", "Path to save visualized image");
DEFINE_int32(batch_size, 1, "Batch size of infering");
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir == "") {
std::cerr << "--model_dir need to be defined" << std::endl;
return -1;
}
if (FLAGS_cfg_file == "") {
std::cerr << "--cfg_file need to be defined" << std::endl;
return -1;
}
  if (FLAGS_image == "" && FLAGS_image_list == "") {
std::cerr << "--image or --image_list need to be defined" << std::endl;
return -1;
}
// load model
PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_device);
int imgs = 1;
auto colormap = PaddleX::GenerateColorMap(model.labels.size());
if (FLAGS_image_list != "") {
std::ifstream inf(FLAGS_image_list);
if (!inf) {
std::cerr << "Fail to open file " << FLAGS_image_list <<std::endl;
return -1;
}
std::string image_path;
while (getline(inf, image_path)) {
PaddleX::SegResult result;
cv::Mat im = cv::imread(image_path, 1);
model.predict(im, &result);
if (FLAGS_save_dir != "") {
cv::Mat vis_img = PaddleX::Visualize(im, result, model.labels, colormap);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, image_path);
cv::imwrite(save_path, vis_img);
std::cout << "Visualized output saved as " << save_path << std::endl;
}
}
} else {
PaddleX::SegResult result;
cv::Mat im = cv::imread(FLAGS_image, 1);
model.predict(im, &result);
if (FLAGS_save_dir != "") {
cv::Mat vis_img = PaddleX::Visualize(im, result, model.labels, colormap);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
cv::imwrite(save_path, vis_img);
      std::cout << "Visualized output saved as " << save_path << std::endl;
}
result.clear();
}
return 0;
}
...@@ -54,4 +54,4 @@ class ConfigPaser { ...@@ -54,4 +54,4 @@ class ConfigPaser {
YAML::Node Transforms_; YAML::Node Transforms_;
}; };
} // namespace PaddleDetection } // namespace PaddleX
...@@ -17,6 +17,8 @@ ...@@ -17,6 +17,8 @@
#include <functional> #include <functional>
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <map>
#include <string>
#include "yaml-cpp/yaml.h" #include "yaml-cpp/yaml.h"
...@@ -30,35 +32,40 @@ ...@@ -30,35 +32,40 @@
#include "include/paddlex/config_parser.h" #include "include/paddlex/config_parser.h"
#include "include/paddlex/results.h" #include "include/paddlex/results.h"
#include "include/paddlex/transforms.h" #include "include/paddlex/transforms.h"
using namespace InferenceEngine;
namespace PaddleX { namespace PaddleX {
class Model { class Model {
public: public:
void Init(const std::string& model_dir, void Init(const std::string& model_dir,
const std::string& cfg_dir, const std::string& cfg_file,
std::string device) { std::string device) {
create_predictor(model_dir, cfg_dir, device); create_predictor(model_dir, cfg_file, device);
} }
void create_predictor(const std::string& model_dir, void create_predictor(const std::string& model_dir,
const std::string& cfg_dir, const std::string& cfg_file,
std::string device); std::string device);
bool load_config(const std::string& model_dir); bool load_config(const std::string& model_dir);
bool preprocess(cv::Mat* input_im); bool preprocess(cv::Mat* input_im, ImageBlob* inputs);
bool predict(const cv::Mat& im, ClsResult* result); bool predict(const cv::Mat& im, ClsResult* result);
bool predict(const cv::Mat& im, DetResult* result);
bool predict(const cv::Mat& im, SegResult* result);
std::string type; std::string type;
std::string name; std::string name;
std::vector<std::string> labels; std::map<int, std::string> labels;
Transforms transforms_; Transforms transforms_;
Blob::Ptr inputs_; ImageBlob inputs_;
Blob::Ptr output_; InferenceEngine::Blob::Ptr output_;
CNNNetwork network_; InferenceEngine::CNNNetwork network_;
ExecutableNetwork executable_network_; InferenceEngine::ExecutableNetwork executable_network_;
}; };
} // namespce of PaddleX } // namespace PaddleX
...@@ -61,11 +61,11 @@ class DetResult : public BaseResult { ...@@ -61,11 +61,11 @@ class DetResult : public BaseResult {
class SegResult : public BaseResult { class SegResult : public BaseResult {
public: public:
Mask<int64_t> label_map; Mask<int> label_map;
Mask<float> score_map; Mask<float> score_map;
void clear() { void clear() {
label_map.clear(); label_map.clear();
score_map.clear(); score_map.clear();
} }
}; };
} // namespce of PaddleX } // namespace PaddleX
...@@ -16,26 +16,54 @@ ...@@ -16,26 +16,54 @@
#include <yaml-cpp/yaml.h> #include <yaml-cpp/yaml.h>
#include <memory>
#include <string>
#include <unordered_map> #include <unordered_map>
#include <utility> #include <utility>
#include <memory>
#include <string>
#include <vector> #include <vector>
#include <iostream>
#include <opencv2/core/core.hpp> #include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp> #include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp> #include <opencv2/imgproc/imgproc.hpp>
#include <inference_engine.hpp> #include <inference_engine.hpp>
using namespace InferenceEngine;
namespace PaddleX { namespace PaddleX {
/*
* @brief
* This class represents object for storing all preprocessed data
* */
class ImageBlob {
public:
// Original image height and width
InferenceEngine::Blob::Ptr ori_im_size_;
  // Latest image height and width after processing
std::vector<int> new_im_size_ = std::vector<int>(2);
// Image height and width before resize
std::vector<std::vector<int>> im_size_before_resize_;
// Reshape order
std::vector<std::string> reshape_order_;
// Resize scale
float scale = 1.0;
// Buffer for image data after preprocessing
InferenceEngine::Blob::Ptr blob;
void clear() {
im_size_before_resize_.clear();
reshape_order_.clear();
}
};
// Abstraction of the preprocessing operation class // Abstraction of the preprocessing operation class
class Transform { class Transform {
public: public:
virtual void Init(const YAML::Node& item) = 0; virtual void Init(const YAML::Node& item) = 0;
virtual bool Run(cv::Mat* im) = 0; virtual bool Run(cv::Mat* im, ImageBlob* data) = 0;
}; };
class Normalize : public Transform { class Normalize : public Transform {
...@@ -45,7 +73,7 @@ class Normalize : public Transform { ...@@ -45,7 +73,7 @@ class Normalize : public Transform {
std_ = item["std"].as<std::vector<float>>(); std_ = item["std"].as<std::vector<float>>();
} }
virtual bool Run(cv::Mat* im); virtual bool Run(cv::Mat* im, ImageBlob* data);
private: private:
std::vector<float> mean_; std::vector<float> mean_;
...@@ -61,8 +89,8 @@ class ResizeByShort : public Transform { ...@@ -61,8 +89,8 @@ class ResizeByShort : public Transform {
} else { } else {
max_size_ = -1; max_size_ = -1;
} }
}; }
virtual bool Run(cv::Mat* im); virtual bool Run(cv::Mat* im, ImageBlob* data);
private: private:
float GenerateScale(const cv::Mat& im); float GenerateScale(const cv::Mat& im);
...@@ -70,6 +98,55 @@ class ResizeByShort : public Transform { ...@@ -70,6 +98,55 @@ class ResizeByShort : public Transform {
int max_size_; int max_size_;
}; };
/*
* @brief
 * This class executes the resize-by-long operation on an image matrix. It first
 * resizes the long side of the image to the specified length; the short side
 * is then resized in the same proportion.
* */
class ResizeByLong : public Transform {
public:
virtual void Init(const YAML::Node& item) {
long_size_ = item["long_size"].as<int>();
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int long_size_;
};
/*
* @brief
 * This class executes the resize operation on an image matrix, resizing its
 * width and height to the specified lengths.
* */
class Resize : public Transform {
public:
virtual void Init(const YAML::Node& item) {
if (item["interp"].IsDefined()) {
interp_ = item["interp"].as<std::string>();
}
if (item["target_size"].IsScalar()) {
height_ = item["target_size"].as<int>();
width_ = item["target_size"].as<int>();
} else if (item["target_size"].IsSequence()) {
std::vector<int> target_size = item["target_size"].as<std::vector<int>>();
width_ = target_size[0];
height_ = target_size[1];
}
if (height_ <= 0 || width_ <= 0) {
      std::cerr << "[Resize] target_size should be greater than 0" << std::endl;
exit(-1);
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int height_;
int width_;
std::string interp_;
};
class CenterCrop : public Transform { class CenterCrop : public Transform {
public: public:
...@@ -83,22 +160,65 @@ class CenterCrop : public Transform { ...@@ -83,22 +160,65 @@ class CenterCrop : public Transform {
height_ = crop_size[1]; height_ = crop_size[1];
} }
} }
virtual bool Run(cv::Mat* im); virtual bool Run(cv::Mat* im, ImageBlob* data);
private: private:
int height_; int height_;
int width_; int width_;
}; };
/*
* @brief
 * This class executes the padding operation on an image matrix, adding a
 * constant border to the image edges.
* */
class Padding : public Transform {
public:
virtual void Init(const YAML::Node& item) {
if (item["coarsest_stride"].IsDefined()) {
coarsest_stride_ = item["coarsest_stride"].as<int>();
if (coarsest_stride_ < 1) {
        std::cerr << "[Padding] coarsest_stride should be greater than 0"
<< std::endl;
exit(-1);
}
}
if (item["target_size"].IsDefined()) {
if (item["target_size"].IsScalar()) {
width_ = item["target_size"].as<int>();
height_ = item["target_size"].as<int>();
} else if (item["target_size"].IsSequence()) {
width_ = item["target_size"].as<std::vector<int>>()[0];
height_ = item["target_size"].as<std::vector<int>>()[1];
}
}
if (item["im_padding_value"].IsDefined()) {
im_value_ = item["im_padding_value"].as<std::vector<float>>();
} else {
im_value_ = {0, 0, 0};
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int coarsest_stride_ = -1;
int width_ = 0;
int height_ = 0;
std::vector<float> im_value_;
};
class Transforms { class Transforms {
public: public:
void Init(const YAML::Node& node, bool to_rgb = true); void Init(const YAML::Node& node, std::string type, bool to_rgb = true);
std::shared_ptr<Transform> CreateTransform(const std::string& name); std::shared_ptr<Transform> CreateTransform(const std::string& name);
bool Run(cv::Mat* im, Blob::Ptr blob); bool Run(cv::Mat* im, ImageBlob* data);
private: private:
std::vector<std::shared_ptr<Transform>> transforms_; std::vector<std::shared_ptr<Transform>> transforms_;
bool to_rgb_ = true; bool to_rgb_ = true;
std::string type_;
}; };
} // namespace PaddleX } // namespace PaddleX
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <vector>
#ifdef _WIN32
#include <direct.h>
#include <io.h>
#else // Linux/Unix
#include <dirent.h>
#include <sys/io.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#endif
#include <string>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "include/paddlex/results.h"
#ifdef _WIN32
#define OS_PATH_SEP "\\"
#else
#define OS_PATH_SEP "/"
#endif
namespace PaddleX {
/*
* @brief
* Generate visualization colormap for each class
*
* @param number of class
* @return color map, the size of vector is 3 * num_class
* */
std::vector<int> GenerateColorMap(int num_class);
/*
* @brief
* Visualize the detection result
*
* @param img: initial image matrix
* @param results: the detection result
* @param labels: label map
* @param colormap: visualization color map
* @return visualized image matrix
* */
cv::Mat Visualize(const cv::Mat& img,
const DetResult& results,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap,
float threshold = 0.5);
/*
* @brief
* Visualize the segmentation result
*
* @param img: initial image matrix
* @param results: the detection result
* @param labels: label map
* @param colormap: visualization color map
* @return visualized image matrix
* */
cv::Mat Visualize(const cv::Mat& img,
const SegResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap);
/*
* @brief
* generate save path for visualized image matrix
*
* @param save_dir: directory for saving visualized image matrix
 * @param file_path: source image file path
* @return path of saving visualized result
* */
std::string generate_save_path(const std::string& save_dir,
const std::string& file_path);
} // namespace PaddleX
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from six import text_type as _text_type
import argparse
import sys
from utils import logging
import paddlex as pdx
def arg_parser():
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_dir",
"-m",
type=_text_type,
default=None,
help="define model directory path")
parser.add_argument(
"--save_dir",
"-s",
type=_text_type,
default=None,
help="path to save inference model")
parser.add_argument(
"--fixed_input_shape",
"-fs",
default=None,
help="export openvino model with input shape:[w,h]")
parser.add_argument(
"--data_type",
"-dp",
default="FP32",
help="option, FP32 or FP16, the data_type of openvino IR")
return parser
def export_openvino_model(model, args):
if model.model_type == "detector" or model.__class__.__name__ == "FastSCNN":
        logging.error(
            "Only image classification models and semantic segmentation models (except FastSCNN) can be exported to OpenVINO")
try:
import x2paddle
if x2paddle.__version__ < '0.7.4':
logging.error("You need to upgrade x2paddle >= 0.7.4")
except:
logging.error(
"You need to install x2paddle first, pip install x2paddle>=0.7.4")
import x2paddle.convert as x2pc
x2pc.paddle2onnx(args.model_dir, args.save_dir)
import mo.main as mo
from mo.utils.cli_parser import get_onnx_cli_parser
onnx_parser = get_onnx_cli_parser()
    onnx_parser.add_argument("--model_dir", type=_text_type)
    onnx_parser.add_argument("--save_dir", type=_text_type)
    onnx_parser.add_argument("--fixed_input_shape")
onnx_input = os.path.join(args.save_dir, 'x2paddle_model.onnx')
onnx_parser.set_defaults(input_model=onnx_input)
onnx_parser.set_defaults(output_dir=args.save_dir)
shape = '[1,3,'
shape = shape + args.fixed_input_shape[1:]
    if model.__class__.__name__ == "YOLOv3":
shape = shape + ",[1,2]"
inputs = "image,im_size"
        onnx_parser.set_defaults(input=inputs)
    onnx_parser.set_defaults(input_shape=shape)
    mo.main(onnx_parser, 'onnx')
def main():
parser = arg_parser()
args = parser.parse_args()
assert args.model_dir is not None, "--model_dir should be defined while exporting openvino model"
assert args.save_dir is not None, "--save_dir should be defined to create openvino model"
model = pdx.load_model(args.model_dir)
if model.status == "Normal" or model.status == "Prune":
        logging.error(
            "Only inference models are supported; please export the model first as below,",
            exit=False)
export_openvino_model(model, args)
if __name__ == "__main__":
main()
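A hypothetical invocation of the exporter above (the script name and paths are placeholders, not part of the commit): it converts the Paddle inference model to ONNX via x2paddle and then to OpenVINO IR via the Model Optimizer.

import sys
# Simulate: python export_openvino.py -m ./inference_model -s ./openvino_model -fs [224,224]
sys.argv = ["export_openvino.py",
            "--model_dir", "./inference_model",
            "--save_dir", "./openvino_model",
            "--fixed_input_shape", "[224,224]"]
main()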
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import argparse
import deploy
def arg_parser():
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_dir",
"-m",
type=str,
default=None,
help="path to openvino model .xml file")
parser.add_argument(
"--device",
"-d",
type=str,
default='CPU',
        help="Specify the target device to infer on: [CPU, GPU, FPGA, HDDL, MYRIAD, HETERO]. "
        "Default value is CPU")
    parser.add_argument(
        "--img", "-i", type=str, default=None, help="path to an image file")
    parser.add_argument(
        "--img_list", "-l", type=str, default=None, help="path to an image list file")
parser.add_argument(
"--cfg_file",
"-c",
type=str,
default=None,
        help="Path to PaddleX model yml file")
return parser
def main():
parser = arg_parser()
args = parser.parse_args()
model_xml = args.model_dir
model_yaml = args.cfg_file
#model init
if ("CPU" not in args.device):
predictor = deploy.Predictor(model_xml, model_yaml, args.device)
else:
predictor = deploy.Predictor(model_xml, model_yaml)
#predict
    if args.img_list is not None:
f = open(args.img_list)
lines = f.readlines()
for im_path in lines:
print(im_path)
predictor.predict(im_path.strip('\n'))
f.close()
else:
im_path = args.img
predictor.predict(im_path)
if __name__ == "__main__":
main()
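A hypothetical invocation of this demo script (file names are placeholders): the .xml comes from the Model Optimizer output, the .yml from the exported PaddleX inference model.

import sys
sys.argv = ["demo.py",
            "--model_dir", "./openvino_model/model.xml",
            "--cfg_file", "./inference_model/model.yml",
            "--img", "./test.jpg"]
main()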
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import os.path as osp
import time
import cv2
import numpy as np
import yaml
from six import text_type as _text_type
from openvino.inference_engine import IECore
class Predictor:
def __init__(self, model_xml, model_yaml, device="CPU"):
self.device = device
if not osp.exists(model_xml):
            print("model xml file does not exist at {}".format(model_xml))
self.model_xml = model_xml
self.model_bin = osp.splitext(model_xml)[0] + ".bin"
if not osp.exists(model_yaml):
            print("model yaml file does not exist at {}".format(model_yaml))
with open(model_yaml) as f:
self.info = yaml.load(f.read(), Loader=yaml.Loader)
self.model_type = self.info['_Attributes']['model_type']
self.model_name = self.info['Model']
self.num_classes = self.info['_Attributes']['num_classes']
self.labels = self.info['_Attributes']['labels']
if self.info['Model'] == 'MaskRCNN':
if self.info['_init_params']['with_fpn']:
self.mask_head_resolution = 28
else:
self.mask_head_resolution = 14
transforms_mode = self.info.get('TransformsMode', 'RGB')
if transforms_mode == 'RGB':
to_rgb = True
else:
to_rgb = False
self.transforms = self.build_transforms(self.info['Transforms'],
to_rgb)
self.predictor, self.net = self.create_predictor()
self.total_time = 0
self.count_num = 0
def create_predictor(self):
#initialization for specified device
print("Creating Inference Engine")
ie = IECore()
print("Loading network files:\n\t{}\n\t{}".format(self.model_xml,
self.model_bin))
net = ie.read_network(model=self.model_xml, weights=self.model_bin)
net.batch_size = 1
network_config = {}
if self.device == "MYRIAD":
network_config = {'VPU_HW_STAGES_OPTIMIZATION': 'NO'}
exec_net = ie.load_network(
network=net, device_name=self.device, config=network_config)
return exec_net, net
def build_transforms(self, transforms_info, to_rgb=True):
if self.model_type == "classifier":
import transforms.cls_transforms as transforms
elif self.model_type == "detector":
import transforms.det_transforms as transforms
elif self.model_type == "segmenter":
import transforms.seg_transforms as transforms
op_list = list()
for op_info in transforms_info:
op_name = list(op_info.keys())[0]
op_attr = op_info[op_name]
if not hasattr(transforms, op_name):
raise Exception(
"There's no operator named '{}' in transforms of {}".
format(op_name, self.model_type))
op_list.append(getattr(transforms, op_name)(**op_attr))
eval_transforms = transforms.Compose(op_list)
if hasattr(eval_transforms, 'to_rgb'):
eval_transforms.to_rgb = to_rgb
self.arrange_transforms(eval_transforms)
return eval_transforms
def arrange_transforms(self, eval_transforms):
if self.model_type == 'classifier':
import transforms.cls_transforms as transforms
arrange_transform = transforms.ArrangeClassifier
elif self.model_type == 'segmenter':
import transforms.seg_transforms as transforms
arrange_transform = transforms.ArrangeSegmenter
elif self.model_type == 'detector':
import transforms.det_transforms as transforms
arrange_name = 'Arrange{}'.format(self.model_name)
arrange_transform = getattr(transforms, arrange_name)
else:
raise Exception("Unrecognized model type: {}".format(
self.model_type))
if type(eval_transforms.transforms[-1]).__name__.startswith('Arrange'):
eval_transforms.transforms[-1] = arrange_transform(mode='test')
else:
eval_transforms.transforms.append(arrange_transform(mode='test'))
def raw_predict(self, preprocessed_input):
self.count_num += 1
feed_dict = {}
if self.model_name == "YOLOv3":
inputs = self.net.inputs
for name in inputs:
if (len(inputs[name].shape) == 2):
feed_dict[name] = preprocessed_input['im_size']
elif (len(inputs[name].shape) == 4):
feed_dict[name] = preprocessed_input['image']
else:
pass
else:
input_blob = next(iter(self.net.inputs))
feed_dict[input_blob] = preprocessed_input['image']
#Start sync inference
print("Starting inference in synchronous mode")
res = self.predictor.infer(inputs=feed_dict)
#Processing output blob
print("Processing output blob")
return res
def preprocess(self, image):
res = dict()
if self.model_type == "classifier":
im, = self.transforms(image)
im = np.expand_dims(im, axis=0).copy()
res['image'] = im
elif self.model_type == "detector":
if self.model_name == "YOLOv3":
im, im_shape = self.transforms(image)
im = np.expand_dims(im, axis=0).copy()
im_shape = np.expand_dims(im_shape, axis=0).copy()
res['image'] = im
res['im_size'] = im_shape
if self.model_name.count('RCNN') > 0:
im, im_resize_info, im_shape = self.transforms(image)
im = np.expand_dims(im, axis=0).copy()
im_resize_info = np.expand_dims(im_resize_info, axis=0).copy()
im_shape = np.expand_dims(im_shape, axis=0).copy()
res['image'] = im
res['im_info'] = im_resize_info
res['im_shape'] = im_shape
elif self.model_type == "segmenter":
im, im_info = self.transforms(image)
im = np.expand_dims(im, axis=0).copy()
res['image'] = im
res['im_info'] = im_info
return res
    def classifier_postprocess(self, preds, topk=1):
        """ Post-process the prediction results of a classification model.
"""
true_topk = min(self.num_classes, topk)
output_name = next(iter(self.net.outputs))
pred_label = np.argsort(-preds[output_name][0])[:true_topk]
result = [{
'category_id': l,
'category': self.labels[l],
'score': preds[output_name][0][l],
} for l in pred_label]
print(result)
return result
    def segmenter_postprocess(self, preds, preprocessed_inputs):
        """ Post-process semantic segmentation results.
"""
it = iter(self.net.outputs)
next(it)
score_name = next(it)
score_map = np.squeeze(preds[score_name])
score_map = np.transpose(score_map, (1, 2, 0))
label_name = next(it)
label_map = np.squeeze(preds[label_name]).astype('uint8')
im_info = preprocessed_inputs['im_info']
for info in im_info[::-1]:
if info[0] == 'resize':
w, h = info[1][1], info[1][0]
                label_map = cv2.resize(
                    label_map, (w, h), interpolation=cv2.INTER_NEAREST)
                score_map = cv2.resize(
                    score_map, (w, h), interpolation=cv2.INTER_LINEAR)
elif info[0] == 'padding':
w, h = info[1][1], info[1][0]
label_map = label_map[0:h, 0:w]
score_map = score_map[0:h, 0:w, :]
else:
raise Exception("Unexpected info '{}' in im_info".format(info[
0]))
return {'label_map': label_map, 'score_map': score_map}
    def detector_postprocess(self, preds, preprocessed_inputs):
        """Post-process object detection results.
"""
output_name = next(iter(self.net.outputs))
outputs = preds[output_name][0]
result = []
for out in outputs:
if (out[0] > 0):
result.append(out.tolist())
else:
pass
print(result)
return result
def predict(self, image, topk=1, threshold=0.5):
preprocessed_input = self.preprocess(image)
model_pred = self.raw_predict(preprocessed_input)
if self.model_type == "classifier":
results = self.classifier_postprocess(model_pred, topk)
elif self.model_type == "detector":
results = self.detector_postprocess(model_pred, preprocessed_input)
elif self.model_type == "segmenter":
            results = self.segmenter_postprocess(model_pred,
                                                 preprocessed_input)
        return results
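A usage sketch for the Predictor class above (the model file paths are placeholders); with the added return, predict() hands back the post-processed results:

predictor = Predictor("model.xml", "model.yml", device="CPU")
results = predictor.predict("test.jpg", topk=5)  # classification example
print(results)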
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import cls_transforms
from . import det_transforms
from . import seg_transforms
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .ops import *
import random
import os.path as osp
import numpy as np
import cv2
from PIL import Image, ImageEnhance
class ClsTransform:
    """Base class for classification transforms.
"""
def __init__(self):
pass
class Compose(ClsTransform):
    """Apply the preprocessing/augmentation operators to the input data.
    All operators take an input image of shape [H, W, C], where H is the image
    height, W the image width and C the number of channels.
    Args:
        transforms (list): preprocessing/augmentation operators.
    Raises:
        TypeError: when an argument type does not meet the requirement.
        ValueError: when data lengths do not match.
"""
def __init__(self, transforms):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
'must be equal or larger than 1!')
self.transforms = transforms
def __call__(self, im, label=None):
"""
Args:
            im (str/np.ndarray): image file path or image data as np.ndarray.
            label (int): category id of the image.
        Returns:
            tuple: tuple of the fields required by the network;
                the fields are determined by the last preprocessing operator in transforms.
"""
if isinstance(im, np.ndarray):
if len(im.shape) != 3:
raise Exception(
"im should be 3-dimension, but now is {}-dimensions".
format(len(im.shape)))
else:
try:
im = cv2.imread(im).astype('float32')
except:
raise TypeError('Can\'t read The image file {}!'.format(im))
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
for op in self.transforms:
outputs = op(im, label)
im = outputs[0]
if len(outputs) == 2:
label = outputs[1]
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
print(
"{} is already in ComposedTransforms, need to remove it from add_augmenters().".
format(type(aug).__name__))
self.transforms = augmenters + self.transforms
class Normalize(ClsTransform):
    """Normalize an image.
    1. Rescale the image to the range [0.0, 1.0].
    2. Subtract the mean and divide by the standard deviation.
    Args:
        mean (list): mean of the image dataset. Defaults to [0.485, 0.456, 0.406].
        std (list): standard deviation of the image dataset. Defaults to [0.229, 0.224, 0.225].
"""
def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
self.mean = mean
self.std = std
def __call__(self, im, label=None):
"""
Args:
            im (np.ndarray): image data as np.ndarray.
            label (int): category id of the image.
        Returns:
            tuple: (im, ) when label is None, holding the image data;
                (im, label) otherwise, holding the image data and its category id.
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label is None:
return (im, )
else:
return (im, label)
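For example, with the default mean/std a mid-gray 8-bit pixel (value 127.5) maps to roughly (0.066, 0.196, 0.418) after normalization. A quick arithmetic check, as a sketch:

import numpy as np

mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
pixel = np.array([127.5, 127.5, 127.5])  # raw 8-bit value per channel
print((pixel / 255.0 - mean) / std)      # -> approx [0.0655, 0.1964, 0.4178]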
class ResizeByShort(ClsTransform):
    """Resize an image based on its short side.
    1. Get the lengths of the long and short sides of the image.
    2. Compute the resize scale as short_size / (length of the short side);
       the long side is resized by the same ratio.
    3. If max_size > 0 and the target length of the long side exceeds max_size,
       use max_size / (length of the long side) as the resize scale instead.
    4. Resize the image by the resulting scale.
    Args:
        short_size (int): target length of the short side. Defaults to 256.
        max_size (int): upper bound on the long side. Defaults to -1.
"""
def __init__(self, short_size=256, max_size=-1):
self.short_size = short_size
self.max_size = max_size
def __call__(self, im, label=None):
"""
Args:
            im (np.ndarray): image data as np.ndarray.
            label (int): category id of the image.
        Returns:
            tuple: (im, ) when label is None, holding the image data;
                (im, label) otherwise, holding the image data and its category id.
"""
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im = cv2.resize(
im, (resized_width, resized_height),
interpolation=cv2.INTER_LINEAR)
if label is None:
return (im, )
else:
return (im, label)
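As a worked example, an 800x1200 image with short_size=256 and max_size=-1 gets scale 256/800 = 0.32 and comes out 256x384. A small sketch of the shape computation:

def resize_by_short_shape(h, w, short_size=256, max_size=-1):
    # Output (height, width) produced by ResizeByShort for an h x w image.
    scale = float(short_size) / min(h, w)
    if max_size > 0 and round(scale * max(h, w)) > max_size:
        scale = float(max_size) / max(h, w)
    return int(round(h * scale)), int(round(w * scale))

print(resize_by_short_shape(800, 1200))  # -> (256, 384)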
class CenterCrop(ClsTransform):
    """Crop a square of side length `crop_size` centered on the image center.
    1. Compute the crop start point.
    2. Crop the image.
    Args:
        crop_size (int): target side length of the crop. Defaults to 224.
"""
def __init__(self, crop_size=224):
self.crop_size = crop_size
def __call__(self, im, label=None):
"""
Args:
            im (np.ndarray): image data as np.ndarray.
            label (int): category id of the image.
        Returns:
            tuple: (im, ) when label is None, holding the image data;
                (im, label) otherwise, holding the image data and its category id.
"""
im = center_crop(im, self.crop_size)
if label is None:
return (im, )
else:
return (im, label)
class ArrangeClassifier(ClsTransform):
    """Collect the information needed for training/evaluation/prediction.
    Note: users do not need to call this operator explicitly.
    Args:
        mode (str): intended use of the data, one of ['train', 'eval', 'test', 'quant'].
    Raises:
        ValueError: when mode is not in ['train', 'eval', 'test', 'quant'].
"""
def __init__(self, mode=None):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode must be in ['train', 'eval', 'test', 'quant']!")
self.mode = mode
def __call__(self, im, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
label (int): 每张图像所对应的类别序号。
Returns:
tuple: 当mode为'train'或'eval'时,返回(im, label),分别对应图像np.ndarray数据、
图像类别id;当mode为'test'或'quant'时,返回(im, ),对应图像np.ndarray数据。
"""
im = permute(im, False).astype('float32')
if self.mode == 'train' or self.mode == 'eval':
outputs = (im, label)
else:
outputs = (im, )
return outputs
class ComposedClsTransforms(Compose):
""" 分类模型的基础Transforms流程,具体如下
训练阶段:
1. 随机从图像中crop一块子图,并resize成crop_size大小
2. 将1的输出按0.5的概率随机进行水平翻转
3. 将图像进行归一化
验证/预测阶段:
1. 将图像按比例Resize,使得最小边长度为crop_size[0] * 1.14
2. 从图像中心crop出一个大小为crop_size的图像
3. 将图像进行归一化
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
crop_size(int|list): 输入模型里的图像大小
mean(list): 图像均值
std(list): 图像标准差
"""
def __init__(self,
mode,
crop_size=[224, 224],
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
width = crop_size
if isinstance(crop_size, list):
if crop_size[0] != crop_size[1]:
raise Exception(
"In classifier model, width and height should be equal, please modify your parameter `crop_size`"
)
width = crop_size[0]
if width % 32 != 0:
raise Exception(
"In classifier model, width and height should be multiple of 32, e.g 224、256、320...., please modify your parameter `crop_size`"
)
if mode == 'train':
# 训练时的transforms,包含数据增强(原文此处省略)
pass
else:
# 验证/预测时的transforms
transforms = [
ResizeByShort(short_size=int(width * 1.14)),
CenterCrop(crop_size=width), Normalize(
mean=mean, std=std)
]
super(ComposedClsTransforms, self).__init__(transforms)
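# 新增完整流程示例(仅作示意,非原模块内容):验证/预测阶段先按短边resize到
# int(224 * 1.14) = 255,再中心裁剪到224,最后归一化。
def _demo_composed_cls_transforms():
    eval_transforms = ComposedClsTransforms(mode='eval', crop_size=[224, 224])
    im = np.random.randint(0, 256, (300, 400, 3)).astype('float32')
    outputs = eval_transforms(im)
    print(outputs[0].shape)  # (224, 224, 3),此时尚未做permute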
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
import random
import os.path as osp
import numpy as np
import cv2
from PIL import Image, ImageEnhance
from .ops import *
class DetTransform:
"""检测数据处理基类
"""
def __init__(self):
pass
class Compose(DetTransform):
"""根据数据预处理/增强列表对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强列表。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
def __init__(self, transforms):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
'must be equal or larger than 1!')
self.transforms = transforms
self.use_mixup = False
for t in self.transforms:
if type(t).__name__ == 'MixupImage':
self.use_mixup = True
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (str/np.ndarray): 图像路径/图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息,dict中的字段如下:
- im_id (np.ndarray): 图像序列号,形状为(1,)。
- image_shape (np.ndarray): 图像原始大小,形状为(2,),
image_shape[0]为高,image_shape[1]为宽。
- mixup (list): list为[im, im_info, label_info],分别对应
与当前图像进行mixup的图像np.ndarray数据、图像相关信息、标注框相关信息;
注意,当前epoch若无需进行mixup,则无该字段。
label_info (dict): 存储与标注框相关的信息,dict中的字段如下:
- gt_bbox (np.ndarray): 真实标注框坐标[x1, y1, x2, y2],形状为(n, 4),
其中n代表真实标注框的个数。
- gt_class (np.ndarray): 每个真实标注框对应的类别序号,形状为(n, 1),
其中n代表真实标注框的个数。
- gt_score (np.ndarray): 每个真实标注框对应的混合得分,形状为(n, 1),
其中n代表真实标注框的个数。
- gt_poly (list): 每个真实标注框内的多边形分割区域,每个分割区域由点的x、y坐标组成,
长度为n,其中n代表真实标注框的个数。
- is_crowd (np.ndarray): 每个真实标注框中是否是一组对象,形状为(n, 1),
其中n代表真实标注框的个数。
- difficult (np.ndarray): 每个真实标注框中的对象是否为难识别对象,形状为(n, 1),
其中n代表真实标注框的个数。
Returns:
tuple: 根据网络所需字段所组成的tuple;
字段由transforms中的最后一个数据预处理操作决定。
"""
def decode_image(im_file, im_info, label_info):
if im_info is None:
im_info = dict()
if isinstance(im_file, np.ndarray):
if len(im_file.shape) != 3:
raise Exception(
"im should be 3-dimensions, but now is {}-dimensions".
format(len(im_file.shape)))
im = im_file
else:
try:
im = cv2.imread(im_file).astype('float32')
except:
raise TypeError('Cannot read the image file {}!'.format(
im_file))
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
# make default im_info with [h, w, 1]
im_info['im_resize_info'] = np.array(
[im.shape[0], im.shape[1], 1.], dtype=np.float32)
im_info['image_shape'] = np.array([im.shape[0],
im.shape[1]]).astype('int32')
if not self.use_mixup:
if 'mixup' in im_info:
del im_info['mixup']
# decode mixup image
if 'mixup' in im_info:
im_info['mixup'] = \
decode_image(im_info['mixup'][0],
im_info['mixup'][1],
im_info['mixup'][2])
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
outputs = decode_image(im, im_info, label_info)
im = outputs[0]
im_info = outputs[1]
if len(outputs) == 3:
label_info = outputs[2]
for op in self.transforms:
if im is None:
return None
outputs = op(im, im_info, label_info)
im = outputs[0]
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
print(
"{} is already in ComposedTransforms, please remove it from the augmenters passed to add_augmenters().".
format(type(aug).__name__))
self.transforms = augmenters + self.transforms
class ResizeByShort(DetTransform):
"""根据图像的短边调整图像大小(resize)。
1. 获取图像的长边和短边长度。
2. 根据短边与short_size的比例,计算长边的目标长度,
此时高、宽的resize比例为short_size/原图短边长度。
3. 如果max_size>0,调整resize比例:
如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度。
4. 根据调整大小的比例对图像进行resize。
Args:
short_size (int): 短边目标长度。默认为800。
max_size (int): 长边目标长度的最大限制。默认为1333。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, short_size=800, max_size=1333):
self.max_size = int(max_size)
if not isinstance(short_size, int):
raise TypeError(
"Type of short_size is invalid. Must be Integer, now is {}".
format(type(short_size)))
self.short_size = short_size
if not (isinstance(self.max_size, int)):
raise TypeError("max_size: input type is invalid.")
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与图像相关信息的字典、存储与标注框相关信息的字典。
其中,im_info更新字段为:
- im_resize_info (np.ndarray): resize后的图像高、resize后的图像宽、resize后的图像相对原始图的缩放比例
三者组成的np.ndarray,形状为(3,)。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if im_info is None:
im_info = dict()
if not isinstance(im, np.ndarray):
raise TypeError("ResizeByShort: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('ResizeByShort: image is not 3-dimensional.')
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im_resize_info = [resized_height, resized_width, scale]
im = cv2.resize(
im, (resized_width, resized_height),
interpolation=cv2.INTER_LINEAR)
im_info['im_resize_info'] = np.array(im_resize_info).astype(np.float32)
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
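# 新增尺寸计算示例(仅作示意,非原模块内容):1080x1920的输入按短边800缩放时,
# 长边会超出1333,因此改用1333/1920作为缩放比例,并将结果记录到im_resize_info。
def _demo_det_resize_by_short():
    im = np.zeros((1080, 1920, 3), dtype=np.float32)
    im_out, im_info = ResizeByShort(short_size=800, max_size=1333)(im)
    print(im_out.shape)               # (750, 1333, 3)
    print(im_info['im_resize_info'])  # [750. 1333. 0.694...]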
class Padding(DetTransform):
"""1.将图像的长和宽padding至coarsest_stride的倍数。如输入图像为[300, 640],
`coarsest_stride`为32,则由于300不为32的倍数,因此在图像最右和最下使用0值
进行padding,最终输出图像为[320, 640]。
2.或者,将图像的长和宽padding到target_size指定的shape,如输入的图像为[300,640],
a. `target_size` = 960,在图像最右和最下使用0值进行padding,最终输出
图像为[960, 960]。
b. `target_size` = [640, 960],在图像最右和最下使用0值进行padding,最终
输出图像为[640, 960]。
1. 如果coarsest_stride为1,target_size为None则直接返回。
2. 获取图像的高H、宽W。
3. 计算填充后图像的高H_new、宽W_new。
4. 构建大小为(H_new, W_new, 3)像素值为0的np.ndarray,
并将原图的np.ndarray粘贴于左上角。
Args:
coarsest_stride (int): 填充后的图像长、宽为该参数的倍数,默认为1。
target_size (int|list|tuple): 填充后的图像长、宽,默认为None。若同时指定,target_size优先级高于coarsest_stride。
Raises:
TypeError: 形参`target_size`数据类型不满足需求。
ValueError: 形参`target_size`为(list|tuple)时,长度不满足需求。
"""
def __init__(self, coarsest_stride=1, target_size=None):
self.coarsest_stride = coarsest_stride
if target_size is not None:
if not isinstance(target_size, int):
if not isinstance(target_size, tuple) and not isinstance(
target_size, list):
raise TypeError(
"Padding: Type of target_size must in (int|list|tuple)."
)
elif len(target_size) != 2:
raise ValueError(
"Padding: Length of target_size must equal 2.")
self.target_size = target_size
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与图像相关信息的字典、存储与标注框相关信息的字典。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
ValueError: coarsest_stride,target_size需有且只有一个被指定。
ValueError: target_size小于原图的大小。
"""
if im_info is None:
im_info = dict()
if not isinstance(im, np.ndarray):
raise TypeError("Padding: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('Padding: image is not 3-dimensional.')
im_h, im_w, im_c = im.shape[:]
if isinstance(self.target_size, int):
padding_im_h = self.target_size
padding_im_w = self.target_size
elif isinstance(self.target_size, list) or isinstance(self.target_size,
tuple):
padding_im_w = self.target_size[0]
padding_im_h = self.target_size[1]
elif self.coarsest_stride > 0:
padding_im_h = int(
np.ceil(im_h / self.coarsest_stride) * self.coarsest_stride)
padding_im_w = int(
np.ceil(im_w / self.coarsest_stride) * self.coarsest_stride)
else:
raise ValueError(
"coarsest_stridei(>1) or target_size(list|int) need setting in Padding transform"
)
pad_height = padding_im_h - im_h
pad_width = padding_im_w - im_w
if pad_height < 0 or pad_width < 0:
raise ValueError(
'the size of the image should be less than target_size, but the size of the image ({}, {}) is larger than target_size ({}, {})'
.format(im_w, im_h, padding_im_w, padding_im_h))
padding_im = np.zeros(
(padding_im_h, padding_im_w, im_c), dtype=np.float32)
padding_im[:im_h, :im_w, :] = im
if label_info is None:
return (padding_im, im_info)
else:
return (padding_im, im_info, label_info)
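# 新增padding计算示例(仅作示意,非原模块内容):300x640的输入在coarsest_stride=32时,
# 高被padding到ceil(300 / 32) * 32 = 320,宽640已是32的倍数,保持不变。
def _demo_det_padding():
    im = np.zeros((300, 640, 3), dtype=np.float32)
    im_out, im_info = Padding(coarsest_stride=32)(im)
    print(im_out.shape)  # (320, 640, 3)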
class Resize(DetTransform):
"""调整图像大小(resize)。
- 当目标大小(target_size)类型为int时,根据插值方式,
将图像resize为[target_size, target_size]。
- 当目标大小(target_size)类型为list或tuple时,根据插值方式,
将图像resize为target_size。
注意:当插值方式为“RANDOM”时,则随机选取一种插值方式进行resize。
Args:
target_size (int/list/tuple): 目标大小。默认为608。
interp (str): resize的插值方式,与opencv的插值方式对应,取值范围为
['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']。默认为"LINEAR"。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 插值方式不在['NEAREST', 'LINEAR', 'CUBIC',
'AREA', 'LANCZOS4', 'RANDOM']中。
"""
# The interpolation mode
interp_dict = {
'NEAREST': cv2.INTER_NEAREST,
'LINEAR': cv2.INTER_LINEAR,
'CUBIC': cv2.INTER_CUBIC,
'AREA': cv2.INTER_AREA,
'LANCZOS4': cv2.INTER_LANCZOS4
}
def __init__(self, target_size=608, interp='LINEAR'):
self.interp = interp
if not (interp == "RANDOM" or interp in self.interp_dict):
raise ValueError("interp should be one of {}".format(
self.interp_dict.keys()))
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
raise TypeError(
'when target is list or tuple, it should include 2 elements, but it is {}'
.format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与图像相关信息的字典、存储与标注框相关信息的字典。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if im_info is None:
im_info = dict()
if not isinstance(im, np.ndarray):
raise TypeError("Resize: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('Resize: image is not 3-dimensional.')
if self.interp == "RANDOM":
interp = random.choice(list(self.interp_dict.keys()))
else:
interp = self.interp
im = resize(im, self.target_size, self.interp_dict[interp])
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
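# 新增使用示例(仅作示意,非原模块内容):interp='RANDOM'时,每次调用会从
# interp_dict的五种插值方式中随机选取一种。
def _demo_det_resize_random_interp():
    im = np.zeros((300, 400, 3), dtype=np.float32)
    im_out, _ = Resize(target_size=608, interp='RANDOM')(im)
    print(im_out.shape)  # (608, 608, 3)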
class Normalize(DetTransform):
"""对图像进行标准化。
1. 归一化图像到区间[0.0, 1.0]。
2. 对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。
std (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
self.mean = mean
self.std = std
if not (isinstance(self.mean, list) and isinstance(self.std, list)):
raise TypeError("NormalizeImage: input type is invalid.")
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
raise TypeError('NormalizeImage: std is invalid!')
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
存储与图像相关信息的字典、存储与标注框相关信息的字典。
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
class ArrangeYOLOv3(DetTransform):
"""获取YOLOv3模型训练/验证/预测所需信息。
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。
"""
def __init__(self, mode=None):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode must be in ['train', 'eval', 'test', 'quant']!")
self.mode = mode
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当mode为'train'时,返回(im, gt_bbox, gt_class, gt_score, im_shape),分别对应
图像np.ndarray数据、真实标注框、真实标注框对应的类别、真实标注框混合得分、图像大小信息;
当mode为'eval'时,返回(im, im_shape, im_id, gt_bbox, gt_class, difficult),
分别对应图像np.ndarray数据、图像大小信息、图像id、真实标注框、真实标注框对应的类别、
真实标注框是否为难识别对象;当mode为'test'或'quant'时,返回(im, im_shape),
分别对应图像np.ndarray数据、图像大小信息。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
im = permute(im, False)
if self.mode == 'train':
# 训练模式下的字段组装(原文此处省略)
pass
elif self.mode == 'eval':
# 验证模式下的字段组装(原文此处省略)
pass
else:
if im_info is None:
raise TypeError('Cannot do ArrangeYOLOv3! ' +
'Because the im_info cannot be None!')
im_shape = im_info['image_shape']
outputs = (im, im_shape)
return outputs
class ComposedYOLOv3Transforms(Compose):
"""YOLOv3模型的图像预处理流程,具体如下,
训练阶段:
1. 在前mixup_epoch轮迭代中,使用MixupImage策略,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage
2. 对图像进行随机扰动,包括亮度,对比度,饱和度和色调
3. 随机扩充图像,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#randomexpand
4. 随机裁剪图像
5. 将4步骤的输出图像Resize成shape参数的大小
6. 随机0.5的概率水平翻转图像
7. 图像归一化
验证/预测阶段:
1. 将图像Resize成shape参数大小
2. 图像归一化
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
shape(list): 输入模型中图像的大小,输入模型的图像会被Resize成此大小
mixup_epoch(int): 模型训练过程中,前mixup_epoch会使用mixup策略
mean(list): 图像均值
std(list): 图像标准差
"""
def __init__(self,
mode,
shape=[608, 608],
mixup_epoch=250,
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
width = shape
if isinstance(shape, list):
if shape[0] != shape[1]:
raise Exception(
"In YOLOv3 model, width and height should be equal")
width = shape[0]
if width % 32 != 0:
raise Exception(
"In YOLOv3 model, width and height should be multiple of 32, e.g 224、256、320...."
)
if mode == 'train':
# 训练时的transforms,包含数据增强
pass
else:
# 验证/预测时的transforms
transforms = [
Resize(
target_size=width, interp='CUBIC'), Normalize(
mean=mean, std=std)
]
super(ComposedYOLOv3Transforms, self).__init__(transforms)
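# 新增完整流程示例(仅作示意,非原模块内容):验证/预测阶段只做Resize和归一化,
# 原始图像大小保存在im_info['image_shape']中。
def _demo_composed_yolov3_transforms():
    eval_transforms = ComposedYOLOv3Transforms(mode='eval', shape=[608, 608])
    im = np.random.randint(0, 256, (720, 1280, 3)).astype(np.float32)
    im_out, im_info = eval_transforms(im)
    print(im_out.shape)            # (608, 608, 3)
    print(im_info['image_shape'])  # [ 720 1280]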
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import math
import numpy as np
from PIL import Image, ImageEnhance
def normalize(im, mean, std):
im = im / 255.0
im -= mean
im /= std
return im
def permute(im, to_bgr=False):
im = np.swapaxes(im, 1, 2)
im = np.swapaxes(im, 1, 0)
if to_bgr:
im = im[[2, 1, 0], :, :]
return im
def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR):
value = max(im.shape[0], im.shape[1])
scale = float(long_size) / float(value)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im = cv2.resize(
im, (resized_width, resized_height), interpolation=interpolation)
return im
def resize(im, target_size=608, interp=cv2.INTER_LINEAR):
if isinstance(target_size, list) or isinstance(target_size, tuple):
w = target_size[0]
h = target_size[1]
else:
w = target_size
h = target_size
im = cv2.resize(im, (w, h), interpolation=interp)
return im
def random_crop(im,
crop_size=224,
lower_scale=0.08,
lower_ratio=3. / 4,
upper_ratio=4. / 3):
scale = [lower_scale, 1.0]
ratio = [lower_ratio, upper_ratio]
aspect_ratio = math.sqrt(np.random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(im.shape[0]) / im.shape[1]) / (h**2),
(float(im.shape[1]) / im.shape[0]) / (w**2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
target_area = im.shape[0] * im.shape[1] * np.random.uniform(
scale_min, scale_max)
target_size = math.sqrt(target_area)
w = int(target_size * w)
h = int(target_size * h)
i = np.random.randint(0, im.shape[0] - h + 1)
j = np.random.randint(0, im.shape[1] - w + 1)
im = im[i:i + h, j:j + w, :]
im = cv2.resize(im, (crop_size, crop_size))
return im
def center_crop(im, crop_size=224):
height, width = im.shape[:2]
w_start = (width - crop_size) // 2
h_start = (height - crop_size) // 2
w_end = w_start + crop_size
h_end = h_start + crop_size
im = im[h_start:h_end, w_start:w_end, :]
return im
def horizontal_flip(im):
if len(im.shape) == 3:
im = im[:, ::-1, :]
elif len(im.shape) == 2:
im = im[:, ::-1]
return im
def vertical_flip(im):
if len(im.shape) == 3:
im = im[::-1, :, :]
elif len(im.shape) == 2:
im = im[::-1, :]
return im
def bgr2rgb(im):
return im[:, :, ::-1]
def hue(im, hue_lower, hue_upper):
delta = np.random.uniform(hue_lower, hue_upper)
u = np.cos(delta * np.pi)
w = np.sin(delta * np.pi)
bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]])
tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321],
[0.211, -0.523, 0.311]])
ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
[1.0, -1.107, 1.705]])
t = np.dot(np.dot(ityiq, bt), tyiq).T
im = np.dot(im, t)
return im
def saturation(im, saturation_lower, saturation_upper):
delta = np.random.uniform(saturation_lower, saturation_upper)
gray = im * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
gray = gray.sum(axis=2, keepdims=True)
gray *= (1.0 - delta)
im *= delta
im += gray
return im
def contrast(im, contrast_lower, contrast_upper):
delta = np.random.uniform(contrast_lower, contrast_upper)
im *= delta
return im
def brightness(im, brightness_lower, brightness_upper):
delta = np.random.uniform(brightness_lower, brightness_upper)
im += delta
return im
def rotate(im, rotate_lower, rotate_upper):
rotate_delta = np.random.uniform(rotate_lower, rotate_upper)
im = im.rotate(int(rotate_delta))
return im
def resize_padding(im, max_side_len=2400):
'''
resize image to a size multiple of 32 which is required by the network
:param im: the input image
:param max_side_len: limit of max image size to avoid out of memory in gpu
:return: the resized image and the resize ratio
'''
h, w, _ = im.shape
resize_w = w
resize_h = h
# limit the max side
if max(resize_h, resize_w) > max_side_len:
ratio = float(
max_side_len) / resize_h if resize_h > resize_w else float(
max_side_len) / resize_w
else:
ratio = 1.
resize_h = int(resize_h * ratio)
resize_w = int(resize_w * ratio)
resize_h = resize_h if resize_h % 32 == 0 else (resize_h // 32 - 1) * 32
resize_w = resize_w if resize_w % 32 == 0 else (resize_w // 32 - 1) * 32
resize_h = max(32, resize_h)
resize_w = max(32, resize_w)
im = cv2.resize(im, (int(resize_w), int(resize_h)))
#im = cv2.resize(im, (512, 512))
ratio_h = resize_h / float(h)
ratio_w = resize_w / float(w)
_ratio = np.array([ratio_h, ratio_w]).reshape(-1, 2)
return im, _ratio
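# 新增行为示例(仅作示意,非原文件内容):resize_padding会把宽高向下调整到32的倍数,
# 500x700的输入会被resize到448x640,并返回相对原图的缩放比例。
def _demo_resize_padding():
    im = np.zeros((500, 700, 3), dtype=np.float32)
    im_out, ratio = resize_padding(im)
    print(im_out.shape)  # (448, 640, 3)
    print(ratio)         # [[0.896      0.91428571]]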
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .ops import *
import random
import os.path as osp
import numpy as np
from PIL import Image
import cv2
from collections import OrderedDict
class SegTransform:
""" 分割transform基类
"""
def __init__(self):
pass
class Compose(SegTransform):
"""根据数据预处理/增强算子对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强算子。
Raises:
TypeError: transforms不是list对象
ValueError: transforms元素个数小于1。
"""
def __init__(self, transforms):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
'must be equal or larger than 1!')
self.transforms = transforms
self.to_rgb = False
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (str/np.ndarray): 图像路径/图像np.ndarray数据。
im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。
Returns:
tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。
"""
if im_info is None:
im_info = list()
if isinstance(im, np.ndarray):
if len(im.shape) != 3:
raise Exception(
"im should be 3-dimensions, but now is {}-dimensions".
format(len(im.shape)))
else:
try:
im = cv2.imread(im).astype('float32')
except:
raise ValueError('Can\'t read The image file {}!'.format(im))
if self.to_rgb:
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
if label is not None:
if not isinstance(label, np.ndarray):
label = np.asarray(Image.open(label))
for op in self.transforms:
if isinstance(op, SegTransform):
outputs = op(im, im_info, label)
im = outputs[0]
if len(outputs) >= 2:
im_info = outputs[1]
if len(outputs) == 3:
label = outputs[2]
else:
im = execute_imgaug(op, im)
if label is not None:
outputs = (im, im_info, label)
else:
outputs = (im, im_info)
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
print("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
self.transforms = augmenters + self.transforms
class RandomHorizontalFlip(SegTransform):
"""以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。
Args:
prob (float): 随机水平翻转的概率。默认值为0.5。
"""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if random.random() < self.prob:
im = horizontal_flip(im)
if label is not None:
label = horizontal_flip(label)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomVerticalFlip(SegTransform):
"""以一定的概率对图像进行垂直翻转。当存在标注图像时,则同步进行翻转。
Args:
prob (float): 随机垂直翻转的概率。默认值为0.1。
"""
def __init__(self, prob=0.1):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if random.random() < self.prob:
im = vertical_flip(im)
if label is not None:
label = vertical_flip(label)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class Resize(SegTransform):
"""调整图像大小(resize),当存在标注图像时,则同步进行处理。
- 当目标大小(target_size)类型为int时,根据插值方式,
将图像resize为[target_size, target_size]。
- 当目标大小(target_size)类型为list或tuple时,根据插值方式,
将图像resize为target_size, target_size的输入应为[w, h]或(w, h)。
Args:
target_size (int|list|tuple): 目标大小。
interp (str): resize的插值方式,与opencv的插值方式对应,
可选的值为['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4'],默认为"LINEAR"。
Raises:
TypeError: target_size不是int/list/tuple。
ValueError: target_size为list/tuple时元素个数不等于2。
AssertionError: interp的取值不在['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4']之内。
"""
# The interpolation mode
interp_dict = {
'NEAREST': cv2.INTER_NEAREST,
'LINEAR': cv2.INTER_LINEAR,
'CUBIC': cv2.INTER_CUBIC,
'AREA': cv2.INTER_AREA,
'LANCZOS4': cv2.INTER_LANCZOS4
}
def __init__(self, target_size, interp='LINEAR'):
self.interp = interp
assert interp in self.interp_dict, "interp should be one of {}".format(
self.interp_dict.keys())
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
raise ValueError(
'when target is list or tuple, it should include 2 elements, but it is {}'
.format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info更新字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
Raises:
ZeroDivisionError: im的短边为0。
TypeError: im不是np.ndarray数据。
ValueError: im不是3维np.ndarray。
"""
if im_info is None:
im_info = list()
im_info.append(('resize', im.shape[:2]))
if not isinstance(im, np.ndarray):
raise TypeError("ResizeImage: image type is not np.ndarray.")
if len(im.shape) != 3:
raise ValueError('ResizeImage: image is not 3-dimensional.')
im_shape = im.shape
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
if float(im_size_min) == 0:
raise ZeroDivisionError('ResizeImage: min size of image is 0')
if isinstance(self.target_size, int):
resize_w = self.target_size
resize_h = self.target_size
else:
resize_w = self.target_size[0]
resize_h = self.target_size[1]
im_scale_x = float(resize_w) / float(im_shape[1])
im_scale_y = float(resize_h) / float(im_shape[0])
im = cv2.resize(
im,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp_dict[self.interp])
if label is not None:
label = cv2.resize(
label,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp_dict['NEAREST'])
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeByLong(SegTransform):
"""对图像长边resize到固定值,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
Args:
long_size (int): resize后图像的长边大小。
"""
def __init__(self, long_size):
self.long_size = long_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info新增字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
"""
if im_info is None:
im_info = list()
im_info.append(('resize', im.shape[:2]))
im = resize_long(im, self.long_size)
if label is not None:
label = resize_long(label, self.long_size, cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeByShort(SegTransform):
"""根据图像的短边调整图像大小(resize)。
1. 获取图像的长边和短边长度。
2. 根据短边与short_size的比例,计算长边的目标长度,
此时高、宽的resize比例为short_size/原图短边长度。
3. 如果max_size>0,调整resize比例:
如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度。
4. 根据调整大小的比例对图像进行resize。
Args:
short_size (int): 短边目标长度。默认为800。
max_size (int): 长边目标长度的最大限制。默认为1333。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, short_size=800, max_size=1333):
self.max_size = int(max_size)
if not isinstance(short_size, int):
raise TypeError(
"Type of short_size is invalid. Must be Integer, now is {}".
format(type(short_size)))
self.short_size = short_size
if not (isinstance(self.max_size, int)):
raise TypeError("max_size: input type is invalid.")
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info更新字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if im_info is None:
im_info = list()
if not isinstance(im, np.ndarray):
raise TypeError("ResizeByShort: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('ResizeByShort: image is not 3-dimensional.')
im_info.append(('resize', im.shape[:2]))
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im = cv2.resize(
im, (resized_width, resized_height),
interpolation=cv2.INTER_NEAREST)
if label is not None:
label = cv2.resize(
label, (resized_width, resized_height),
interpolation=cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeRangeScaling(SegTransform):
"""对图像长边随机resize到指定范围内,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
Args:
min_value (int): 图像长边resize后的最小值。默认值400。
max_value (int): 图像长边resize后的最大值。默认值600。
Raises:
ValueError: min_value大于max_value
"""
def __init__(self, min_value=400, max_value=600):
if min_value > max_value:
raise ValueError('min_value must be less than max_value, '
'but they are {} and {}.'.format(min_value,
max_value))
self.min_value = min_value
self.max_value = max_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_value == self.max_value:
random_size = self.max_value
else:
random_size = int(
np.random.uniform(self.min_value, self.max_value) + 0.5)
im = resize_long(im, random_size, cv2.INTER_LINEAR)
if label is not None:
label = resize_long(label, random_size, cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeStepScaling(SegTransform):
"""对图像按照某一个比例resize,这个比例以scale_step_size为步长
在[min_scale_factor, max_scale_factor]随机变动。当存在标注图像时,则同步进行处理。
Args:
min_scale_factor (float): resize最小尺度。默认值0.75。
max_scale_factor (float): resize最大尺度。默认值1.25。
scale_step_size (float): resize尺度范围间隔。默认值0.25。
Raises:
ValueError: min_scale_factor大于max_scale_factor
"""
def __init__(self,
min_scale_factor=0.75,
max_scale_factor=1.25,
scale_step_size=0.25):
if min_scale_factor > max_scale_factor:
raise ValueError(
'min_scale_factor must be less than max_scale_factor, '
'but they are {} and {}.'.format(min_scale_factor,
max_scale_factor))
self.min_scale_factor = min_scale_factor
self.max_scale_factor = max_scale_factor
self.scale_step_size = scale_step_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_scale_factor == self.max_scale_factor:
scale_factor = self.min_scale_factor
elif self.scale_step_size == 0:
scale_factor = np.random.uniform(self.min_scale_factor,
self.max_scale_factor)
else:
num_steps = int((self.max_scale_factor - self.min_scale_factor) /
self.scale_step_size + 1)
scale_factors = np.linspace(self.min_scale_factor,
self.max_scale_factor,
num_steps).tolist()
np.random.shuffle(scale_factors)
scale_factor = scale_factors[0]
im = cv2.resize(
im, (0, 0),
fx=scale_factor,
fy=scale_factor,
interpolation=cv2.INTER_LINEAR)
if label is not None:
label = cv2.resize(
label, (0, 0),
fx=scale_factor,
fy=scale_factor,
interpolation=cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
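# 新增尺度采样示例(仅作示意,非原模块内容):默认参数下num_steps为3,
# 可选尺度为np.linspace(0.75, 1.25, 3) = [0.75, 1.0, 1.25],每次随机取其一。
def _demo_resize_step_scaling():
    im = np.zeros((100, 200, 3), dtype=np.float32)
    im_out, _ = ResizeStepScaling()(im)
    print(im_out.shape[0] in (75, 100, 125))  # True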
class Normalize(SegTransform):
"""对图像进行标准化。
1.尺度缩放到 [0,1]。
2.对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]。
std (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。
Raises:
ValueError: mean或std不是list对象。std包含0。
"""
def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
self.mean = mean
self.std = std
if not (isinstance(self.mean, list) and isinstance(self.std, list)):
raise ValueError("{}: input type is invalid.".format(self))
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
raise ValueError('{}: std is invalid!'.format(self))
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class Padding(SegTransform):
"""对图像或标注图像进行padding,padding方向为右和下。
根据提供的值对图像或标注图像进行padding操作。
Args:
target_size (int|list|tuple): padding后图像的大小。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认值为255。
Raises:
TypeError: target_size不是int|list|tuple。
ValueError: target_size为list|tuple时元素个数不等于2。
"""
def __init__(self,
target_size,
im_padding_value=[127.5, 127.5, 127.5],
label_padding_value=255):
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
raise ValueError(
'when target is list or tuple, it should include 2 elements, but it is {}'
.format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
self.im_padding_value = im_padding_value
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info新增字段为:
-shape_before_padding (tuple): 保存padding之前图像的形状(h, w)。
Raises:
ValueError: 输入图像im或label的形状大于目标值
"""
if im_info is None:
im_info = list()
im_info.append(('padding', im.shape[:2]))
im_height, im_width = im.shape[0], im.shape[1]
if isinstance(self.target_size, int):
target_height = self.target_size
target_width = self.target_size
else:
target_height = self.target_size[1]
target_width = self.target_size[0]
pad_height = target_height - im_height
pad_width = target_width - im_width
if pad_height < 0 or pad_width < 0:
raise ValueError(
'the size of the image should be less than target_size, but the size of the image ({}, {}) is larger than target_size ({}, {})'
.format(im_width, im_height, target_width, target_height))
else:
im = cv2.copyMakeBorder(
im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None:
label = cv2.copyMakeBorder(
label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomPaddingCrop(SegTransform):
"""对图像和标注图进行随机裁剪,当所需要的裁剪尺寸大于原图时,则进行padding操作。
Args:
crop_size (int|list|tuple): 裁剪图像大小。默认为512。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认值为255。
Raises:
TypeError: crop_size不是int/list/tuple。
ValueError: target_size为list/tuple时元素个数不等于2。
"""
def __init__(self,
crop_size=512,
im_padding_value=[127.5, 127.5, 127.5],
label_padding_value=255):
if isinstance(crop_size, list) or isinstance(crop_size, tuple):
if len(crop_size) != 2:
raise ValueError(
'when crop_size is list or tuple, it should include 2 elements, but it is {}'
.format(crop_size))
elif not isinstance(crop_size, int):
raise TypeError(
"Type of crop_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(crop_size)))
self.crop_size = crop_size
self.im_padding_value = im_padding_value
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if isinstance(self.crop_size, int):
crop_width = self.crop_size
crop_height = self.crop_size
else:
crop_width = self.crop_size[0]
crop_height = self.crop_size[1]
img_height = im.shape[0]
img_width = im.shape[1]
if img_height == crop_height and img_width == crop_width:
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
else:
pad_height = max(crop_height - img_height, 0)
pad_width = max(crop_width - img_width, 0)
if (pad_height > 0 or pad_width > 0):
im = cv2.copyMakeBorder(
im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None:
label = cv2.copyMakeBorder(
label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
img_height = im.shape[0]
img_width = im.shape[1]
if crop_height > 0 and crop_width > 0:
h_off = np.random.randint(img_height - crop_height + 1)
w_off = np.random.randint(img_width - crop_width + 1)
im = im[h_off:(crop_height + h_off), w_off:(w_off + crop_width
), :]
if label is not None:
label = label[h_off:(crop_height + h_off), w_off:(
w_off + crop_width)]
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
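# 新增使用示例(仅作示意,非原模块内容):当crop_size大于原图时会先padding再裁剪,
# 因此输出尺寸始终等于crop_size。
def _demo_random_padding_crop():
    im = np.zeros((300, 400, 3), dtype=np.float32)
    im_out, _ = RandomPaddingCrop(crop_size=512)(im)
    print(im_out.shape)  # (512, 512, 3)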
class RandomBlur(SegTransform):
"""以一定的概率对图像进行高斯模糊。
Args:
prob (float): 图像模糊概率。默认为0.1。
"""
def __init__(self, prob=0.1):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.prob <= 0:
n = 0
elif self.prob >= 1:
n = 1
else:
n = int(1.0 / self.prob)
if n > 0:
if np.random.randint(0, n) == 0:
radius = np.random.randint(3, 10)
if radius % 2 != 1:
radius = radius + 1
if radius > 9:
radius = 9
im = cv2.GaussianBlur(im, (radius, radius), 0, 0)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
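# 新增说明示例(仅作示意,非原模块内容):prob在(0, 1)内时取n = int(1.0 / prob),
# 再以1/n的概率触发模糊,因此实际触发概率近似等于prob。
def _demo_random_blur():
    im = np.zeros((64, 64, 3), dtype=np.float32)
    im_out, _ = RandomBlur(prob=0.5)(im)
    print(im_out.shape)  # (64, 64, 3),是否模糊由随机数决定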
class RandomScaleAspect(SegTransform):
"""裁剪并resize回原始尺寸的图像和标注图像。
按照一定的面积比和宽高比对图像进行裁剪,并resize回原始图像大小,当存在标注图时,同步进行处理。
Args:
min_scale (float):裁取图像占原始图像的面积比,取值[0,1],为0时则返回原图。默认为0.5。
aspect_ratio (float): 裁取图像的宽高比范围,非负值,为0时返回原图。默认为0.33。
"""
def __init__(self, min_scale=0.5, aspect_ratio=0.33):
self.min_scale = min_scale
self.aspect_ratio = aspect_ratio
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_scale != 0 and self.aspect_ratio != 0:
img_height = im.shape[0]
img_width = im.shape[1]
for i in range(0, 10):
area = img_height * img_width
target_area = area * np.random.uniform(self.min_scale, 1.0)
aspectRatio = np.random.uniform(self.aspect_ratio,
1.0 / self.aspect_ratio)
dw = int(np.sqrt(target_area * 1.0 * aspectRatio))
dh = int(np.sqrt(target_area * 1.0 / aspectRatio))
if (np.random.randint(10) < 5):
tmp = dw
dw = dh
dh = tmp
if (dh < img_height and dw < img_width):
h1 = np.random.randint(0, img_height - dh)
w1 = np.random.randint(0, img_width - dw)
im = im[h1:(h1 + dh), w1:(w1 + dw), :]
label = label[h1:(h1 + dh), w1:(w1 + dw)]
im = cv2.resize(
im, (img_width, img_height),
interpolation=cv2.INTER_LINEAR)
label = cv2.resize(
label, (img_width, img_height),
interpolation=cv2.INTER_NEAREST)
break
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomDistort(SegTransform):
"""对图像进行随机失真。
1. 对变换的操作顺序进行随机化操作。
2. 按照1中的顺序以一定的概率对图像进行随机像素内容变换。
Args:
brightness_range (float): 明亮度因子的范围。默认为0.5。
brightness_prob (float): 随机调整明亮度的概率。默认为0.5。
contrast_range (float): 对比度因子的范围。默认为0.5。
contrast_prob (float): 随机调整对比度的概率。默认为0.5。
saturation_range (float): 饱和度因子的范围。默认为0.5。
saturation_prob (float): 随机调整饱和度的概率。默认为0.5。
hue_range (int): 色调因子的范围。默认为18。
hue_prob (float): 随机调整色调的概率。默认为0.5。
"""
def __init__(self,
brightness_range=0.5,
brightness_prob=0.5,
contrast_range=0.5,
contrast_prob=0.5,
saturation_range=0.5,
saturation_prob=0.5,
hue_range=18,
hue_prob=0.5):
self.brightness_range = brightness_range
self.brightness_prob = brightness_prob
self.contrast_range = contrast_range
self.contrast_prob = contrast_prob
self.saturation_range = saturation_range
self.saturation_prob = saturation_prob
self.hue_range = hue_range
self.hue_prob = hue_prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
brightness_lower = 1 - self.brightness_range
brightness_upper = 1 + self.brightness_range
contrast_lower = 1 - self.contrast_range
contrast_upper = 1 + self.contrast_range
saturation_lower = 1 - self.saturation_range
saturation_upper = 1 + self.saturation_range
hue_lower = -self.hue_range
hue_upper = self.hue_range
ops = [brightness, contrast, saturation, hue]
random.shuffle(ops)
params_dict = {
'brightness': {
'brightness_lower': brightness_lower,
'brightness_upper': brightness_upper
},
'contrast': {
'contrast_lower': contrast_lower,
'contrast_upper': contrast_upper
},
'saturation': {
'saturation_lower': saturation_lower,
'saturation_upper': saturation_upper
},
'hue': {
'hue_lower': hue_lower,
'hue_upper': hue_upper
}
}
prob_dict = {
'brightness': self.brightness_prob,
'contrast': self.contrast_prob,
'saturation': self.saturation_prob,
'hue': self.hue_prob
}
for id in range(4):
params = params_dict[ops[id].__name__]
prob = prob_dict[ops[id].__name__]
params['im'] = im
if np.random.uniform(0, 1) < prob:
im = ops[id](**params)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ArrangeSegmenter(SegTransform):
"""获取训练/验证/预测所需的信息。
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内
"""
def __init__(self, mode):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode should be defined as one of ['train', 'eval', 'test', 'quant']!"
)
self.mode = mode
def __call__(self, im, im_info, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当mode为'train'或'eval'时,返回的tuple为(im, label),分别对应图像np.ndarray数据、
标注图像np.ndarray数据;当mode为'test'时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、
存储与图像相关信息的列表;当mode为'quant'时,返回的tuple为(im, ),为图像np.ndarray数据。
"""
im = permute(im, False)
if self.mode == 'train' or self.mode == 'eval':
label = label[np.newaxis, :, :]
return (im, label)
elif self.mode == 'test':
return (im, im_info)
else:
return (im, )
class ComposedSegTransforms(Compose):
""" 语义分割模型(UNet/DeepLabv3p)的图像处理流程,具体如下
训练阶段:
1. 随机对图像以0.5的概率水平翻转
2. 按不同的比例随机Resize原图
3. 从原图中随机crop出大小为train_crop_size大小的子图,如若crop出来的图小于train_crop_size,则会将图padding到对应大小
4. 图像归一化
预测阶段:
1. 图像归一化
Args:
mode(str): 图像处理所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
train_crop_size(list): 模型训练阶段,随机从原图crop的大小
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
train_crop_size=[769, 769],
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5]):
if mode == 'train':
# 训练时的transforms,包含数据增强
pass
else:
# 验证/预测时的transforms
transforms = [Normalize(mean=mean, std=std)]
super(ComposedSegTransforms, self).__init__(transforms)
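# 新增完整流程示例(仅作示意,非原模块内容):验证/预测阶段只做归一化,
# 输出为(im, im_info)。
def _demo_composed_seg_transforms():
    eval_transforms = ComposedSegTransforms(mode='eval')
    im = np.random.randint(0, 256, (512, 512, 3)).astype(np.float32)
    im_out, im_info = eval_transforms(im)
    print(im_out.shape)  # (512, 512, 3)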
# download pre-compiled opencv lib
OPENCV_URL=https://paddleseg.bj.bcebos.com/deploy/docker/opencv3gcc4.8.tar.bz2
if [ ! -d "./deps/opencv3gcc4.8" ]; then
mkdir -p deps
cd deps
wget -c ${OPENCV_URL}
tar xvfj opencv3gcc4.8.tar.bz2
rm -rf opencv3gcc4.8.tar.bz2
cd ..
fi
# OpenVINO预编译库的路径
OPENVINO_DIR=$INTEL_OPENVINO_DIR/inference_engine
# ngraph lib的路径,编译OpenVINO时通常会生成
NGRAPH_LIB=$INTEL_OPENVINO_DIR/deployment_tools/ngraph/lib
# gflags预编译库的路径
GFLAGS_DIR=$(pwd)/deps/gflags
# glog预编译库的路径
GLOG_DIR=$(pwd)/deps/glog
# opencv使用自带预编译版本
OPENCV_DIR=$(pwd)/deps/opencv/
# CPU架构
ARCH=x86
export ARCH

# 下载并编译third-party lib
sh $(pwd)/scripts/install_third-party.sh

rm -rf build
mkdir -p build
cd build
cmake .. \
    -DOPENCV_DIR=${OPENCV_DIR} \
    -DGFLAGS_DIR=${GFLAGS_DIR} \
    -DGLOG_DIR=${GLOG_DIR} \
    -DOPENVINO_DIR=${OPENVINO_DIR} \
    -DNGRAPH_LIB=${NGRAPH_LIB} \
    -DARCH=${ARCH}
make
# download third-party libs
if [ ! -d "./deps" ]; then
mkdir deps
fi
if [ ! -d "./deps/gflag" ]; then
cd deps
git clone https://github.com/gflags/gflags
cd gflags
cmake .
make -j 8
cd ..
cd ..
fi
if [ ! -d "./deps/glog" ]; then
cd deps
git clone https://github.com/google/glog
sudo apt-get install -y autoconf automake libtool
cd glog
./autogen.sh
./configure
make -j 8
cd ..
cd ..
fi
if [ "$ARCH" = "x86" ]; then
OPENCV_URL=https://bj.bcebos.com/paddlex/deploy/x86opencv/opencv.tar.bz2
else
OPENCV_URL=https://bj.bcebos.com/paddlex/deploy/armopencv/opencv.tar.bz2
fi
if [ ! -d "./deps/opencv" ]; then
cd deps
wget -c ${OPENCV_URL}
tar xvfj opencv.tar.bz2
rm -rf opencv.tar.bz2
cd ..
fi
@@ -13,28 +13,47 @@
// limitations under the License.

#include "include/paddlex/paddlex.h"

#include <iostream>
#include <fstream>

using namespace InferenceEngine;

namespace PaddleX {

void Model::create_predictor(const std::string& model_dir,
                             const std::string& cfg_file,
                             std::string device) {
  InferenceEngine::Core ie;
  network_ = ie.ReadNetwork(
      model_dir, model_dir.substr(0, model_dir.size() - 4) + ".bin");
  network_.setBatchSize(1);

  InferenceEngine::InputsDataMap inputInfo(network_.getInputsInfo());
  std::string imageInputName;
  for (const auto & inputInfoItem : inputInfo) {
    if (inputInfoItem.second->getTensorDesc().getDims().size() == 4) {
      imageInputName = inputInfoItem.first;
      inputInfoItem.second->setPrecision(InferenceEngine::Precision::FP32);
      inputInfoItem.second->getPreProcess().setResizeAlgorithm(
          InferenceEngine::RESIZE_BILINEAR);
      inputInfoItem.second->setLayout(InferenceEngine::Layout::NCHW);
    }
    if (inputInfoItem.second->getTensorDesc().getDims().size() == 2) {
      imageInputName = inputInfoItem.first;
      inputInfoItem.second->setPrecision(InferenceEngine::Precision::FP32);
    }
  }
  if (device == "MYRIAD") {
    std::map<std::string, std::string> networkConfig;
    networkConfig["VPU_HW_STAGES_OPTIMIZATION"] = "ON";
    executable_network_ = ie.LoadNetwork(network_, device, networkConfig);
  } else {
    executable_network_ = ie.LoadNetwork(network_, device);
  }
  load_config(cfg_file);
}

bool Model::load_config(const std::string& cfg_file) {
  YAML::Node config = YAML::LoadFile(cfg_file);
  type = config["_Attributes"]["model_type"].as<std::string>();
  name = config["Model"].as<std::string>();
  bool to_rgb = true;
@@ -48,22 +67,26 @@ bool Model::load_config(const std::string& cfg_file) {
      return false;
    }
  }
  // init preprocess ops
  transforms_.Init(config["Transforms"], type, to_rgb);
  // read label list
  for (const auto& item : config["_Attributes"]["labels"]) {
    int index = labels.size();
    labels[index] = item.as<std::string>();
  }
  return true;
}

bool Model::preprocess(cv::Mat* input_im, ImageBlob* inputs) {
  if (!transforms_.Run(input_im, inputs)) {
    return false;
  }
  return true;
}

bool Model::predict(const cv::Mat& im, ClsResult* result) {
  inputs_.clear();
  if (type == "detector") {
    std::cerr << "Loading model is a 'detector', DetResult should be passed to "
                 "function predict()!"
@@ -75,34 +98,221 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
              << std::endl;
    return false;
  }
  // preprocess
  InferenceEngine::InferRequest infer_request =
      executable_network_.CreateInferRequest();
  std::string input_name = network_.getInputsInfo().begin()->first;
  inputs_.blob = infer_request.GetBlob(input_name);
  cv::Mat im_clone = im.clone();
  if (!preprocess(&im_clone, &inputs_)) {
    std::cerr << "Preprocess failed!" << std::endl;
    return false;
  }
  // predict
  infer_request.Infer();
  std::string output_name = network_.getOutputsInfo().begin()->first;
  output_ = infer_request.GetBlob(output_name);
  InferenceEngine::MemoryBlob::CPtr moutput =
      InferenceEngine::as<InferenceEngine::MemoryBlob>(output_);
  auto moutputHolder = moutput->rmap();
  float* outputs_data = moutputHolder.as<float *>();
  // post process
  // 以输出blob的元素个数作为遍历上界(原实现误用sizeof(outputs_data),
  // 其结果是指针大小而非输出元素个数)
  auto ptr = std::max_element(outputs_data, outputs_data + moutput->size());
  result->category_id = std::distance(outputs_data, ptr);
  result->score = *ptr;
  result->category = labels[result->category_id];
  return true;
}

bool Model::predict(const cv::Mat& im, DetResult* result) {
inputs_.clear();
result->clear();
if (type == "classifier") {
std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
"to function predict()!" << std::endl;
return false;
} else if (type == "segmenter") {
std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
"to function predict()!" << std::endl;
return false;
}
InferenceEngine::InferRequest infer_request =
executable_network_.CreateInferRequest();
InferenceEngine::InputsDataMap input_maps = network_.getInputsInfo();
std::string inputName;
for (const auto & input_map : input_maps) {
if (input_map.second->getTensorDesc().getDims().size() == 4) {
inputName = input_map.first;
inputs_.blob = infer_request.GetBlob(inputName);
}
if (input_map.second->getTensorDesc().getDims().size() == 2) {
inputName = input_map.first;
inputs_.ori_im_size_ = infer_request.GetBlob(inputName);
}
}
cv::Mat im_clone = im.clone();
if (!preprocess(&im_clone, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
infer_request.Infer();
InferenceEngine::OutputsDataMap out_map = network_.getOutputsInfo();
auto iter = out_map.begin();
std::string outputName = iter->first;
InferenceEngine::Blob::Ptr output = infer_request.GetBlob(outputName);
InferenceEngine::MemoryBlob::CPtr moutput =
InferenceEngine::as<InferenceEngine::MemoryBlob>(output);
InferenceEngine::TensorDesc blob_output = moutput->getTensorDesc();
std::vector<size_t> output_shape = blob_output.getDims();
auto moutputHolder = moutput->rmap();
float* data = moutputHolder.as<float *>();
int size = 1;
for (auto& i : output_shape) {
size *= static_cast<int>(i);
}
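  // Each detection occupies 6 consecutive floats in the flattened output,
  // assumed here to be [class_id, score, xmin, ymin, xmax, ymax].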
int num_boxes = size / 6;
for (int i = 0; i < num_boxes; ++i) {
if (data[i * 6] > 0) {
Box box;
box.category_id = static_cast<int>(data[i * 6]);
box.category = labels[box.category_id];
box.score = data[i * 6 + 1];
float xmin = data[i * 6 + 2];
float ymin = data[i * 6 + 3];
float xmax = data[i * 6 + 4];
float ymax = data[i * 6 + 5];
float w = xmax - xmin + 1;
float h = ymax - ymin + 1;
box.coordinate = {xmin, ymin, w, h};
result->boxes.push_back(std::move(box));
}
}
  return true;
}
bool Model::predict(const cv::Mat& im, SegResult* result) {
result->clear();
inputs_.clear();
if (type == "classifier") {
std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
"to function predict()!" << std::endl;
return false;
} else if (type == "detector") {
std::cerr << "Loading model is a 'detector', DetResult should be passed to "
"function predict()!" << std::endl;
return false;
}
// init infer
InferenceEngine::InferRequest infer_request =
executable_network_.CreateInferRequest();
std::string input_name = network_.getInputsInfo().begin()->first;
inputs_.blob = infer_request.GetBlob(input_name);
// preprocess
cv::Mat im_clone = im.clone();
if (!preprocess(&im_clone, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
// predict
infer_request.Infer();
InferenceEngine::OutputsDataMap out_map = network_.getOutputsInfo();
auto iter = out_map.begin();
iter++;
std::string output_name_score = iter->first;
InferenceEngine::Blob::Ptr output_score =
infer_request.GetBlob(output_name_score);
InferenceEngine::MemoryBlob::CPtr moutput_score =
InferenceEngine::as<InferenceEngine::MemoryBlob>(output_score);
InferenceEngine::TensorDesc blob_score = moutput_score->getTensorDesc();
std::vector<size_t> output_score_shape = blob_score.getDims();
int size = 1;
for (auto& i : output_score_shape) {
size *= static_cast<int>(i);
result->score_map.shape.push_back(static_cast<int>(i));
}
result->score_map.data.resize(size);
auto moutputHolder_score = moutput_score->rmap();
float* score_data = moutputHolder_score.as<float *>();
memcpy(result->score_map.data.data(), score_data, moutput_score->byteSize());
iter++;
std::string output_name_label = iter->first;
InferenceEngine::Blob::Ptr output_label =
infer_request.GetBlob(output_name_label);
InferenceEngine::MemoryBlob::CPtr moutput_label =
InferenceEngine::as<InferenceEngine::MemoryBlob>(output_label);
InferenceEngine::TensorDesc blob_label = moutput_label->getTensorDesc();
std::vector<size_t> output_label_shape = blob_label.getDims();
size = 1;
for (auto& i : output_label_shape) {
size *= static_cast<int>(i);
result->label_map.shape.push_back(static_cast<int>(i));
}
result->label_map.data.resize(size);
auto moutputHolder_label = moutput_label->rmap();
int* label_data = moutputHolder_label.as<int *>();
memcpy(result->label_map.data.data(), label_data, moutput_label->byteSize());
std::vector<uint8_t> label_map(result->label_map.data.begin(),
result->label_map.data.end());
cv::Mat mask_label(result->label_map.shape[1],
result->label_map.shape[2],
CV_8UC1,
label_map.data());
cv::Mat mask_score(result->score_map.shape[2],
result->score_map.shape[3],
CV_32FC1,
result->score_map.data.data());
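  // Undo the preprocessing in reverse: reshape_order_ records the resize and
  // padding steps applied to the input, so the predicted masks are cropped or
  // resized back toward the original image size step by step.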
int idx = 1;
int len_postprocess = inputs_.im_size_before_resize_.size();
for (std::vector<std::string>::reverse_iterator iter =
inputs_.reshape_order_.rbegin();
iter != inputs_.reshape_order_.rend();
++iter) {
if (*iter == "padding") {
auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx];
inputs_.im_size_before_resize_.pop_back();
auto padding_w = before_shape[0];
auto padding_h = before_shape[1];
mask_label = mask_label(cv::Rect(0, 0, padding_h, padding_w));
mask_score = mask_score(cv::Rect(0, 0, padding_h, padding_w));
} else if (*iter == "resize") {
auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx];
inputs_.im_size_before_resize_.pop_back();
auto resize_w = before_shape[0];
auto resize_h = before_shape[1];
cv::resize(mask_label,
mask_label,
cv::Size(resize_h, resize_w),
0,
0,
cv::INTER_NEAREST);
cv::resize(mask_score,
mask_score,
cv::Size(resize_h, resize_w),
0,
0,
cv::INTER_LINEAR);
}
++idx;
}
result->label_map.data.assign(mask_label.begin<uint8_t>(),
mask_label.end<uint8_t>());
result->label_map.shape = {mask_label.rows, mask_label.cols};
result->score_map.data.assign(mask_score.begin<float>(),
mask_score.end<float>());
result->score_map.shape = {mask_score.rows, mask_score.cols};
return true;
}
} // namespace PaddleX
// See the License for the specific language governing permissions and
// limitations under the License.

#include "include/paddlex/transforms.h"

#include <math.h>

#include <iostream>
#include <fstream>
#include <string>
#include <vector>

namespace PaddleX {

std::map<std::string, int> interpolations = {{"LINEAR", cv::INTER_LINEAR},
                                             {"NEAREST", cv::INTER_NEAREST},
                                             {"AREA", cv::INTER_AREA},
                                             {"CUBIC", cv::INTER_CUBIC},
                                             {"LANCZOS4", cv::INTER_LANCZOS4}};

bool Normalize::Run(cv::Mat* im, ImageBlob* data) {
  for (int h = 0; h < im->rows; h++) {
    for (int w = 0; w < im->cols; w++) {
      im->at<cv::Vec3f>(h, w)[0] =
          (im->at<cv::Vec3f>(h, w)[0] / 255.0 - mean_[0]) / std_[0];
      im->at<cv::Vec3f>(h, w)[1] =
          (im->at<cv::Vec3f>(h, w)[1] / 255.0 - mean_[1]) / std_[1];
      im->at<cv::Vec3f>(h, w)[2] =
          (im->at<cv::Vec3f>(h, w)[2] / 255.0 - mean_[2]) / std_[2];
    }
  }
  return true;
}

float ResizeByShort::GenerateScale(const cv::Mat& im) {
  int origin_w = im.cols;
  int origin_h = im.rows;
  int im_size_max = std::max(origin_w, origin_h);
  int im_size_min = std::min(origin_w, origin_h);
  float scale =
      static_cast<float>(short_size_) / static_cast<float>(im_size_min);
  if (max_size_ > 0) {
    if (round(scale * im_size_max) > max_size_) {
      scale = static_cast<float>(max_size_) / static_cast<float>(im_size_max);
    }
  }
  return scale;
}

bool ResizeByShort::Run(cv::Mat* im, ImageBlob* data) {
  data->im_size_before_resize_.push_back({im->rows, im->cols});
  data->reshape_order_.push_back("resize");
  float scale = GenerateScale(*im);
  int width = static_cast<int>(round(scale * im->cols));
  int height = static_cast<int>(round(scale * im->rows));
  cv::resize(*im, *im, cv::Size(width, height), 0, 0, cv::INTER_LINEAR);
  data->new_im_size_[0] = im->rows;
  data->new_im_size_[1] = im->cols;
  data->scale = scale;
  return true;
}

bool CenterCrop::Run(cv::Mat* im, ImageBlob* data) {
int height = static_cast<int>(im->rows);
int width = static_cast<int>(im->cols);
if (height < height_ || width < width_) {
std::cerr << "[CenterCrop] Image size less than crop size" << std::endl;
return false;
}
int offset_x = static_cast<int>((width - width_) / 2);
int offset_y = static_cast<int>((height - height_) / 2);
cv::Rect crop_roi(offset_x, offset_y, width_, height_);
*im = (*im)(crop_roi);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
return true;
}
bool Padding::Run(cv::Mat* im, ImageBlob* data) {
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("padding");
int padding_w = 0;
int padding_h = 0;
  if (width_ > 1 && height_ > 1) {
padding_w = width_ - im->cols;
padding_h = height_ - im->rows;
} else if (coarsest_stride_ >= 1) {
int h = im->rows;
int w = im->cols;
padding_h =
ceil(h * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows;
padding_w =
ceil(w * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols;
}
if (padding_h < 0 || padding_w < 0) {
std::cerr << "[Padding] Computed padding_h=" << padding_h
<< ", padding_w=" << padding_w
<< ", but they should be greater than 0." << std::endl;
return false;
}
cv::Scalar value = cv::Scalar(im_value_[0], im_value_[1], im_value_[2]);
cv::copyMakeBorder(
*im, *im, 0, padding_h, 0, padding_w, cv::BORDER_CONSTANT, value);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
return true;
}
bool ResizeByLong::Run(cv::Mat* im, ImageBlob* data) {
if (long_size_ <= 0) {
std::cerr << "[ResizeByLong] long_size should be greater than 0"
<< std::endl;
return false;
}
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("resize");
int origin_w = im->cols;
int origin_h = im->rows;
int im_size_max = std::max(origin_w, origin_h);
float scale =
static_cast<float>(long_size_) / static_cast<float>(im_size_max);
cv::resize(*im, *im, cv::Size(), scale, scale, cv::INTER_NEAREST);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
data->scale = scale;
return true;
}
bool Resize::Run(cv::Mat* im, ImageBlob* data) {
if (width_ <= 0 || height_ <= 0) {
std::cerr << "[Resize] width and height should be greater than 0"
<< std::endl;
return false;
}
if (interpolations.count(interp_) <= 0) {
std::cerr << "[Resize] Invalid interpolation method: '" << interp_ << "'"
<< std::endl;
return false;
}
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("resize");
cv::resize(
*im, *im, cv::Size(width_, height_), 0, 0, interpolations[interp_]);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
return true;
}
void Transforms::Init(
const YAML::Node& transforms_node, std::string type, bool to_rgb) {
  transforms_.clear();
  to_rgb_ = to_rgb;
  type_ = type;
  for (const auto& item : transforms_node) {
    std::string name = item.begin()->first.as<std::string>();
    std::cout << "trans name: " << name << std::endl;
    std::shared_ptr<Transform> transform = CreateTransform(name);
    transform->Init(item.begin()->second);
    transforms_.push_back(transform);
  }
}

std::shared_ptr<Transform> Transforms::CreateTransform(
    const std::string& transform_name) {
  if (transform_name == "Normalize") {
    return std::make_shared<Normalize>();
  } else if (transform_name == "ResizeByShort") {
    return std::make_shared<ResizeByShort>();
  } else if (transform_name == "CenterCrop") {
    return std::make_shared<CenterCrop>();
  } else if (transform_name == "Resize") {
    return std::make_shared<Resize>();
  } else if (transform_name == "Padding") {
    return std::make_shared<Padding>();
  } else if (transform_name == "ResizeByLong") {
    return std::make_shared<ResizeByLong>();
  } else {
    std::cerr << "There's unexpected transform(name='" << transform_name
              << "')." << std::endl;
    exit(-1);
  }
}

bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
  // preprocess by order
  if (to_rgb_) {
    cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
  }
  (*im).convertTo(*im, CV_32FC3);
  if (type_ == "detector") {
    InferenceEngine::LockedMemory<void> input2Mapped =
        InferenceEngine::as<InferenceEngine::MemoryBlob>(
            data->ori_im_size_)->wmap();
    float* p = input2Mapped.as<float*>();
    p[0] = im->rows;
    p[1] = im->cols;
  }
  data->new_im_size_[0] = im->rows;
  data->new_im_size_[1] = im->cols;
  for (int i = 0; i < transforms_.size(); ++i) {
    if (!transforms_[i]->Run(im, data)) {
      std::cerr << "Apply transforms to image failed!" << std::endl;
      return false;
    }
  }
  // image format NHWC to NCHW
  // img data save to ImageBlob
  InferenceEngine::SizeVector blobSize = data->blob->getTensorDesc().getDims();
  const size_t width = blobSize[3];
  const size_t height = blobSize[2];
  const size_t channels = blobSize[1];
  InferenceEngine::MemoryBlob::Ptr mblob =
      InferenceEngine::as<InferenceEngine::MemoryBlob>(data->blob);
  auto mblobHolder = mblob->wmap();
  float* blob_data = mblobHolder.as<float*>();
  for (size_t c = 0; c < channels; c++) {
    for (size_t h = 0; h < height; h++) {
      for (size_t w = 0; w < width; w++) {
        blob_data[c * width * height + h * width + w] =
            im->at<cv::Vec3f>(h, w)[c];
      }
    }
  }
  return true;
}

}  // namespace PaddleX
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/paddlex/visualize.h"
namespace PaddleX {
std::vector<int> GenerateColorMap(int num_class) {
auto colormap = std::vector<int>(3 * num_class, 0);
for (int i = 0; i < num_class; ++i) {
int j = 0;
int lab = i;
while (lab) {
colormap[i * 3] |= (((lab >> 0) & 1) << (7 - j));
colormap[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j));
colormap[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j));
++j;
lab >>= 3;
}
}
return colormap;
}
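
// The palette above follows the Pascal-VOC colormap scheme: the bits of the
// class id are scattered over the high bits of the R/G/B channels so that
// neighboring ids receive clearly distinct colors. A minimal usage sketch
// (the values below are what the bit-twiddling yields for the first two ids):
//   auto cmap = GenerateColorMap(2);
//   // id 0 -> (0, 0, 0), id 1 -> (128, 0, 0)
//   cv::Scalar color(cmap[3 * 1], cmap[3 * 1 + 1], cmap[3 * 1 + 2]);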
cv::Mat Visualize(const cv::Mat& img,
const DetResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap,
float threshold) {
cv::Mat vis_img = img.clone();
auto boxes = result.boxes;
for (int i = 0; i < boxes.size(); ++i) {
if (boxes[i].score < threshold) {
continue;
}
cv::Rect roi = cv::Rect(boxes[i].coordinate[0],
boxes[i].coordinate[1],
boxes[i].coordinate[2],
boxes[i].coordinate[3]);
// draw box and title
std::string text = boxes[i].category;
int c1 = colormap[3 * boxes[i].category_id + 0];
int c2 = colormap[3 * boxes[i].category_id + 1];
int c3 = colormap[3 * boxes[i].category_id + 2];
cv::Scalar roi_color = cv::Scalar(c1, c2, c3);
text += std::to_string(static_cast<int>(boxes[i].score * 100)) + "%";
int font_face = cv::FONT_HERSHEY_SIMPLEX;
double font_scale = 0.5f;
float thickness = 0.5;
cv::Size text_size =
cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
cv::Point origin;
origin.x = roi.x;
origin.y = roi.y;
// background
cv::Rect text_back = cv::Rect(boxes[i].coordinate[0],
boxes[i].coordinate[1] - text_size.height,
text_size.width,
text_size.height);
// draw
cv::rectangle(vis_img, roi, roi_color, 2);
cv::rectangle(vis_img, text_back, roi_color, -1);
cv::putText(vis_img,
text,
origin,
font_face,
font_scale,
cv::Scalar(255, 255, 255),
thickness);
// mask
if (boxes[i].mask.data.size() == 0) {
continue;
}
cv::Mat bin_mask(result.mask_resolution,
result.mask_resolution,
CV_32FC1,
boxes[i].mask.data.data());
cv::resize(bin_mask,
bin_mask,
cv::Size(boxes[i].mask.shape[0], boxes[i].mask.shape[1]));
cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY);
cv::Mat full_mask = cv::Mat::zeros(vis_img.size(), CV_8UC1);
bin_mask.copyTo(full_mask(roi));
cv::Mat mask_ch[3];
mask_ch[0] = full_mask * c1;
mask_ch[1] = full_mask * c2;
mask_ch[2] = full_mask * c3;
cv::Mat mask;
cv::merge(mask_ch, 3, mask);
cv::addWeighted(vis_img, 1, mask, 0.5, 0, vis_img);
}
return vis_img;
}
cv::Mat Visualize(const cv::Mat& img,
const SegResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap) {
std::vector<uint8_t> label_map(result.label_map.data.begin(),
result.label_map.data.end());
cv::Mat mask(result.label_map.shape[0],
result.label_map.shape[1],
CV_8UC1,
label_map.data());
cv::Mat color_mask = cv::Mat::zeros(
result.label_map.shape[0], result.label_map.shape[1], CV_8UC3);
int rows = img.rows;
int cols = img.cols;
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
int category_id = static_cast<int>(mask.at<uchar>(i, j));
color_mask.at<cv::Vec3b>(i, j)[0] = colormap[3 * category_id + 0];
color_mask.at<cv::Vec3b>(i, j)[1] = colormap[3 * category_id + 1];
color_mask.at<cv::Vec3b>(i, j)[2] = colormap[3 * category_id + 2];
}
}
return color_mask;
}
std::string generate_save_path(const std::string& save_dir,
const std::string& file_path) {
if (access(save_dir.c_str(), 0) < 0) {
#ifdef _WIN32
mkdir(save_dir.c_str());
#else
if (mkdir(save_dir.c_str(), S_IRWXU) < 0) {
std::cerr << "Fail to create " << save_dir << "directory." << std::endl;
}
#endif
}
int pos = file_path.find_last_of(OS_PATH_SEP);
std::string image_name(file_path.substr(pos + 1));
return save_dir + OS_PATH_SEP + image_name;
}
} // namespace PaddleX
cmake_minimum_required(VERSION 3.0)
project(PaddleX CXX C)
option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." OFF)
SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
SET(LITE_DIR "" CACHE PATH "Location of libraries")
SET(OPENCV_DIR "" CACHE PATH "Location of libraries")
SET(NGRAPH_LIB "" CACHE PATH "Location of libraries")
include(cmake/yaml-cpp.cmake)
include_directories("${CMAKE_SOURCE_DIR}/")
link_directories("${CMAKE_CURRENT_BINARY_DIR}")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include")
link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib")
macro(safe_set_static_flag)
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
if(${flag_var} MATCHES "/MD")
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endif(${flag_var} MATCHES "/MD")
endforeach(flag_var)
endmacro()
if (NOT DEFINED LITE_DIR OR ${LITE_DIR} STREQUAL "")
message(FATAL_ERROR "please set LITE_DIR with -LITE_DIR=/path/influence_engine")
endif()
if (NOT DEFINED OPENCV_DIR OR ${OPENCV_DIR} STREQUAL "")
message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv")
endif()
if (NOT DEFINED GFLAGS_DIR OR ${GFLAGS_DIR} STREQUAL "")
message(FATAL_ERROR "please set GFLAGS_DIR with -DGFLAGS_DIR=/path/gflags")
endif()
link_directories("${LITE_DIR}/lib")
include_directories("${LITE_DIR}/include")
link_directories("${GFLAGS_DIR}/lib")
include_directories("${GFLAGS_DIR}/include")
if (WIN32)
find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/build/ NO_DEFAULT_PATH)
unset(OpenCV_DIR CACHE)
else ()
find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/cmake NO_DEFAULT_PATH)
endif ()
include_directories(${OpenCV_INCLUDE_DIRS})
if (WIN32)
add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
if (WITH_STATIC_LIB)
safe_set_static_flag()
add_definitions(-DSTATIC_LIB)
endif()
else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=hard -mfpu=neon-vfpv4 -g -O2 -fopenmp -std=c++11")
set(CMAKE_STATIC_LIBRARY_PREFIX "")
endif()
if(WITH_STATIC_LIB)
set(DEPS ${LITE_DIR}/lib/libpaddle_full_api_shared${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
set(DEPS ${LITE_DIR}/lib/libpaddle_full_api_shared${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
if (NOT WIN32)
set(DEPS ${DEPS}
glog gflags z yaml-cpp
)
else()
set(DEPS ${DEPS}
glog gflags_static libprotobuf zlibstatic xxhash libyaml-cppmt)
set(DEPS ${DEPS} libcmt shlwapi)
endif(NOT WIN32)
if (NOT WIN32)
set(EXTERNAL_LIB "-ldl -lrt -lgomp -lz -lm -lpthread")
set(DEPS ${DEPS} ${EXTERNAL_LIB})
endif()
set(DEPS ${DEPS} ${OpenCV_LIBS})
add_executable(classifier demo/classifier.cpp src/transforms.cpp src/paddlex.cpp)
ADD_DEPENDENCIES(classifier ext-yaml-cpp)
target_link_libraries(classifier ${DEPS})
add_executable(segmenter demo/segmenter.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
ADD_DEPENDENCIES(segmenter ext-yaml-cpp)
target_link_libraries(segmenter ${DEPS})
add_executable(detector demo/detector.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
ADD_DEPENDENCIES(detector ext-yaml-cpp)
target_link_libraries(detector ${DEPS})
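
# A typical configure step for this CMakeLists (the paths below are
# placeholders and must point at your local Paddle Lite, OpenCV and gflags
# installations):
#   cmake .. -DLITE_DIR=/path/paddle_lite -DOPENCV_DIR=/path/opencv \
#            -DGFLAGS_DIR=/path/gflags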
include(ExternalProject)
message("${CMAKE_BUILD_TYPE}")
ExternalProject_Add(
ext-yaml-cpp
URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip
URL_MD5 9542d6de397d1fbd649ed468cb5850e6
CMAKE_ARGS
-DYAML_CPP_BUILD_TESTS=OFF
-DYAML_CPP_BUILD_TOOLS=OFF
-DYAML_CPP_INSTALL=OFF
-DYAML_CPP_BUILD_CONTRIB=OFF
-DMSVC_SHARED_RT=OFF
-DBUILD_SHARED_LIBS=OFF
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
PREFIX "${CMAKE_BINARY_DIR}/ext/yaml-cpp"
# Disable install step
INSTALL_COMMAND ""
LOG_DOWNLOAD ON
LOG_BUILD 1
)
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "include/paddlex/paddlex.h"
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_string(cfg_file, "", "Path of PaddleX model yml file");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_int32(thread_num, 1, "num of thread to infer");
int main(int argc, char** argv) {
// Parsing command-line
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir == "") {
std::cerr << "--model_dir need to be defined" << std::endl;
return -1;
}
if (FLAGS_cfg_file == "") {
std::cerr << "--cfg_flie need to be defined" << std::endl;
return -1;
}
if (FLAGS_image == "" & FLAGS_image_list == "") {
std::cerr << "--image or --image_list need to be defined" << std::endl;
return -1;
}
// load model
PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_thread_num);
std::cout << "init is done" << std::endl;
// predict
if (FLAGS_image_list != "") {
std::ifstream inf(FLAGS_image_list);
if (!inf) {
std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
return -1;
}
std::string image_path;
while (getline(inf, image_path)) {
PaddleX::ClsResult result;
cv::Mat im = cv::imread(image_path, 1);
model.predict(im, &result);
std::cout << "Predict label: " << result.category
<< ", label_id:" << result.category_id
<< ", score: " << result.score << std::endl;
}
} else {
PaddleX::ClsResult result;
cv::Mat im = cv::imread(FLAGS_image, 1);
model.predict(im, &result);
std::cout << "Predict label: " << result.category
<< ", label_id:" << result.category_id
<< ", score: " << result.score << std::endl;
}
return 0;
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <omp.h>
#include <algorithm>
#include <chrono> // NOLINT
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>
#include "include/paddlex/paddlex.h"
#include "include/paddlex/visualize.h"
using namespace std::chrono; // NOLINT
DEFINE_string(model_dir, "", "Path of openvino model xml file");
DEFINE_string(cfg_file, "", "Path of PaddleX model yaml file");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_int32(thread_num, 1, "num of thread to infer");
DEFINE_string(save_dir, "", "Path to save visualized image");
DEFINE_int32(batch_size, 1, "Batch size of infering");
DEFINE_double(threshold,
0.5,
"The minimum scores of target boxes which are shown");
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir == "") {
std::cerr << "--model_dir need to be defined" << std::endl;
return -1;
}
if (FLAGS_cfg_file == "") {
std::cerr << "--cfg_file need to be defined" << std::endl;
return -1;
}
if (FLAGS_image == "" & FLAGS_image_list == "") {
std::cerr << "--image or --image_list need to be defined" << std::endl;
return -1;
}
// load model
PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_thread_num);
int imgs = 1;
auto colormap = PaddleX::GenerateColorMap(model.labels.size());
// predict
if (FLAGS_image_list != "") {
std::ifstream inf(FLAGS_image_list);
if (!inf) {
std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
return -1;
}
std::string image_path;
while (getline(inf, image_path)) {
PaddleX::DetResult result;
cv::Mat im = cv::imread(image_path, 1);
model.predict(im, &result);
if (FLAGS_save_dir != "") {
cv::Mat vis_img = PaddleX::Visualize(
im, result, model.labels, colormap, FLAGS_threshold);
std::string save_path =
            PaddleX::generate_save_path(FLAGS_save_dir, image_path);
cv::imwrite(save_path, vis_img);
std::cout << "Visualized output saved as " << save_path << std::endl;
}
}
} else {
PaddleX::DetResult result;
cv::Mat im = cv::imread(FLAGS_image, 1);
model.predict(im, &result);
for (int i = 0; i < result.boxes.size(); ++i) {
std::cout << "image file: " << FLAGS_image << std::endl;
std::cout << ", predict label: " << result.boxes[i].category
<< ", label_id:" << result.boxes[i].category_id
<< ", score: " << result.boxes[i].score
<< ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0]
<< ", " << result.boxes[i].coordinate[1] << ", "
<< result.boxes[i].coordinate[2] << ", "
<< result.boxes[i].coordinate[3] << ")" << std::endl;
}
if (FLAGS_save_dir != "") {
// visualize
cv::Mat vis_img = PaddleX::Visualize(
im, result, model.labels, colormap, FLAGS_threshold);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
cv::imwrite(save_path, vis_img);
result.clear();
std::cout << "Visualized output saved as " << save_path << std::endl;
}
}
return 0;
}
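
// Example invocation of the demo above (the paths are placeholders):
//   ./detector --model_dir=/path/openvino_model --cfg_file=/path/model.yml \
//              --image=/path/test.jpg --save_dir=./output --threshold=0.5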
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>
#include "include/paddlex/paddlex.h"
#include "include/paddlex/visualize.h"
DEFINE_string(model_dir, "", "Path of openvino model xml file");
DEFINE_string(cfg_file, "", "Path of PaddleX model yaml file");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_string(save_dir, "", "Path to save visualized image");
DEFINE_int32(batch_size, 1, "Batch size of infering");
DEFINE_int32(thread_num, 1, "num of thread to infer");
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir == "") {
std::cerr << "--model_dir need to be defined" << std::endl;
return -1;
}
if (FLAGS_cfg_file == "") {
std::cerr << "--cfg_file need to be defined" << std::endl;
return -1;
}
if (FLAGS_image == "" & FLAGS_image_list == "") {
std::cerr << "--image or --image_list need to be defined" << std::endl;
return -1;
}
// load model
std::cout << "init start" << std::endl;
PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_cfg_file, FLAGS_thread_num);
std::cout << "init done" << std::endl;
int imgs = 1;
auto colormap = PaddleX::GenerateColorMap(model.labels.size());
if (FLAGS_image_list != "") {
std::ifstream inf(FLAGS_image_list);
if (!inf) {
std::cerr << "Fail to open file " << FLAGS_image_list <<std::endl;
return -1;
}
std::string image_path;
while (getline(inf, image_path)) {
PaddleX::SegResult result;
cv::Mat im = cv::imread(image_path, 1);
model.predict(im, &result);
if (FLAGS_save_dir != "") {
cv::Mat vis_img = PaddleX::Visualize(im, result, model.labels, colormap);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, image_path);
cv::imwrite(save_path, vis_img);
std::cout << "Visualized output saved as " << save_path << std::endl;
}
}
} else {
PaddleX::SegResult result;
cv::Mat im = cv::imread(FLAGS_image, 1);
model.predict(im, &result);
if (FLAGS_save_dir != "") {
cv::Mat vis_img = PaddleX::Visualize(im, result, model.labels, colormap);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
cv::imwrite(save_path, vis_img);
std::cout << "Visualized` output saved as " << save_path << std::endl;
}
result.clear();
}
return 0;
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "yaml-cpp/yaml.h"
#ifdef _WIN32
#define OS_PATH_SEP "\\"
#else
#define OS_PATH_SEP "/"
#endif
namespace PaddleX {
// Inference model configuration parser
class ConfigPaser {
public:
ConfigPaser() {}
~ConfigPaser() {}
bool load_config(const std::string& model_dir,
const std::string& cfg = "model.yml") {
// Load as a YAML::Node
YAML::Node config;
config = YAML::LoadFile(model_dir + OS_PATH_SEP + cfg);
if (config["Transforms"].IsDefined()) {
YAML::Node transforms_ = config["Transforms"];
} else {
std::cerr << "There's no field 'Transforms' in model.yml" << std::endl;
return false;
}
return true;
}
YAML::Node Transforms_;
};
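
// Usage sketch for the parser above (assuming model_dir contains model.yml):
//   PaddleX::ConfigPaser parser;
//   if (parser.load_config("/path/to/model")) {
//     // parser.Transforms_ now holds the "Transforms" node
//   }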
} // namespace PaddleX
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <arm_neon.h>
#include <paddle_api.h>
#include <functional>
#include <iostream>
#include <numeric>
#include <map>
#include <string>
#include <memory>
#include "include/paddlex/config_parser.h"
#include "include/paddlex/results.h"
#include "include/paddlex/transforms.h"
#include "yaml-cpp/yaml.h"
#ifdef _WIN32
#define OS_PATH_SEP "\\"
#else
#define OS_PATH_SEP "/"
#endif
namespace PaddleX {
class Model {
public:
void Init(const std::string& model_dir,
const std::string& cfg_file,
int thread_num) {
create_predictor(model_dir, cfg_file, thread_num);
}
void create_predictor(const std::string& model_dir,
const std::string& cfg_file,
int thread_num);
bool load_config(const std::string& model_dir);
bool preprocess(cv::Mat* input_im, ImageBlob* inputs);
bool predict(const cv::Mat& im, ClsResult* result);
bool predict(const cv::Mat& im, DetResult* result);
bool predict(const cv::Mat& im, SegResult* result);
std::string type;
std::string name;
std::map<int, std::string> labels;
Transforms transforms_;
ImageBlob inputs_;
std::shared_ptr<paddle::lite_api::PaddlePredictor> predictor_;
};
} // namespace PaddleX
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <string>
#include <vector>
namespace PaddleX {
template <class T>
struct Mask {
std::vector<T> data;
std::vector<int> shape;
void clear() {
data.clear();
shape.clear();
}
};
struct Box {
int category_id;
std::string category;
float score;
std::vector<float> coordinate;
Mask<float> mask;
};
class BaseResult {
public:
std::string type = "base";
};
class ClsResult : public BaseResult {
public:
int category_id;
std::string category;
float score;
std::string type = "cls";
};
class DetResult : public BaseResult {
public:
std::vector<Box> boxes;
int mask_resolution;
std::string type = "det";
void clear() { boxes.clear(); }
};
class SegResult : public BaseResult {
public:
Mask<int64_t> label_map;
Mask<float> score_map;
void clear() {
label_map.clear();
score_map.clear();
}
};
} // namespace PaddleX
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <yaml-cpp/yaml.h>
#include <paddle_api.h>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
namespace PaddleX {
/*
* @brief
* This class represents object for storing all preprocessed data
* */
class ImageBlob {
public:
// Original image height and width
std::vector<int> ori_im_size_ = std::vector<int>(2);
// Newest image height and width after process
std::vector<int> new_im_size_ = std::vector<int>(2);
// Image height and width before resize
std::vector<std::vector<int>> im_size_before_resize_;
// Reshape order
std::vector<std::string> reshape_order_;
// Resize scale
float scale = 1.0;
// Buffer for image data after preprocessing
std::unique_ptr<paddle::lite_api::Tensor> input_tensor_;
void clear() {
im_size_before_resize_.clear();
reshape_order_.clear();
}
};
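
// Note: im_size_before_resize_ and reshape_order_ grow in the order the
// preprocessing operators run; segmentation postprocessing walks them in
// reverse to map predictions back onto the original image.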
// Abstract base class of the preprocessing operations
class Transform {
public:
virtual void Init(const YAML::Node& item) = 0;
virtual bool Run(cv::Mat* im, ImageBlob* data) = 0;
};
class Normalize : public Transform {
public:
virtual void Init(const YAML::Node& item) {
mean_ = item["mean"].as<std::vector<float>>();
std_ = item["std"].as<std::vector<float>>();
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
std::vector<float> mean_;
std::vector<float> std_;
};
class ResizeByShort : public Transform {
public:
virtual void Init(const YAML::Node& item) {
short_size_ = item["short_size"].as<int>();
if (item["max_size"].IsDefined()) {
max_size_ = item["max_size"].as<int>();
} else {
max_size_ = -1;
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
float GenerateScale(const cv::Mat& im);
int short_size_;
int max_size_;
};
/*
* @brief
 * This class executes the resize-by-long operation on an image matrix: it
 * first resizes the long side of the image to the specified length, and the
 * short side is then resized in the same proportion.
* */
class ResizeByLong : public Transform {
public:
virtual void Init(const YAML::Node& item) {
long_size_ = item["long_size"].as<int>();
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int long_size_;
};
/*
* @brief
 * This class executes a resize operation on an image matrix: it resizes the
 * width and height to the specified lengths.
* */
class Resize : public Transform {
public:
virtual void Init(const YAML::Node& item) {
if (item["interp"].IsDefined()) {
interp_ = item["interp"].as<std::string>();
}
if (item["target_size"].IsScalar()) {
height_ = item["target_size"].as<int>();
width_ = item["target_size"].as<int>();
} else if (item["target_size"].IsSequence()) {
std::vector<int> target_size = item["target_size"].as<std::vector<int>>();
width_ = target_size[0];
height_ = target_size[1];
}
if (height_ <= 0 || width_ <= 0) {
std::cerr << "[Resize] target_size should greater than 0" << std::endl;
exit(-1);
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int height_;
int width_;
std::string interp_;
};
class CenterCrop : public Transform {
public:
virtual void Init(const YAML::Node& item) {
if (item["crop_size"].IsScalar()) {
height_ = item["crop_size"].as<int>();
width_ = item["crop_size"].as<int>();
} else if (item["crop_size"].IsSequence()) {
std::vector<int> crop_size = item["crop_size"].as<std::vector<int>>();
width_ = crop_size[0];
height_ = crop_size[1];
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int height_;
int width_;
};
/*
* @brief
 * This class executes a padding operation on an image matrix: it adds a
 * border to the edges of the image.
* */
class Padding : public Transform {
public:
virtual void Init(const YAML::Node& item) {
if (item["coarsest_stride"].IsDefined()) {
coarsest_stride_ = item["coarsest_stride"].as<int>();
if (coarsest_stride_ < 1) {
std::cerr << "[Padding] coarest_stride should greater than 0"
<< std::endl;
exit(-1);
}
}
if (item["target_size"].IsDefined()) {
if (item["target_size"].IsScalar()) {
width_ = item["target_size"].as<int>();
height_ = item["target_size"].as<int>();
} else if (item["target_size"].IsSequence()) {
width_ = item["target_size"].as<std::vector<int>>()[0];
height_ = item["target_size"].as<std::vector<int>>()[1];
}
}
if (item["im_padding_value"].IsDefined()) {
im_value_ = item["im_padding_value"].as<std::vector<float>>();
} else {
im_value_ = {0, 0, 0};
}
}
virtual bool Run(cv::Mat* im, ImageBlob* data);
private:
int coarsest_stride_ = -1;
int width_ = 0;
int height_ = 0;
std::vector<float> im_value_;
};
class Transforms {
public:
void Init(const YAML::Node& node, bool to_rgb = true);
std::shared_ptr<Transform> CreateTransform(const std::string& name);
bool Run(cv::Mat* im, ImageBlob* data);
private:
std::vector<std::shared_ptr<Transform>> transforms_;
bool to_rgb_ = true;
};
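
// A minimal wiring sketch for the classes above (assuming a parsed model.yml
// node `config` whose "Transforms" field lists the operators):
//   PaddleX::Transforms transforms;
//   transforms.Init(config["Transforms"], /*to_rgb=*/true);
//   PaddleX::ImageBlob blob;
//   cv::Mat im = cv::imread("test.jpg", 1);
//   transforms.Run(&im, &blob);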
} // namespace PaddleX
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <vector>
#ifdef _WIN32
#include <direct.h>
#include <io.h>
#else // Linux/Unix
#include <dirent.h>
#include <sys/io.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#endif
#include <string>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "include/paddlex/results.h"
#ifdef _WIN32
#define OS_PATH_SEP "\\"
#else
#define OS_PATH_SEP "/"
#endif
namespace PaddleX {
/*
* @brief
* Generate visualization colormap for each class
*
 * @param num_class: number of classes
 * @return color map; the size of the vector is 3 * num_class
* */
std::vector<int> GenerateColorMap(int num_class);
/*
* @brief
* Visualize the detection result
*
* @param img: initial image matrix
* @param results: the detection result
* @param labels: label map
* @param colormap: visualization color map
* @return visualized image matrix
* */
cv::Mat Visualize(const cv::Mat& img,
const DetResult& results,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap,
float threshold = 0.5);
/*
* @brief
* Visualize the segmentation result
*
* @param img: initial image matrix
 * @param results: the segmentation result
* @param labels: label map
* @param colormap: visualization color map
* @return visualized image matrix
* */
cv::Mat Visualize(const cv::Mat& img,
const SegResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap);
/*
* @brief
* generate save path for visualized image matrix
*
* @param save_dir: directory for saving visualized image matrix
 * @param file_path: source image file path
* @return path of saving visualized result
* */
std::string generate_save_path(const std::string& save_dir,
const std::string& file_path);
} // namespace PaddleX
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import argparse
import deploy
def arg_parser():
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_dir",
"-m",
type=str,
default=None,
help="path to openvino model .xml file")
parser.add_argument(
"--img", "-i", type=str, default=None, help="path to an image files")
parser.add_argument(
"--img_list", "-l", type=str, default=None, help="Path to a imglist")
parser.add_argument(
"--cfg_file",
"-c",
type=str,
default=None,
help="Path to PaddelX model yml file")
parser.add_argument(
"--thread_num",
"-t",
type=int,
default=1,
help="Path to PaddelX model yml file")
parser.add_argument(
"--input_shape",
"-ip",
type=str,
default=None,
help=" image input shape of model [NCHW] like [1,3,224,244] ")
return parser
def main():
parser = arg_parser()
args = parser.parse_args()
model_nb = args.model_dir
model_yaml = args.cfg_file
thread_num = args.thread_num
input_shape = args.input_shape
input_shape = input_shape[1:-1].split(",", 3)
shape = list(map(int, input_shape))
#model init
predictor = deploy.Predictor(model_nb, model_yaml, thread_num, shape)
#predict
    if args.img_list is not None:
f = open(args.img_list)
lines = f.readlines()
for im_path in lines:
print(im_path)
predictor.predict(im_path.strip('\n'))
f.close()
else:
im_path = args.img
predictor.predict(im_path)
if __name__ == "__main__":
main()
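
# Example invocation (paths and shape are placeholders):
#   python main.py -m model.nb -c model.yml -ip [1,3,224,224] -i test.jpg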
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import cls_transforms
from . import det_transforms
from . import seg_transforms
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .ops import *
import random
import os.path as osp
import numpy as np
from PIL import Image, ImageEnhance
class ClsTransform:
"""分类Transform的基类
"""
def __init__(self):
pass
class Compose(ClsTransform):
"""根据数据预处理/增强算子对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强算子。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
def __init__(self, transforms):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
'must be equal or larger than 1!')
self.transforms = transforms
def __call__(self, im, label=None):
"""
        Args:
            im (str/np.ndarray): image path or image as np.ndarray.
            label (int): class id of the image.

        Returns:
            tuple: fields required by the network, decided by the last
                preprocessing operator in `transforms`.
"""
if isinstance(im, np.ndarray):
if len(im.shape) != 3:
raise Exception(
"im should be 3-dimension, but now is {}-dimensions".
format(len(im.shape)))
else:
try:
im = cv2.imread(im).astype('float32')
except:
                raise TypeError("Cannot read the image file {}!".format(im))
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
for op in self.transforms:
outputs = op(im, label)
im = outputs[0]
if len(outputs) == 2:
label = outputs[1]
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
                print(
                    "{} is already in ComposedTransforms; remove it from "
                    "add_augmenters().".format(type(aug).__name__))
self.transforms = augmenters + self.transforms
class Normalize(ClsTransform):
"""对图像进行标准化。
1. 对图像进行归一化到区间[0.0, 1.0]。
2. 对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。
std (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。
"""
def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
self.mean = mean
self.std = std
def __call__(self, im, label=None):
"""
        Args:
            im (np.ndarray): image as np.ndarray.
            label (int): class id of the image.

        Returns:
            tuple: (im, ) when label is None, the image np.ndarray;
                otherwise (im, label), the image np.ndarray and its class id.
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label is None:
return (im, )
else:
return (im, label)
class ResizeByShort(ClsTransform):
"""根据图像短边对图像重新调整大小(resize)。
1. 获取图像的长边和短边长度。
2. 根据短边与short_size的比例,计算长边的目标长度,
此时高、宽的resize比例为short_size/原图短边长度。
3. 如果max_size>0,调整resize比例:
如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度;
4. 根据调整大小的比例对图像进行resize。
Args:
short_size (int): 调整大小后的图像目标短边长度。默认为256。
max_size (int): 长边目标长度的最大限制。默认为-1。
"""
def __init__(self, short_size=256, max_size=-1):
self.short_size = short_size
self.max_size = max_size
def __call__(self, im, label=None):
"""
        Args:
            im (np.ndarray): image as np.ndarray.
            label (int): class id of the image.

        Returns:
            tuple: (im, ) when label is None, the image np.ndarray;
                otherwise (im, label), the image np.ndarray and its class id.
"""
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im = cv2.resize(
im, (resized_width, resized_height),
interpolation=cv2.INTER_LINEAR)
if label is None:
return (im, )
else:
return (im, label)
class CenterCrop(ClsTransform):
"""以图像中心点扩散裁剪长宽为`crop_size`的正方形
1. 计算剪裁的起始点。
2. 剪裁图像。
Args:
crop_size (int): 裁剪的目标边长。默认为224。
"""
def __init__(self, crop_size=224):
self.crop_size = crop_size
def __call__(self, im, label=None):
"""
        Args:
            im (np.ndarray): image as np.ndarray.
            label (int): class id of the image.

        Returns:
            tuple: (im, ) when label is None, the image np.ndarray;
                otherwise (im, label), the image np.ndarray and its class id.
"""
im = center_crop(im, self.crop_size)
if label is None:
return (im, )
else:
return (im, label)
class ArrangeClassifier(ClsTransform):
"""获取训练/验证/预测所需信息。注意:此操作不需用户自己显示调用
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。
"""
def __init__(self, mode=None):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode must be in ['train', 'eval', 'test', 'quant']!")
self.mode = mode
def __call__(self, im, label=None):
"""
        Args:
            im (np.ndarray): image as np.ndarray.
            label (int): class id of the image.

        Returns:
            tuple: (im, label) when mode is 'train' or 'eval', the image
                np.ndarray and its class id; (im, ) when mode is 'test'
                or 'quant'.
"""
im = permute(im, False).astype('float32')
if self.mode == 'train' or self.mode == 'eval':
outputs = (im, label)
else:
outputs = (im, )
return outputs
class ComposedClsTransforms(Compose):
""" 分类模型的基础Transforms流程,具体如下
训练阶段:
1. 随机从图像中crop一块子图,并resize成crop_size大小
2. 将1的输出按0.5的概率随机进行水平翻转
3. 将图像进行归一化
验证/预测阶段:
1. 将图像按比例Resize,使得最小边长度为crop_size[0] * 1.14
2. 从图像中心crop出一个大小为crop_size的图像
3. 将图像进行归一化
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
crop_size(int|list): 输入模型里的图像大小
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
crop_size=[224, 224],
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
width = crop_size
if isinstance(crop_size, list):
if crop_size[0] != crop_size[1]:
raise Exception(
"In classifier model, width and height should be equal, please modify your parameter `crop_size`"
)
width = crop_size[0]
if width % 32 != 0:
raise Exception(
"In classifier model, width and height should be multiple of 32, e.g 224、256、320...., please modify your parameter `crop_size`"
)
if mode == 'train':
pass
else:
            # transforms for validation/prediction
transforms = [
ResizeByShort(short_size=int(width * 1.14)),
CenterCrop(crop_size=width), Normalize(
mean=mean, std=std)
]
super(ComposedClsTransforms, self).__init__(transforms)
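
# A usage sketch for the eval/test pipeline above (hypothetical image path):
#   transforms = ComposedClsTransforms(mode='eval', crop_size=[224, 224])
#   im, = transforms('test.jpg')  # resized, center-cropped, normalized image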
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
import random
import os.path as osp
import numpy as np
import cv2
from PIL import Image, ImageEnhance
from .ops import *
class DetTransform:
"""检测数据处理基类
"""
def __init__(self):
pass
class Compose(DetTransform):
"""根据数据预处理/增强列表对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强列表。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
def __init__(self, transforms):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
'must be equal or larger than 1!')
self.transforms = transforms
self.use_mixup = False
for t in self.transforms:
if type(t).__name__ == 'MixupImage':
self.use_mixup = True
def __call__(self, im, im_info=None, label_info=None):
"""
        Args:
            im (str/np.ndarray): image path or image as np.ndarray.
            im_info (dict): image-related information with the fields:
                - im_id (np.ndarray): image id, of shape (1,).
                - image_shape (np.ndarray): original image size, of shape (2,);
                  image_shape[0] is the height and image_shape[1] the width.
                - mixup (list): [im, im_info, label_info] of the image to be
                  mixed up with the current one, i.e. its np.ndarray data,
                  image information and annotation information; the field is
                  absent when no mixup is performed in the current epoch.
            label_info (dict): annotation-related information with the fields:
                - gt_bbox (np.ndarray): ground-truth box coordinates
                  [x1, y1, x2, y2], of shape (n, 4), where n is the number of
                  ground-truth boxes.
                - gt_class (np.ndarray): class id of each ground-truth box,
                  of shape (n, 1).
                - gt_score (np.ndarray): mixup score of each ground-truth box,
                  of shape (n, 1).
                - gt_poly (list): polygon segmentation regions inside each
                  ground-truth box, each region given by the x/y coordinates
                  of its points; the list length is n.
                - is_crowd (np.ndarray): whether each ground-truth box marks a
                  group of objects, of shape (n, 1).
                - difficult (np.ndarray): whether the object in each
                  ground-truth box is hard to recognize, of shape (n, 1).

        Returns:
            tuple: fields required by the network, decided by the last
                preprocessing operator in `transforms`.
"""
def decode_image(im_file, im_info, label_info):
if im_info is None:
im_info = dict()
if isinstance(im_file, np.ndarray):
if len(im_file.shape) != 3:
raise Exception(
"im should be 3-dimensions, but now is {}-dimensions".
format(len(im_file.shape)))
im = im_file
else:
                try:
                    im = cv2.imread(im_file).astype('float32')
                except Exception:
                    raise TypeError("Can't read the image file {}!".format(
                        im_file))
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
# make default im_info with [h, w, 1]
im_info['im_resize_info'] = np.array(
[im.shape[0], im.shape[1], 1.], dtype=np.float32)
im_info['image_shape'] = np.array([im.shape[0],
im.shape[1]]).astype('int32')
if not self.use_mixup:
if 'mixup' in im_info:
del im_info['mixup']
# decode mixup image
if 'mixup' in im_info:
im_info['mixup'] = \
decode_image(im_info['mixup'][0],
im_info['mixup'][1],
im_info['mixup'][2])
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
outputs = decode_image(im, im_info, label_info)
im = outputs[0]
im_info = outputs[1]
if len(outputs) == 3:
label_info = outputs[2]
for op in self.transforms:
if im is None:
return None
outputs = op(im, im_info, label_info)
im = outputs[0]
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
print(
"{} is already in ComposedTransforms, need to remove it from add_augmenters().".
format(type(aug).__name__))
self.transforms = augmenters + self.transforms
class ResizeByShort(DetTransform):
"""根据图像的短边调整图像大小(resize)。
1. 获取图像的长边和短边长度。
2. 根据短边与short_size的比例,计算长边的目标长度,
此时高、宽的resize比例为short_size/原图短边长度。
3. 如果max_size>0,调整resize比例:
如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度。
4. 根据调整大小的比例对图像进行resize。
    Args:
        short_size (int): 短边目标长度。默认为800。
        max_size (int): 长边目标长度的最大限制。默认为1333。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, short_size=800, max_size=1333):
self.max_size = int(max_size)
if not isinstance(short_size, int):
raise TypeError(
"Type of short_size is invalid. Must be Integer, now is {}".
format(type(short_size)))
self.short_size = short_size
if not (isinstance(self.max_size, int)):
raise TypeError("max_size: input type is invalid.")
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
            im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
                   存储与图像相关信息的字典和存储与标注框相关信息的字典。
其中,im_info更新字段为:
- im_resize_info (np.ndarray): resize后的图像高、resize后的图像宽、resize后的图像相对原始图的缩放比例
三者组成的np.ndarray,形状为(3,)。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if im_info is None:
im_info = dict()
if not isinstance(im, np.ndarray):
raise TypeError("ResizeByShort: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('ResizeByShort: image is not 3-dimensional.')
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im_resize_info = [resized_height, resized_width, scale]
im = cv2.resize(
im, (resized_width, resized_height),
interpolation=cv2.INTER_LINEAR)
im_info['im_resize_info'] = np.array(im_resize_info).astype(np.float32)
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
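# 尺度计算示意:short_size=800、max_size=1333时,对一张(600, 1200)的图像,
# 先得scale=800/600≈1.333,长边将变为1600>1333,故改用scale=1333/1200≈1.111,
# 输出为(666, 1333)。
def _demo_resize_by_short():
    import numpy as np
    op = ResizeByShort(short_size=800, max_size=1333)
    im = np.zeros((600, 1200, 3), dtype='float32')
    im, im_info = op(im)
    return im.shape, im_info['im_resize_info']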
class Padding(DetTransform):
"""1.将图像的长和宽padding至coarsest_stride的倍数。如输入图像为[300, 640],
`coarest_stride`为32,则由于300不为32的倍数,因此在图像最右和最下使用0值
进行padding,最终输出图像为[320, 640]。
2.或者,将图像的长和宽padding到target_size指定的shape,如输入的图像为[300,640],
a. `target_size` = 960,在图像最右和最下使用0值进行padding,最终输出
图像为[960, 960]。
b. `target_size` = [640, 960],在图像最右和最下使用0值进行padding,最终
输出图像为[640, 960]。
1. 如果coarsest_stride为1,target_size为None则直接返回。
2. 获取图像的高H、宽W。
3. 计算填充后图像的高H_new、宽W_new。
4. 构建大小为(H_new, W_new, 3)像素值为0的np.ndarray,
并将原图的np.ndarray粘贴于左上角。
Args:
coarsest_stride (int): 填充后的图像长、宽为该参数的倍数,默认为1。
        target_size (int|list|tuple): 填充后的图像长、宽,默认为None。同时设置时target_size优先级更高。
Raises:
TypeError: 形参`target_size`数据类型不满足需求。
ValueError: 形参`target_size`为(list|tuple)时,长度不满足需求。
"""
def __init__(self, coarsest_stride=1, target_size=None):
self.coarsest_stride = coarsest_stride
if target_size is not None:
if not isinstance(target_size, int):
if not isinstance(target_size, tuple) and not isinstance(
target_size, list):
raise TypeError(
"Padding: Type of target_size must in (int|list|tuple)."
)
elif len(target_size) != 2:
raise ValueError(
"Padding: Length of target_size must equal 2.")
self.target_size = target_size
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
            im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
                   存储与图像相关信息的字典和存储与标注框相关信息的字典。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
ValueError: coarsest_stride,target_size需有且只有一个被指定。
ValueError: target_size小于原图的大小。
"""
if im_info is None:
im_info = dict()
if not isinstance(im, np.ndarray):
raise TypeError("Padding: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('Padding: image is not 3-dimensional.')
im_h, im_w, im_c = im.shape[:]
if isinstance(self.target_size, int):
padding_im_h = self.target_size
padding_im_w = self.target_size
elif isinstance(self.target_size, list) or isinstance(self.target_size,
tuple):
padding_im_w = self.target_size[0]
padding_im_h = self.target_size[1]
elif self.coarsest_stride > 0:
padding_im_h = int(
np.ceil(im_h / self.coarsest_stride) * self.coarsest_stride)
padding_im_w = int(
np.ceil(im_w / self.coarsest_stride) * self.coarsest_stride)
else:
            raise ValueError(
                "coarsest_stride (>0) or target_size (int|list|tuple) need setting in Padding transform"
            )
pad_height = padding_im_h - im_h
pad_width = padding_im_w - im_w
if pad_height < 0 or pad_width < 0:
            raise ValueError(
                'the size of image should be less than target_size, but the size of image ({}, {}) is larger than target_size ({}, {})'
                .format(im_w, im_h, padding_im_w, padding_im_h))
padding_im = np.zeros(
(padding_im_h, padding_im_w, im_c), dtype=np.float32)
padding_im[:im_h, :im_w, :] = im
if label_info is None:
return (padding_im, im_info)
else:
return (padding_im, im_info, label_info)
class Resize(DetTransform):
"""调整图像大小(resize)。
- 当目标大小(target_size)类型为int时,根据插值方式,
将图像resize为[target_size, target_size]。
- 当目标大小(target_size)类型为list或tuple时,根据插值方式,
将图像resize为target_size。
注意:当插值方式为“RANDOM”时,则随机选取一种插值方式进行resize。
Args:
        target_size (int/list/tuple): 目标大小。默认为608。
interp (str): resize的插值方式,与opencv的插值方式对应,取值范围为
['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']。默认为"LINEAR"。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 插值方式不在['NEAREST', 'LINEAR', 'CUBIC',
'AREA', 'LANCZOS4', 'RANDOM']中。
"""
# The interpolation mode
interp_dict = {
'NEAREST': cv2.INTER_NEAREST,
'LINEAR': cv2.INTER_LINEAR,
'CUBIC': cv2.INTER_CUBIC,
'AREA': cv2.INTER_AREA,
'LANCZOS4': cv2.INTER_LANCZOS4
}
def __init__(self, target_size=608, interp='LINEAR'):
self.interp = interp
if not (interp == "RANDOM" or interp in self.interp_dict):
raise ValueError("interp should be one of {}".format(
self.interp_dict.keys()))
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
raise TypeError(
'when target is list or tuple, it should include 2 elements, but it is {}'
.format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
                   存储与图像相关信息的字典和存储与标注框相关信息的字典。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if im_info is None:
im_info = dict()
if not isinstance(im, np.ndarray):
raise TypeError("Resize: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('Resize: image is not 3-dimensional.')
if self.interp == "RANDOM":
interp = random.choice(list(self.interp_dict.keys()))
else:
interp = self.interp
im = resize(im, self.target_size, self.interp_dict[interp])
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
class Normalize(DetTransform):
"""对图像进行标准化。
1. 归一化图像到到区间[0.0, 1.0]。
2. 对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认为[0.485, 0.456, 0.406]。
std (list): 图像数据集的标准差。默认为[0.229, 0.224, 0.225]。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
self.mean = mean
self.std = std
if not (isinstance(self.mean, list) and isinstance(self.std, list)):
raise TypeError("NormalizeImage: input type is invalid.")
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
raise TypeError('NormalizeImage: std is invalid!')
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
            im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当label_info为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label_info不为空时,返回的tuple为(im, im_info, label_info),分别对应图像np.ndarray数据、
                   存储与图像相关信息的字典和存储与标注框相关信息的字典。
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
class ArrangeYOLOv3(DetTransform):
"""获取YOLOv3模型训练/验证/预测所需信息。
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内。
"""
def __init__(self, mode=None):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode must be in ['train', 'eval', 'test', 'quant']!")
self.mode = mode
def __call__(self, im, im_info=None, label_info=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label_info (dict, 可选): 存储与标注框相关的信息。
Returns:
tuple: 当mode为'train'时,返回(im, gt_bbox, gt_class, gt_score, im_shape),分别对应
图像np.ndarray数据、真实标注框、真实标注框对应的类别、真实标注框混合得分、图像大小信息;
当mode为'eval'时,返回(im, im_shape, im_id, gt_bbox, gt_class, difficult),
分别对应图像np.ndarray数据、图像大小信息、图像id、真实标注框、真实标注框对应的类别、
真实标注框是否为难识别对象;当mode为'test'或'quant'时,返回(im, im_shape),
分别对应图像np.ndarray数据、图像大小信息。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
im = permute(im, False)
        if self.mode in ['train', 'eval'] and (im_info is None or
                                               label_info is None):
            raise TypeError('Cannot do ArrangeYOLOv3! ' +
                            'Because the im_info and label_info can not be None!')
        if im_info is None:
            raise TypeError('Cannot do ArrangeYOLOv3! ' +
                            'Because the im_info can not be None!')
        im_shape = im_info['image_shape']
        # 按docstring组织各模式所需字段(简化写法,省略了完整实现中
        # 对标注框数量的截断/填充等细节)
        if self.mode == 'train':
            outputs = (im, label_info['gt_bbox'], label_info['gt_class'],
                       label_info['gt_score'], im_shape)
        elif self.mode == 'eval':
            outputs = (im, im_shape, im_info['im_id'], label_info['gt_bbox'],
                       label_info['gt_class'], label_info['difficult'])
        else:
            outputs = (im, im_shape)
        return outputs
class ComposedYOLOv3Transforms(Compose):
"""YOLOv3模型的图像预处理流程,具体如下,
训练阶段:
1. 在前mixup_epoch轮迭代中,使用MixupImage策略,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage
2. 对图像进行随机扰动,包括亮度,对比度,饱和度和色调
3. 随机扩充图像,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#randomexpand
4. 随机裁剪图像
5. 将4步骤的输出图像Resize成shape参数的大小
6. 随机0.5的概率水平翻转图像
7. 图像归一化
验证/预测阶段:
1. 将图像Resize成shape参数大小
2. 图像归一化
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
shape(list): 输入模型中图像的大小,输入模型的图像会被Resize成此大小
mixup_epoch(int): 模型训练过程中,前mixup_epoch会使用mixup策略
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
shape=[608, 608],
mixup_epoch=250,
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
width = shape
if isinstance(shape, list):
if shape[0] != shape[1]:
raise Exception(
"In YOLOv3 model, width and height should be equal")
width = shape[0]
        if width % 32 != 0:
            raise Exception(
                "In YOLOv3 model, width and height should be multiple of 32, e.g. 224, 256, 320..."
            )
        if mode == 'train':
            # 训练时的transforms,包含数据增强(与docstring中1~7步对应;
            # MixupImage/RandomDistort/RandomExpand/RandomCrop/RandomHorizontalFlip
            # 为det transforms中定义的增强算子)
            transforms = [
                MixupImage(mixup_epoch=mixup_epoch), RandomDistort(),
                RandomExpand(), RandomCrop(), Resize(
                    target_size=width, interp='RANDOM'),
                RandomHorizontalFlip(), Normalize(
                    mean=mean, std=std)
            ]
else:
# 验证/预测时的transforms
transforms = [
Resize(
target_size=width, interp='CUBIC'), Normalize(
mean=mean, std=std)
]
super(ComposedYOLOv3Transforms, self).__init__(transforms)
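# 用法示意(最小示例):构建验证阶段的YOLOv3 transforms并处理一张图像
def _demo_yolov3_eval_transforms():
    import numpy as np
    eval_transforms = ComposedYOLOv3Transforms(mode='eval', shape=[608, 608])
    im = np.zeros((480, 640, 3), dtype='float32')
    outputs = eval_transforms(im)   # 依次经过Resize(608, CUBIC)与Normalize
    return outputs[0].shape         # (608, 608, 3)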
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import math
import numpy as np
from PIL import Image, ImageEnhance
def normalize(im, mean, std):
im = im / 255.0
im -= mean
im /= std
return im
def permute(im, to_bgr=False):
im = np.swapaxes(im, 1, 2)
im = np.swapaxes(im, 1, 0)
if to_bgr:
im = im[[2, 1, 0], :, :]
return im
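# permute示意:HWC先交换axis1/axis2得HCW,再交换axis0/axis1得CHW;
# to_bgr=True时再反转通道顺序(RGB->BGR)
def _demo_permute():
    import numpy as np
    im = np.zeros((224, 320, 3), dtype='float32')
    return permute(im).shape   # (3, 224, 320)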
def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR):
value = max(im.shape[0], im.shape[1])
scale = float(long_size) / float(value)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im = cv2.resize(
im, (resized_width, resized_height), interpolation=interpolation)
return im
def resize(im, target_size=608, interp=cv2.INTER_LINEAR):
if isinstance(target_size, list) or isinstance(target_size, tuple):
w = target_size[0]
h = target_size[1]
else:
w = target_size
h = target_size
im = cv2.resize(im, (w, h), interpolation=interp)
return im
def random_crop(im,
crop_size=224,
lower_scale=0.08,
lower_ratio=3. / 4,
upper_ratio=4. / 3):
scale = [lower_scale, 1.0]
ratio = [lower_ratio, upper_ratio]
aspect_ratio = math.sqrt(np.random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(im.shape[0]) / im.shape[1]) / (h**2),
(float(im.shape[1]) / im.shape[0]) / (w**2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
target_area = im.shape[0] * im.shape[1] * np.random.uniform(
scale_min, scale_max)
target_size = math.sqrt(target_area)
w = int(target_size * w)
h = int(target_size * h)
i = np.random.randint(0, im.shape[0] - h + 1)
j = np.random.randint(0, im.shape[1] - w + 1)
im = im[i:i + h, j:j + w, :]
im = cv2.resize(im, (crop_size, crop_size))
return im
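# 随机裁剪示意:按scale/ratio随机确定子图大小与位置,再resize到crop_size
def _demo_random_crop():
    import numpy as np
    im = np.zeros((480, 640, 3), dtype='float32')
    return random_crop(im, crop_size=224).shape   # (224, 224, 3)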
def center_crop(im, crop_size=224):
height, width = im.shape[:2]
w_start = (width - crop_size) // 2
h_start = (height - crop_size) // 2
w_end = w_start + crop_size
h_end = h_start + crop_size
im = im[h_start:h_end, w_start:w_end, :]
return im
def horizontal_flip(im):
if len(im.shape) == 3:
im = im[:, ::-1, :]
elif len(im.shape) == 2:
im = im[:, ::-1]
return im
def vertical_flip(im):
if len(im.shape) == 3:
im = im[::-1, :, :]
elif len(im.shape) == 2:
im = im[::-1, :]
return im
def bgr2rgb(im):
return im[:, :, ::-1]
def hue(im, hue_lower, hue_upper):
delta = np.random.uniform(hue_lower, hue_upper)
u = np.cos(delta * np.pi)
w = np.sin(delta * np.pi)
bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]])
tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321],
[0.211, -0.523, 0.311]])
ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
[1.0, -1.107, 1.705]])
t = np.dot(np.dot(ityiq, bt), tyiq).T
im = np.dot(im, t)
return im
def saturation(im, saturation_lower, saturation_upper):
delta = np.random.uniform(saturation_lower, saturation_upper)
gray = im * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
gray = gray.sum(axis=2, keepdims=True)
gray *= (1.0 - delta)
im *= delta
im += gray
return im
def contrast(im, contrast_lower, contrast_upper):
delta = np.random.uniform(contrast_lower, contrast_upper)
im *= delta
return im
def brightness(im, brightness_lower, brightness_upper):
delta = np.random.uniform(brightness_lower, brightness_upper)
im += delta
return im
def rotate(im, rotate_lower, rotate_upper):
rotate_delta = np.random.uniform(rotate_lower, rotate_upper)
im = im.rotate(int(rotate_delta))
return im
def resize_padding(im, max_side_len=2400):
'''
resize image to a size multiple of 32 which is required by the network
:param im: the resized image
:param max_side_len: limit of max image size to avoid out of memory in gpu
:return: the resized image and the resize ratio
'''
h, w, _ = im.shape
resize_w = w
resize_h = h
# limit the max side
if max(resize_h, resize_w) > max_side_len:
ratio = float(
max_side_len) / resize_h if resize_h > resize_w else float(
max_side_len) / resize_w
else:
ratio = 1.
resize_h = int(resize_h * ratio)
resize_w = int(resize_w * ratio)
resize_h = resize_h if resize_h % 32 == 0 else (resize_h // 32 - 1) * 32
resize_w = resize_w if resize_w % 32 == 0 else (resize_w // 32 - 1) * 32
resize_h = max(32, resize_h)
resize_w = max(32, resize_w)
im = cv2.resize(im, (int(resize_w), int(resize_h)))
#im = cv2.resize(im, (512, 512))
ratio_h = resize_h / float(h)
ratio_w = resize_w / float(w)
_ratio = np.array([ratio_h, ratio_w]).reshape(-1, 2)
return im, _ratio
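# resize_padding示意:长边不超过max_side_len时ratio=1,之后h、w分别向下对齐到
# 32的倍数(非整除时取(x//32-1)*32):700->640, 900->864
def _demo_resize_padding():
    import numpy as np
    im = np.zeros((700, 900, 3), dtype='float32')
    im, ratio = resize_padding(im)
    return im.shape, ratio   # (640, 864, 3)与[[640/700, 864/900]]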
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .ops import *
import random
import os.path as osp
import numpy as np
from PIL import Image
import cv2
from collections import OrderedDict
class SegTransform:
""" 分割transform基类
"""
def __init__(self):
pass
class Compose(SegTransform):
"""根据数据预处理/增强算子对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强算子。
Raises:
TypeError: transforms不是list对象
ValueError: transforms元素个数小于1。
"""
def __init__(self, transforms):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
'must be equal or larger than 1!')
self.transforms = transforms
self.to_rgb = False
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (str/np.ndarray): 图像路径/图像np.ndarray数据。
            im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。
Returns:
tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。
"""
if im_info is None:
im_info = list()
if isinstance(im, np.ndarray):
if len(im.shape) != 3:
raise Exception(
"im should be 3-dimensions, but now is {}-dimensions".
format(len(im.shape)))
else:
            try:
                im = cv2.imread(im).astype('float32')
            except Exception:
                raise ValueError("Can't read the image file {}!".format(im))
if self.to_rgb:
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
if label is not None:
if not isinstance(label, np.ndarray):
label = np.asarray(Image.open(label))
for op in self.transforms:
if isinstance(op, SegTransform):
outputs = op(im, im_info, label)
im = outputs[0]
if len(outputs) >= 2:
im_info = outputs[1]
if len(outputs) == 3:
label = outputs[2]
else:
im = execute_imgaug(op, im)
if label is not None:
outputs = (im, im_info, label)
else:
outputs = (im, im_info)
return outputs
def add_augmenters(self, augmenters):
if not isinstance(augmenters, list):
raise Exception(
"augmenters should be list type in func add_augmenters()")
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
print("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
self.transforms = augmenters + self.transforms
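# 用法示意:add_augmenters会把增强算子放到现有流程之前;重名时打印提示但仍会添加
def _demo_add_augmenters():
    transforms = Compose([Resize(target_size=512), Normalize()])
    transforms.add_augmenters([RandomHorizontalFlip(prob=0.5)])
    # 结果:['RandomHorizontalFlip', 'Resize', 'Normalize']
    return [type(t).__name__ for t in transforms.transforms]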
class RandomHorizontalFlip(SegTransform):
"""以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。
Args:
prob (float): 随机水平翻转的概率。默认值为0.5。
"""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
            im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if random.random() < self.prob:
im = horizontal_flip(im)
if label is not None:
label = horizontal_flip(label)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomVerticalFlip(SegTransform):
"""以一定的概率对图像进行垂直翻转。当存在标注图像时,则同步进行翻转。
Args:
prob (float): 随机垂直翻转的概率。默认值为0.1。
"""
def __init__(self, prob=0.1):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
            im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if random.random() < self.prob:
im = vertical_flip(im)
if label is not None:
label = vertical_flip(label)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class Resize(SegTransform):
"""调整图像大小(resize),当存在标注图像时,则同步进行处理。
- 当目标大小(target_size)类型为int时,根据插值方式,
将图像resize为[target_size, target_size]。
- 当目标大小(target_size)类型为list或tuple时,根据插值方式,
将图像resize为target_size, target_size的输入应为[w, h]或(w, h)。
Args:
target_size (int|list|tuple): 目标大小。
interp (str): resize的插值方式,与opencv的插值方式对应,
可选的值为['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4'],默认为"LINEAR"。
Raises:
TypeError: target_size不是int/list/tuple。
ValueError: target_size为list/tuple时元素个数不等于2。
AssertionError: interp的取值不在['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4']之内。
"""
# The interpolation mode
interp_dict = {
'NEAREST': cv2.INTER_NEAREST,
'LINEAR': cv2.INTER_LINEAR,
'CUBIC': cv2.INTER_CUBIC,
'AREA': cv2.INTER_AREA,
'LANCZOS4': cv2.INTER_LANCZOS4
}
def __init__(self, target_size, interp='LINEAR'):
self.interp = interp
        assert interp in self.interp_dict, "interp should be one of {}".format(
            self.interp_dict.keys())
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
raise ValueError(
'when target is list or tuple, it should include 2 elements, but it is {}'
.format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
            im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
            其中,im_info更新字段为:
                - shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
Raises:
ZeroDivisionError: im的短边为0。
TypeError: im不是np.ndarray数据。
            ValueError: im不是3维np.ndarray。
"""
        if im_info is None:
            im_info = list()
        im_info.append(('resize', im.shape[:2]))
if not isinstance(im, np.ndarray):
raise TypeError("ResizeImage: image type is not np.ndarray.")
if len(im.shape) != 3:
raise ValueError('ResizeImage: image is not 3-dimensional.')
im_shape = im.shape
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
if float(im_size_min) == 0:
raise ZeroDivisionError('ResizeImage: min size of image is 0')
if isinstance(self.target_size, int):
resize_w = self.target_size
resize_h = self.target_size
else:
resize_w = self.target_size[0]
resize_h = self.target_size[1]
im_scale_x = float(resize_w) / float(im_shape[1])
im_scale_y = float(resize_h) / float(im_shape[0])
im = cv2.resize(
im,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp_dict[self.interp])
if label is not None:
label = cv2.resize(
label,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp_dict['NEAREST'])
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeByLong(SegTransform):
"""对图像长边resize到固定值,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
Args:
long_size (int): resize后图像的长边大小。
"""
def __init__(self, long_size):
self.long_size = long_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
            im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info新增字段为:
                - shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
"""
        if im_info is None:
            im_info = list()
        im_info.append(('resize', im.shape[:2]))
im = resize_long(im, self.long_size)
if label is not None:
label = resize_long(label, self.long_size, cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeByShort(SegTransform):
"""根据图像的短边调整图像大小(resize)。
1. 获取图像的长边和短边长度。
2. 根据短边与short_size的比例,计算长边的目标长度,
此时高、宽的resize比例为short_size/原图短边长度。
3. 如果max_size>0,调整resize比例:
如果长边的目标长度>max_size,则高、宽的resize比例为max_size/原图长边长度。
4. 根据调整大小的比例对图像进行resize。
    Args:
        short_size (int): 短边目标长度。默认为800。
        max_size (int): 长边目标长度的最大限制。默认为1333。
Raises:
TypeError: 形参数据类型不满足需求。
"""
def __init__(self, short_size=800, max_size=1333):
self.max_size = int(max_size)
if not isinstance(short_size, int):
raise TypeError(
"Type of short_size is invalid. Must be Integer, now is {}".
format(type(short_size)))
self.short_size = short_size
if not (isinstance(self.max_size, int)):
raise TypeError("max_size: input type is invalid.")
def __call__(self, im, im_info=None, label=None):
"""
Args:
            im (np.ndarray): 图像np.ndarray数据。
            im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info更新字段为:
                - shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
        if im_info is None:
            im_info = list()
if not isinstance(im, np.ndarray):
raise TypeError("ResizeByShort: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('ResizeByShort: image is not 3-dimensional.')
im_info.append(('resize', im.shape[:2]))
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
        im = cv2.resize(
            im, (resized_width, resized_height),
            interpolation=cv2.INTER_LINEAR)
        if label is not None:
            label = cv2.resize(
                label, (resized_width, resized_height),
                interpolation=cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeRangeScaling(SegTransform):
"""对图像长边随机resize到指定范围内,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
Args:
min_value (int): 图像长边resize后的最小值。默认值400。
max_value (int): 图像长边resize后的最大值。默认值600。
Raises:
ValueError: min_value大于max_value
"""
def __init__(self, min_value=400, max_value=600):
if min_value > max_value:
raise ValueError('min_value must be less than max_value, '
'but they are {} and {}.'.format(min_value,
max_value))
self.min_value = min_value
self.max_value = max_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
            im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_value == self.max_value:
random_size = self.max_value
else:
random_size = int(
np.random.uniform(self.min_value, self.max_value) + 0.5)
im = resize_long(im, random_size, cv2.INTER_LINEAR)
if label is not None:
label = resize_long(label, random_size, cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ResizeStepScaling(SegTransform):
"""对图像按照某一个比例resize,这个比例以scale_step_size为步长
在[min_scale_factor, max_scale_factor]随机变动。当存在标注图像时,则同步进行处理。
Args:
        min_scale_factor (float): resize最小尺度。默认值0.75。
        max_scale_factor (float): resize最大尺度。默认值1.25。
        scale_step_size (float): resize尺度范围间隔。默认值0.25。
Raises:
ValueError: min_scale_factor大于max_scale_factor
"""
def __init__(self,
min_scale_factor=0.75,
max_scale_factor=1.25,
scale_step_size=0.25):
if min_scale_factor > max_scale_factor:
raise ValueError(
'min_scale_factor must be less than max_scale_factor, '
'but they are {} and {}.'.format(min_scale_factor,
max_scale_factor))
self.min_scale_factor = min_scale_factor
self.max_scale_factor = max_scale_factor
self.scale_step_size = scale_step_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
            im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_scale_factor == self.max_scale_factor:
scale_factor = self.min_scale_factor
elif self.scale_step_size == 0:
scale_factor = np.random.uniform(self.min_scale_factor,
self.max_scale_factor)
else:
num_steps = int((self.max_scale_factor - self.min_scale_factor) /
self.scale_step_size + 1)
scale_factors = np.linspace(self.min_scale_factor,
self.max_scale_factor,
num_steps).tolist()
np.random.shuffle(scale_factors)
scale_factor = scale_factors[0]
im = cv2.resize(
im, (0, 0),
fx=scale_factor,
fy=scale_factor,
interpolation=cv2.INTER_LINEAR)
if label is not None:
label = cv2.resize(
label, (0, 0),
fx=scale_factor,
fy=scale_factor,
interpolation=cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
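# 尺度采样示意:min_scale_factor=0.75、max_scale_factor=1.25、scale_step_size=0.25
# 时,num_steps=int((1.25-0.75)/0.25+1)=3,候选尺度为np.linspace(0.75, 1.25, 3)
# 即[0.75, 1.0, 1.25],每次从中随机取一个作为scale_factor。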
class Normalize(SegTransform):
"""对图像进行标准化。
1.尺度缩放到 [0,1]。
2.对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]。
std (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。
Raises:
ValueError: mean或std不是list对象。std包含0。
"""
def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
self.mean = mean
self.std = std
if not (isinstance(self.mean, list) and isinstance(self.std, list)):
raise ValueError("{}: input type is invalid.".format(self))
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
raise ValueError('{}: std is invalid!'.format(self))
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
            im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class Padding(SegTransform):
"""对图像或标注图像进行padding,padding方向为右和下。
根据提供的值对图像或标注图像进行padding操作。
Args:
target_size (int|list|tuple): padding后图像的大小。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认值为255。
Raises:
TypeError: target_size不是int|list|tuple。
ValueError: target_size为list|tuple时元素个数不等于2。
"""
def __init__(self,
target_size,
im_padding_value=[127.5, 127.5, 127.5],
label_padding_value=255):
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
raise ValueError(
'when target is list or tuple, it should include 2 elements, but it is {}'
.format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
self.im_padding_value = im_padding_value
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
            im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info新增字段为:
                - shape_before_padding (tuple): 保存padding之前图像的形状(h, w)。
Raises:
ValueError: 输入图像im或label的形状大于目标值
"""
        if im_info is None:
            im_info = list()
        im_info.append(('padding', im.shape[:2]))
im_height, im_width = im.shape[0], im.shape[1]
if isinstance(self.target_size, int):
target_height = self.target_size
target_width = self.target_size
else:
target_height = self.target_size[1]
target_width = self.target_size[0]
pad_height = target_height - im_height
pad_width = target_width - im_width
if pad_height < 0 or pad_width < 0:
            raise ValueError(
                'the size of image should be less than target_size, but the size of image ({}, {}) is larger than target_size ({}, {})'
                .format(im_width, im_height, target_width, target_height))
else:
im = cv2.copyMakeBorder(
im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None:
label = cv2.copyMakeBorder(
label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomPaddingCrop(SegTransform):
"""对图像和标注图进行随机裁剪,当所需要的裁剪尺寸大于原图时,则进行padding操作。
Args:
crop_size (int|list|tuple): 裁剪图像大小。默认为512。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认值为255。
Raises:
TypeError: crop_size不是int/list/tuple。
ValueError: target_size为list/tuple时元素个数不等于2。
"""
def __init__(self,
crop_size=512,
im_padding_value=[127.5, 127.5, 127.5],
label_padding_value=255):
if isinstance(crop_size, list) or isinstance(crop_size, tuple):
if len(crop_size) != 2:
raise ValueError(
'when crop_size is list or tuple, it should include 2 elements, but it is {}'
.format(crop_size))
elif not isinstance(crop_size, int):
raise TypeError(
"Type of crop_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(crop_size)))
self.crop_size = crop_size
self.im_padding_value = im_padding_value
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
            im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if isinstance(self.crop_size, int):
crop_width = self.crop_size
crop_height = self.crop_size
else:
crop_width = self.crop_size[0]
crop_height = self.crop_size[1]
img_height = im.shape[0]
img_width = im.shape[1]
if img_height == crop_height and img_width == crop_width:
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
else:
pad_height = max(crop_height - img_height, 0)
pad_width = max(crop_width - img_width, 0)
if (pad_height > 0 or pad_width > 0):
im = cv2.copyMakeBorder(
im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None:
label = cv2.copyMakeBorder(
label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
img_height = im.shape[0]
img_width = im.shape[1]
if crop_height > 0 and crop_width > 0:
h_off = np.random.randint(img_height - crop_height + 1)
w_off = np.random.randint(img_width - crop_width + 1)
im = im[h_off:(crop_height + h_off), w_off:(w_off + crop_width
), :]
if label is not None:
label = label[h_off:(crop_height + h_off), w_off:(
w_off + crop_width)]
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
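# 裁剪/填充示意:原图(400, 300)两个方向都小于crop_size=512,
# 先padding到(512, 512)再裁剪,输出仍为512x512
def _demo_random_padding_crop():
    import numpy as np
    op = RandomPaddingCrop(crop_size=512)
    im, _ = op(np.zeros((400, 300, 3), dtype='float32'))
    return im.shape   # (512, 512, 3)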
class RandomBlur(SegTransform):
"""以一定的概率对图像进行高斯模糊。
Args:
prob (float): 图像模糊概率。默认为0.1。
"""
def __init__(self, prob=0.1):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
            im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.prob <= 0:
n = 0
elif self.prob >= 1:
n = 1
else:
n = int(1.0 / self.prob)
if n > 0:
if np.random.randint(0, n) == 0:
radius = np.random.randint(3, 10)
if radius % 2 != 1:
radius = radius + 1
if radius > 9:
radius = 9
im = cv2.GaussianBlur(im, (radius, radius), 0, 0)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
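# 概率实现说明:prob=0.1时n=int(1/0.1)=10,np.random.randint(0, 10)==0的概率为0.1;
# 高斯核radius取自[3, 10),并被修正为不超过9的奇数,以满足GaussianBlur的要求。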
class RandomScaleAspect(SegTransform):
"""裁剪并resize回原始尺寸的图像和标注图像。
    按照一定的面积比和宽高比对图像进行裁剪,并resize回原始图像大小,当存在标注图像时,同步进行。
Args:
min_scale (float):裁取图像占原始图像的面积比,取值[0,1],为0时则返回原图。默认为0.5。
aspect_ratio (float): 裁取图像的宽高比范围,非负值,为0时返回原图。默认为0.33。
"""
def __init__(self, min_scale=0.5, aspect_ratio=0.33):
self.min_scale = min_scale
self.aspect_ratio = aspect_ratio
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
            im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_scale != 0 and self.aspect_ratio != 0:
img_height = im.shape[0]
img_width = im.shape[1]
for i in range(0, 10):
area = img_height * img_width
target_area = area * np.random.uniform(self.min_scale, 1.0)
aspectRatio = np.random.uniform(self.aspect_ratio,
1.0 / self.aspect_ratio)
dw = int(np.sqrt(target_area * 1.0 * aspectRatio))
dh = int(np.sqrt(target_area * 1.0 / aspectRatio))
if (np.random.randint(10) < 5):
tmp = dw
dw = dh
dh = tmp
if (dh < img_height and dw < img_width):
h1 = np.random.randint(0, img_height - dh)
w1 = np.random.randint(0, img_width - dw)
im = im[h1:(h1 + dh), w1:(w1 + dw), :]
label = label[h1:(h1 + dh), w1:(w1 + dw)]
im = cv2.resize(
im, (img_width, img_height),
interpolation=cv2.INTER_LINEAR)
label = cv2.resize(
label, (img_width, img_height),
interpolation=cv2.INTER_NEAREST)
break
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class RandomDistort(SegTransform):
"""对图像进行随机失真。
1. 对变换的操作顺序进行随机化操作。
2. 按照1中的顺序以一定的概率对图像进行随机像素内容变换。
Args:
brightness_range (float): 明亮度因子的范围。默认为0.5。
brightness_prob (float): 随机调整明亮度的概率。默认为0.5。
contrast_range (float): 对比度因子的范围。默认为0.5。
contrast_prob (float): 随机调整对比度的概率。默认为0.5。
saturation_range (float): 饱和度因子的范围。默认为0.5。
saturation_prob (float): 随机调整饱和度的概率。默认为0.5。
hue_range (int): 色调因子的范围。默认为18。
hue_prob (float): 随机调整色调的概率。默认为0.5。
"""
def __init__(self,
brightness_range=0.5,
brightness_prob=0.5,
contrast_range=0.5,
contrast_prob=0.5,
saturation_range=0.5,
saturation_prob=0.5,
hue_range=18,
hue_prob=0.5):
self.brightness_range = brightness_range
self.brightness_prob = brightness_prob
self.contrast_range = contrast_range
self.contrast_prob = contrast_prob
self.saturation_range = saturation_range
self.saturation_prob = saturation_prob
self.hue_range = hue_range
self.hue_prob = hue_prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
            im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
brightness_lower = 1 - self.brightness_range
brightness_upper = 1 + self.brightness_range
contrast_lower = 1 - self.contrast_range
contrast_upper = 1 + self.contrast_range
saturation_lower = 1 - self.saturation_range
saturation_upper = 1 + self.saturation_range
hue_lower = -self.hue_range
hue_upper = self.hue_range
ops = [brightness, contrast, saturation, hue]
random.shuffle(ops)
params_dict = {
'brightness': {
'brightness_lower': brightness_lower,
'brightness_upper': brightness_upper
},
'contrast': {
'contrast_lower': contrast_lower,
'contrast_upper': contrast_upper
},
'saturation': {
'saturation_lower': saturation_lower,
'saturation_upper': saturation_upper
},
'hue': {
'hue_lower': hue_lower,
'hue_upper': hue_upper
}
}
prob_dict = {
'brightness': self.brightness_prob,
'contrast': self.contrast_prob,
'saturation': self.saturation_prob,
'hue': self.hue_prob
}
        for i in range(4):
            params = params_dict[ops[i].__name__]
            prob = prob_dict[ops[i].__name__]
            params['im'] = im
            if np.random.uniform(0, 1) < prob:
                im = ops[i](**params)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
class ArrangeSegmenter(SegTransform):
"""获取训练/验证/预测所需的信息。
Args:
mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。
Raises:
ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内
"""
def __init__(self, mode):
if mode not in ['train', 'eval', 'test', 'quant']:
raise ValueError(
"mode should be defined as one of ['train', 'eval', 'test', 'quant']!"
)
self.mode = mode
def __call__(self, im, im_info, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
            im_info (list): 存储图像resize或padding前的shape信息,如
[('resize', [200, 300]), ('padding', [400, 600])]表示
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当mode为'train'或'eval'时,返回的tuple为(im, label),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当mode为'test'时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;当mode为
'quant'时,返回的tuple为(im,),为图像np.ndarray数据。
"""
im = permute(im, False)
if self.mode == 'train' or self.mode == 'eval':
label = label[np.newaxis, :, :]
return (im, label)
elif self.mode == 'test':
return (im, im_info)
else:
return (im, )
class ComposedSegTransforms(Compose):
""" 语义分割模型(UNet/DeepLabv3p)的图像处理流程,具体如下
训练阶段:
1. 随机对图像以0.5的概率水平翻转
2. 按不同的比例随机Resize原图
3. 从原图中随机crop出大小为train_crop_size大小的子图,如若crop出来的图小于train_crop_size,则会将图padding到对应大小
4. 图像归一化
预测阶段:
1. 图像归一化
Args:
mode(str): 图像处理所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
train_crop_size(list): 模型训练阶段,随机从原图crop的大小
mean(list): 图像均值
std(list): 图像方差
"""
def __init__(self,
mode,
train_crop_size=[769, 769],
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5]):
        if mode == 'train':
            # 训练时的transforms,包含数据增强(与docstring中1~4步对应)
            transforms = [
                RandomHorizontalFlip(prob=0.5), ResizeStepScaling(),
                RandomPaddingCrop(crop_size=train_crop_size), Normalize(
                    mean=mean, std=std)
            ]
else:
# 验证/预测时的transforms
transforms = [Normalize(mean=mean, std=std)]
super(ComposedSegTransforms, self).__init__(transforms)
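# 用法示意(最小示例):验证/预测阶段仅做Normalize,输出shape与输入一致
def _demo_seg_eval_transforms():
    import numpy as np
    eval_transforms = ComposedSegTransforms(mode='eval')
    im, im_info = eval_transforms(np.zeros((400, 600, 3), dtype='float32'))
    return im.shape   # (400, 600, 3)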
# Paddle-Lite预编译库的路径
LITE_DIR=/path/to/Paddle-Lite/inference/lib
# gflags预编译库的路径
GFLAGS_DIR=$(pwd)/deps/gflags
# glog预编译库的路径
GLOG_DIR=$(pwd)/deps/glog
# opencv预编译库的路径, 如果使用自带预编译版本可不修改
OPENCV_DIR=$(pwd)/deps/opencv
# 下载自带预编译版本(这里不能用exec,否则会替换当前shell、后续命令不再执行)
sh $(pwd)/scripts/install_third-party.sh
rm -rf build
mkdir -p build
cd build
cmake .. \
-DOPENCV_DIR=${OPENCV_DIR} \
-DGFLAGS_DIR=${GFLAGS_DIR} \
-DLITE_DIR=${LITE_DIR} \
-DCMAKE_CXX_FLAGS="-march=armv7-a"
make
# download third-part lib
if [ ! -d "./deps" ]; then
mkdir deps
fi
if [ ! -d "./deps/gflag" ]; then
cd deps
git clone https://github.com/gflags/gflags
cd gflags
cmake .
make -j 4
cd ..
cd ..
fi
if [ ! -d "./deps/glog" ]; then
cd deps
git clone https://github.com/google/glog
sudo apt-get install -y autoconf automake libtool
cd glog
./autogen.sh
./configure
make -j 4
cd ..
cd ..
fi
OPENCV_URL=https://bj.bcebos.com/paddlex/deploy/armopencv/opencv.tar.bz2
if [ ! -d "./deps/opencv" ]; then
cd deps
wget -c ${OPENCV_URL}
tar xvfj opencv.tar.bz2
rm -rf opencv.tar.bz2
cd ..
fi
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/paddlex/paddlex.h"
#include <iostream>
#include <fstream>
namespace PaddleX {
void Model::create_predictor(const std::string& model_dir,
const std::string& cfg_file,
int thread_num) {
paddle::lite_api::MobileConfig config;
config.set_model_from_file(model_dir);
config.set_threads(thread_num);
load_config(cfg_file);
predictor_ =
paddle::lite_api::CreatePaddlePredictor<paddle::lite_api::MobileConfig>(
config);
}
bool Model::load_config(const std::string& cfg_file) {
YAML::Node config = YAML::LoadFile(cfg_file);
type = config["_Attributes"]["model_type"].as<std::string>();
name = config["Model"].as<std::string>();
bool to_rgb = true;
if (config["TransformsMode"].IsDefined()) {
std::string mode = config["TransformsMode"].as<std::string>();
if (mode == "BGR") {
to_rgb = false;
} else if (mode != "RGB") {
std::cerr << "[Init] Only 'RGB' or 'BGR' is supported for TransformsMode"
<< std::endl;
return false;
}
}
// init preprocess ops
transforms_.Init(config["Transforms"], to_rgb);
// read label list
for (const auto& item : config["_Attributes"]["labels"]) {
int index = labels.size();
labels[index] = item.as<std::string>();
}
return true;
}
bool Model::preprocess(cv::Mat* input_im, ImageBlob* inputs) {
if (!transforms_.Run(input_im, inputs)) {
return false;
}
return true;
}
bool Model::predict(const cv::Mat& im, ClsResult* result) {
inputs_.clear();
if (type == "detector") {
std::cerr << "Loading model is a 'detector', DetResult should be passed to "
"function predict()!"
<< std::endl;
return false;
} else if (type == "segmenter") {
std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
"to function predict()!"
<< std::endl;
return false;
}
// preprocess
inputs_.input_tensor_ = std::move(predictor_->GetInput(0));
cv::Mat im_clone = im.clone();
if (!preprocess(&im_clone, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
// predict
predictor_->Run();
  std::unique_ptr<const paddle::lite_api::Tensor> output_tensor(
      std::move(predictor_->GetOutput(0)));
  const float *outputs_data = output_tensor->mutable_data<float>();
  // postprocess: sizeof(指针)并不是输出元素个数,需由输出shape计算
  std::vector<int64_t> output_shape = output_tensor->shape();
  int64_t size = 1;
  for (const auto& i : output_shape) {
    size *= i;
  }
  auto ptr = std::max_element(outputs_data, outputs_data + size);
  result->category_id = std::distance(outputs_data, ptr);
  result->score = *ptr;
  result->category = labels[result->category_id];
  return true;
}
bool Model::predict(const cv::Mat& im, DetResult* result) {
inputs_.clear();
result->clear();
if (type == "classifier") {
std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
"to function predict()!" << std::endl;
return false;
} else if (type == "segmenter") {
std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
"to function predict()!" << std::endl;
return false;
}
inputs_.input_tensor_ = std::move(predictor_->GetInput(0));
cv::Mat im_clone = im.clone();
if (!preprocess(&im_clone, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
int h = inputs_.new_im_size_[0];
int w = inputs_.new_im_size_[1];
if (name == "YOLOv3") {
std::unique_ptr<paddle::lite_api::Tensor> im_size_tensor(
std::move(predictor_->GetInput(1)));
const std::vector<int64_t> IM_SIZE_SHAPE = {1, 2};
im_size_tensor->Resize(IM_SIZE_SHAPE);
auto *im_size_data = im_size_tensor->mutable_data<int>();
memcpy(im_size_data, inputs_.ori_im_size_.data(), 1*2*sizeof(int));
}
predictor_->Run();
auto output_names = predictor_->GetOutputNames();
auto output_box_tensor = predictor_->GetTensor(output_names[0]);
const float *output_box = output_box_tensor->mutable_data<float>();
std::vector<int64_t> output_box_shape = output_box_tensor->shape();
int size = 1;
for (const auto& i : output_box_shape) {
size *= i;
}
int num_boxes = size / 6;
for (int i = 0; i < num_boxes; ++i) {
Box box;
box.category_id = static_cast<int>(round(output_box[i * 6]));
box.category = labels[box.category_id];
box.score = output_box[i * 6 + 1];
float xmin = output_box[i * 6 + 2];
float ymin = output_box[i * 6 + 3];
float xmax = output_box[i * 6 + 4];
float ymax = output_box[i * 6 + 5];
float w = xmax - xmin + 1;
float h = ymax - ymin + 1;
box.coordinate = {xmin, ymin, w, h};
result->boxes.push_back(std::move(box));
}
return true;
}
bool Model::predict(const cv::Mat& im, SegResult* result) {
result->clear();
inputs_.clear();
if (type == "classifier") {
std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
"to function predict()!" << std::endl;
return false;
} else if (type == "detector") {
std::cerr << "Loading model is a 'detector', DetResult should be passed to "
"function predict()!" << std::endl;
return false;
}
inputs_.input_tensor_ = std::move(predictor_->GetInput(0));
cv::Mat im_clone = im.clone();
if (!preprocess(&im_clone, &inputs_)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
std::cout << "Preprocess is done" << std::endl;
predictor_->Run();
auto output_names = predictor_->GetOutputNames();
auto output_label_tensor = predictor_->GetTensor(output_names[0]);
const int64_t *label_data = output_label_tensor->mutable_data<int64_t>();
std::vector<int64_t> output_label_shape = output_label_tensor->shape();
int size = 1;
for (const auto& i : output_label_shape) {
size *= i;
result->label_map.shape.push_back(i);
}
result->label_map.data.resize(size);
memcpy(result->label_map.data.data(), label_data, size*sizeof(int64_t));
auto output_score_tensor = predictor_->GetTensor(output_names[1]);
const float *score_data = output_score_tensor->mutable_data<float>();
std::vector<int64_t> output_score_shape = output_score_tensor->shape();
size = 1;
for (const auto& i : output_score_shape) {
size *= i;
result->score_map.shape.push_back(i);
}
result->score_map.data.resize(size);
memcpy(result->score_map.data.data(), score_data, size*sizeof(float));
std::vector<uint8_t> label_map(result->label_map.data.begin(),
result->label_map.data.end());
cv::Mat mask_label(result->label_map.shape[1],
result->label_map.shape[2],
CV_8UC1,
label_map.data());
cv::Mat mask_score(result->score_map.shape[2],
result->score_map.shape[3],
CV_32FC1,
result->score_map.data.data());
int idx = 1;
int len_postprocess = inputs_.im_size_before_resize_.size();
for (std::vector<std::string>::reverse_iterator iter =
inputs_.reshape_order_.rbegin();
iter != inputs_.reshape_order_.rend();
++iter) {
if (*iter == "padding") {
auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx];
inputs_.im_size_before_resize_.pop_back();
auto padding_w = before_shape[0];
auto padding_h = before_shape[1];
mask_label = mask_label(cv::Rect(0, 0, padding_h, padding_w));
mask_score = mask_score(cv::Rect(0, 0, padding_h, padding_w));
} else if (*iter == "resize") {
auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx];
inputs_.im_size_before_resize_.pop_back();
auto resize_w = before_shape[0];
auto resize_h = before_shape[1];
cv::resize(mask_label,
mask_label,
cv::Size(resize_h, resize_w),
0,
0,
cv::INTER_NEAREST);
cv::resize(mask_score,
mask_score,
cv::Size(resize_h, resize_w),
0,
0,
cv::INTER_LINEAR);
}
++idx;
}
result->label_map.data.assign(mask_label.begin<uint8_t>(),
mask_label.end<uint8_t>());
result->label_map.shape = {mask_label.rows, mask_label.cols};
result->score_map.data.assign(mask_score.begin<float>(),
mask_score.end<float>());
result->score_map.shape = {mask_score.rows, mask_score.cols};
return true;
}
} // namespace PaddleX
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/paddlex/transforms.h"
#include <math.h>
#include <iostream>
#include <string>
#include <vector>
namespace PaddleX {
std::map<std::string, int> interpolations = {{"LINEAR", cv::INTER_LINEAR},
{"NEAREST", cv::INTER_NEAREST},
{"AREA", cv::INTER_AREA},
{"CUBIC", cv::INTER_CUBIC},
{"LANCZOS4", cv::INTER_LANCZOS4}};
bool Normalize::Run(cv::Mat* im, ImageBlob* data) {
for (int h = 0; h < im->rows; h++) {
for (int w = 0; w < im->cols; w++) {
im->at<cv::Vec3f>(h, w)[0] =
(im->at<cv::Vec3f>(h, w)[0] / 255.0 - mean_[0]) / std_[0];
im->at<cv::Vec3f>(h, w)[1] =
(im->at<cv::Vec3f>(h, w)[1] / 255.0 - mean_[1]) / std_[1];
im->at<cv::Vec3f>(h, w)[2] =
(im->at<cv::Vec3f>(h, w)[2] / 255.0 - mean_[2]) / std_[2];
}
}
return true;
}
float ResizeByShort::GenerateScale(const cv::Mat& im) {
int origin_w = im.cols;
int origin_h = im.rows;
int im_size_max = std::max(origin_w, origin_h);
int im_size_min = std::min(origin_w, origin_h);
float scale =
static_cast<float>(short_size_) / static_cast<float>(im_size_min);
if (max_size_ > 0) {
if (round(scale * im_size_max) > max_size_) {
scale = static_cast<float>(max_size_) / static_cast<float>(im_size_max);
}
}
return scale;
}
bool ResizeByShort::Run(cv::Mat* im, ImageBlob* data) {
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("resize");
float scale = GenerateScale(*im);
int width = static_cast<int>(round(scale * im->cols));
int height = static_cast<int>(round(scale * im->rows));
cv::resize(*im, *im, cv::Size(width, height), 0, 0, cv::INTER_LINEAR);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
data->scale = scale;
return true;
}
bool CenterCrop::Run(cv::Mat* im, ImageBlob* data) {
int height = static_cast<int>(im->rows);
int width = static_cast<int>(im->cols);
if (height < height_ || width < width_) {
std::cerr << "[CenterCrop] Image size less than crop size" << std::endl;
return false;
}
int offset_x = static_cast<int>((width - width_) / 2);
int offset_y = static_cast<int>((height - height_) / 2);
cv::Rect crop_roi(offset_x, offset_y, width_, height_);
*im = (*im)(crop_roi);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
return true;
}
bool Padding::Run(cv::Mat* im, ImageBlob* data) {
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("padding");
int padding_w = 0;
int padding_h = 0;
  if (width_ > 1 && height_ > 1) {
padding_w = width_ - im->cols;
padding_h = height_ - im->rows;
} else if (coarsest_stride_ >= 1) {
int h = im->rows;
int w = im->cols;
padding_h =
ceil(h * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows;
padding_w =
ceil(w * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols;
}
if (padding_h < 0 || padding_w < 0) {
std::cerr << "[Padding] Computed padding_h=" << padding_h
<< ", padding_w=" << padding_w
<< ", but they should be greater than 0." << std::endl;
return false;
}
cv::Scalar value = cv::Scalar(im_value_[0], im_value_[1], im_value_[2]);
cv::copyMakeBorder(
*im, *im, 0, padding_h, 0, padding_w, cv::BORDER_CONSTANT, value);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
return true;
}
bool ResizeByLong::Run(cv::Mat* im, ImageBlob* data) {
if (long_size_ <= 0) {
std::cerr << "[ResizeByLong] long_size should be greater than 0"
<< std::endl;
return false;
}
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("resize");
int origin_w = im->cols;
int origin_h = im->rows;
int im_size_max = std::max(origin_w, origin_h);
float scale =
static_cast<float>(long_size_) / static_cast<float>(im_size_max);
cv::resize(*im, *im, cv::Size(), scale, scale, cv::INTER_NEAREST);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
data->scale = scale;
return true;
}
bool Resize::Run(cv::Mat* im, ImageBlob* data) {
if (width_ <= 0 || height_ <= 0) {
std::cerr << "[Resize] width and height should be greater than 0"
<< std::endl;
return false;
}
if (interpolations.count(interp_) <= 0) {
std::cerr << "[Resize] Invalid interpolation method: '" << interp_ << "'"
<< std::endl;
return false;
}
data->im_size_before_resize_.push_back({im->rows, im->cols});
data->reshape_order_.push_back("resize");
cv::resize(
*im, *im, cv::Size(width_, height_), 0, 0, interpolations[interp_]);
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
return true;
}
void Transforms::Init(const YAML::Node& transforms_node, bool to_rgb) {
transforms_.clear();
to_rgb_ = to_rgb;
for (const auto& item : transforms_node) {
std::string name = item.begin()->first.as<std::string>();
std::cout << "trans name: " << name << std::endl;
std::shared_ptr<Transform> transform = CreateTransform(name);
transform->Init(item.begin()->second);
transforms_.push_back(transform);
}
}
std::shared_ptr<Transform> Transforms::CreateTransform(
const std::string& transform_name) {
if (transform_name == "Normalize") {
return std::make_shared<Normalize>();
} else if (transform_name == "ResizeByShort") {
return std::make_shared<ResizeByShort>();
} else if (transform_name == "CenterCrop") {
return std::make_shared<CenterCrop>();
} else if (transform_name == "Resize") {
return std::make_shared<Resize>();
} else if (transform_name == "Padding") {
return std::make_shared<Padding>();
} else if (transform_name == "ResizeByLong") {
return std::make_shared<ResizeByLong>();
} else {
std::cerr << "There's unexpected transform(name='" << transform_name
<< "')." << std::endl;
exit(-1);
}
}
bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
// preprocess by order
if (to_rgb_) {
cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
}
(*im).convertTo(*im, CV_32FC3);
data->ori_im_size_[0] = im->rows;
data->ori_im_size_[1] = im->cols;
data->new_im_size_[0] = im->rows;
data->new_im_size_[1] = im->cols;
for (int i = 0; i < transforms_.size(); ++i) {
if (!transforms_[i]->Run(im, data)) {
std::cerr << "Apply transforms to image failed!" << std::endl;
return false;
}
}
// image format NHWC to NCHW
// img data save to ImageBlob
int height = im->rows;
int width = im->cols;
int channels = im->channels();
const std::vector<int64_t> INPUT_SHAPE = {1, channels, height, width};
data->input_tensor_->Resize(INPUT_SHAPE);
auto *input_data = data->input_tensor_->mutable_data<float>();
for (size_t c = 0; c < channels; c++) {
for (size_t h = 0; h < height; h++) {
for (size_t w = 0; w < width; w++) {
input_data[c * width * height + h * width + w] =
im->at<cv::Vec3f>(h, w)[c];
}
}
}
return true;
}
} // namespace PaddleX
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/paddlex/visualize.h"
namespace PaddleX {
std::vector<int> GenerateColorMap(int num_class) {
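  // 将类别id按位展开映射到三个颜色通道,生成类别间区分度较高的颜色表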
auto colormap = std::vector<int>(3 * num_class, 0);
for (int i = 0; i < num_class; ++i) {
int j = 0;
int lab = i;
while (lab) {
colormap[i * 3] |= (((lab >> 0) & 1) << (7 - j));
colormap[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j));
colormap[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j));
++j;
lab >>= 3;
}
}
return colormap;
}
cv::Mat Visualize(const cv::Mat& img,
const DetResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap,
float threshold) {
cv::Mat vis_img = img.clone();
auto boxes = result.boxes;
for (int i = 0; i < boxes.size(); ++i) {
if (boxes[i].score < threshold) {
continue;
}
cv::Rect roi = cv::Rect(boxes[i].coordinate[0],
boxes[i].coordinate[1],
boxes[i].coordinate[2],
boxes[i].coordinate[3]);
// 生成预测框和标题
std::string text = boxes[i].category;
int c1 = colormap[3 * boxes[i].category_id + 0];
int c2 = colormap[3 * boxes[i].category_id + 1];
int c3 = colormap[3 * boxes[i].category_id + 2];
cv::Scalar roi_color = cv::Scalar(c1, c2, c3);
text += std::to_string(static_cast<int>(boxes[i].score * 100)) + "%";
int font_face = cv::FONT_HERSHEY_SIMPLEX;
    double font_scale = 0.5;
    int thickness = 1;  // OpenCV要求文字和线条的thickness为正整数
cv::Size text_size =
cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
cv::Point origin;
origin.x = roi.x;
origin.y = roi.y;
// 生成预测框标题的背景
cv::Rect text_back = cv::Rect(boxes[i].coordinate[0],
boxes[i].coordinate[1] - text_size.height,
text_size.width,
text_size.height);
// 绘图和文字
cv::rectangle(vis_img, roi, roi_color, 2);
cv::rectangle(vis_img, text_back, roi_color, -1);
cv::putText(vis_img,
text,
origin,
font_face,
font_scale,
cv::Scalar(255, 255, 255),
thickness);
// 生成实例分割mask
if (boxes[i].mask.data.size() == 0) {
continue;
}
cv::Mat bin_mask(result.mask_resolution,
result.mask_resolution,
CV_32FC1,
boxes[i].mask.data.data());
cv::resize(bin_mask,
bin_mask,
cv::Size(boxes[i].mask.shape[0], boxes[i].mask.shape[1]));
cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY);
cv::Mat full_mask = cv::Mat::zeros(vis_img.size(), CV_8UC1);
bin_mask.copyTo(full_mask(roi));
cv::Mat mask_ch[3];
mask_ch[0] = full_mask * c1;
mask_ch[1] = full_mask * c2;
mask_ch[2] = full_mask * c3;
cv::Mat mask;
cv::merge(mask_ch, 3, mask);
cv::addWeighted(vis_img, 1, mask, 0.5, 0, vis_img);
}
return vis_img;
}
cv::Mat Visualize(const cv::Mat& img,
const SegResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap) {
std::vector<uint8_t> label_map(result.label_map.data.begin(),
result.label_map.data.end());
cv::Mat mask(result.label_map.shape[0],
result.label_map.shape[1],
CV_8UC1,
label_map.data());
cv::Mat color_mask = cv::Mat::zeros(
result.label_map.shape[0], result.label_map.shape[1], CV_8UC3);
int rows = img.rows;
int cols = img.cols;
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
int category_id = static_cast<int>(mask.at<uchar>(i, j));
color_mask.at<cv::Vec3b>(i, j)[0] = colormap[3 * category_id + 0];
color_mask.at<cv::Vec3b>(i, j)[1] = colormap[3 * category_id + 1];
color_mask.at<cv::Vec3b>(i, j)[2] = colormap[3 * category_id + 2];
}
}
return color_mask;
}
std::string generate_save_path(const std::string& save_dir,
const std::string& file_path) {
if (access(save_dir.c_str(), 0) < 0) {
#ifdef _WIN32
mkdir(save_dir.c_str());
#else
if (mkdir(save_dir.c_str(), S_IRWXU) < 0) {
std::cerr << "Fail to create " << save_dir << "directory." << std::endl;
}
#endif
}
int pos = file_path.find_last_of(OS_PATH_SEP);
std::string image_name(file_path.substr(pos + 1));
return save_dir + OS_PATH_SEP + image_name;
}
} // namespace PaddleX
文件已添加
# PaddleX文档
PaddleX的使用文档均在本目录结构下。文档采用Read the Docs方式组织,您可以直接访问[在线文档](https://paddlex.readthedocs.io/zh_CN/develop/index.html)进行查阅。
## 编译文档
在本目录下按如下步骤进行文档编译
......
# 数据集分析
## paddlex.datasets.analysis.Seg
```python
paddlex.datasets.analysis.Seg(data_dir, file_list, label_list)
```
构建用于统计分析语义分割数据集的分析器。
> **参数**
> > * **data_dir** (str): 数据集所在的目录路径。
> > * **file_list** (str): 描述数据集图片文件和类别id的文件路径(文本内每行路径为相对`data_dir`的相对路径)。
> > * **label_list** (str): 描述数据集包含的类别信息文件路径。
### analysis
```python
analysis(self)
```
Seg分析器的分析接口,完成以下信息的分析统计:
> * 图像数量
> * 图像最大和最小的尺寸
> * 图像通道数量
> * 图像各通道的最小值和最大值
> * 图像各通道的像素值分布
> * 图像各通道归一化后的均值和方差
> * 标注图中各类别的数量及比重
[代码示例](https://github.com/PaddlePaddle/PaddleX/blob/develop/examples/multi-channel_remote_sensing/tools/analysis.py)
[统计信息示例](../../examples/multi-channel_remote_sensing/analysis.html#id2)
### cal_clipped_mean_std
```python
cal_clipped_mean_std(self, clip_min_value, clip_max_value, data_info_file)
```
Seg分析器用于计算图像截断后的均值和方差的接口。
> **参数**
> > * **clip_min_value** (list): 截断的下限,小于clip_min_value的数值均设为clip_min_value。
> > * **clip_max_value** (list): 截断的上限,大于clip_max_value的数值均设为clip_max_value。
> > * **data_info_file** (str): 在analysis()接口中保存的分析结果文件(名为`train_information.pkl`)的路径。
[代码示例](https://github.com/PaddlePaddle/PaddleX/blob/develop/examples/multi-channel_remote_sensing/tools/cal_clipped_mean_std.py)
[计算结果示例](../../examples/multi-channel_remote_sensing/analysis.html#id4)
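下面给出一个示意性的使用片段(其中数据集路径与统计文件路径均为假设值,请替换为实际路径):
```python
import paddlex as pdx

# 构建语义分割数据集的分析器(各路径均为假设值)
analyzer = pdx.datasets.analysis.Seg(
    data_dir='./dataset',
    file_list='./dataset/train_list.txt',
    label_list='./dataset/labels.txt')

# 统计图像数量、尺寸、各通道均值方差等信息
analyzer.analysis()

# 基于analysis()保存的统计文件,计算截断后的均值和方差
analyzer.cal_clipped_mean_std(
    clip_min_value=[0, 0, 0],
    clip_max_value=[255.0, 255.0, 255.0],
    data_info_file='./output/train_information.pkl')
```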
@@ -7,7 +7,7 @@
图像分类、目标检测、实例分割、语义分割统一的预测器,实现高性能预测。
```
paddlex.deploy.Predictor(model_dir, use_gpu=False, gpu_id=0, use_mkl=False, mkl_thread_num=4, use_trt=False, use_glog=False, memory_optimize=True)
```
**参数**
@@ -16,6 +16,7 @@ paddlex.deploy.Predictor(model_dir, use_gpu=False, gpu_id=0, use_mkl=False, use_
> * **use_gpu** (bool): 是否使用GPU进行预测。
> * **gpu_id** (int): 使用的GPU序列号。
> * **use_mkl** (bool): 是否使用mkldnn加速库。
> * **mkl_thread_num** (int): 使用mkldnn加速库时的线程数,默认为4。
> * **use_trt** (bool): 是否使用TensorRT预测引擎。
> * **use_glog** (bool): 是否打印中间日志。
> * **memory_optimize** (bool): 是否优化内存使用。
@@ -44,7 +45,7 @@ predict(image, topk=1)
### batch_predict 接口
```
batch_predict(image_list, topk=1)
```
批量图片预测接口。
@@ -52,4 +53,3 @@ batch_predict(image_list, topk=1, thread_num=2)
>
> > * **image_list** (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径或numpy数组(HWC排列,BGR格式)。
> > * **topk** (int): 图像分类时使用的参数,表示预测前topk个可能的分类。
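下面给出一个示意性的调用片段(模型目录与图片路径均为假设值):
```python
import paddlex as pdx

# 加载导出的部署模型(目录为假设值)
predictor = pdx.deploy.Predictor('./inference_model', use_gpu=True, gpu_id=0)

# 单张图片预测
result = predictor.predict(image='test.jpg')

# 批量预测
results = predictor.batch_predict(image_list=['test1.jpg', 'test2.jpg'])
```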
@@ -6,6 +6,7 @@ API接口说明
transforms/index.rst
datasets.md
analysis.md
models/index.rst
slim.md
visualize.md
......
@@ -23,8 +23,12 @@ LIME表示与模型无关的局部可解释性,可以解释任何模型。LIME
>* **batch_size** (int): 预测数据batch大小,默认为50。
>* **save_dir** (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。
### 可视化效果
![](./docs/gui/images/LIME.png)
### 使用示例
> 对预测可解释性结果可视化的过程可参见[代码](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/lime.py)。
......
@@ -62,7 +62,7 @@ evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False)
### predict
```python
predict(self, img_file, transforms=None, topk=1)
```
> 分类模型预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`ResNet50.test_transforms`和`ResNet50.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`predict`接口时,用户需要再重新定义test_transforms传入给`predict`接口。
@@ -81,7 +81,7 @@ predict(self, img_file, transforms=None, topk=5)
### batch_predict
```python
batch_predict(self, img_file_list, transforms=None, topk=1)
```
> 分类模型批量预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`ResNet50.test_transforms`和`ResNet50.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`batch_predict`接口时,用户需要再重新定义test_transforms传入给`batch_predict`接口。
@@ -91,7 +91,6 @@ batch_predict(self, img_file_list, transforms=None, topk=5, thread_num=2)
> > - **img_file_list** (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是图像路径或numpy数组(HWC排列,BGR格式)。
> > - **transforms** (paddlex.cls.transforms): 数据预处理操作。
> > - **topk** (int): 预测时前k个最大值。
> **返回值**
>
......
@@ -108,7 +108,7 @@ predict(self, img_file, transforms=None)
### batch_predict
```python
batch_predict(self, img_file_list, transforms=None)
```
> PPYOLO模型批量预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`PPYOLO.test_transforms`和`PPYOLO.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`batch_predict`接口时,用户需要再重新定义`test_transforms`传入给`batch_predict`接口。
@@ -117,7 +117,6 @@ batch_predict(self, img_file_list, transforms=None, thread_num=2)
>
> > - **img_file_list** (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素是预测图像路径或numpy数组(HWC排列,BGR格式)。
> > - **transforms** (paddlex.det.transforms): 数据预处理操作。
>
> **返回值**
>
@@ -222,7 +221,7 @@ predict(self, img_file, transforms=None)
### batch_predict
```python
batch_predict(self, img_file_list, transforms=None)
```
> YOLOv3模型批量预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`YOLOv3.test_transforms`和`YOLOv3.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`batch_predict`接口时,用户需要再重新定义`test_transforms`传入给`batch_predict`接口。
@@ -231,7 +230,6 @@ batch_predict(self, img_file_list, transforms=None, thread_num=2)
>
> > - **img_file_list** (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素是预测图像路径或numpy数组(HWC排列,BGR格式)。
> > - **transforms** (paddlex.det.transforms): 数据预处理操作。
>
> **返回值**
>
@@ -327,7 +325,7 @@ predict(self, img_file, transforms=None)
### batch_predict
```python
batch_predict(self, img_file_list, transforms=None)
```
> FasterRCNN模型批量预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`FasterRCNN.test_transforms`和`FasterRCNN.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`batch_predict`接口时,用户需要再重新定义test_transforms传入给`batch_predict`接口。
@@ -336,7 +334,6 @@ batch_predict(self, img_file_list, transforms=None, thread_num=2)
>
> > - **img_file_list** (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素是预测图像路径或numpy数组(HWC排列,BGR格式)。
> > - **transforms** (paddlex.det.transforms): 数据预处理操作。
>
> **返回值**
>
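以下为批量预测接口的示意性调用片段(模型与图片路径均为假设值):
```python
import paddlex as pdx

# 加载训练好的检测模型(路径为假设值)
model = pdx.load_model('output/faster_rcnn_r50_fpn/best_model')
results = model.batch_predict(img_file_list=['test1.jpg', 'test2.jpg'])
```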
......
@@ -88,7 +88,7 @@ predict(self, img_file, transforms=None)
#### batch_predict
```python
batch_predict(self, img_file_list, transforms=None)
```
> MaskRCNN模型批量预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`MaskRCNN.test_transforms`和`MaskRCNN.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`batch_predict`接口时,用户需要再重新定义test_transforms传入给`batch_predict`接口。
@@ -97,7 +97,6 @@ batch_predict(self, img_file_list, transforms=None, thread_num=2)
>
> > - **img_file_list** (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是预测图像路径或numpy数组(HWC排列,BGR格式)。
> > - **transforms** (paddlex.det.transforms): 数据预处理操作。
>
> **返回值**
>
......
@@ -3,8 +3,7 @@
## paddlex.seg.DeepLabv3p
```python
paddlex.seg.DeepLabv3p(num_classes=2, backbone='MobileNetV2_x1.0', output_stride=16, aspp_with_sep_conv=True, decoder_use_sep_conv=True, encoder_with_aspp=True, enable_decoder=True, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255, pooling_crop_size=None, input_channel=3)
```
> 构建DeepLabv3p分割器。
@@ -23,6 +22,7 @@ paddlex.seg.DeepLabv3p(num_classes=2, backbone='MobileNetV2_x1.0', output_stride
> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None时,各类的权重为1,即平时使用的交叉熵损失函数。
> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。
> > - **pooling_crop_size** (int):当backbone为`MobileNetV3_large_x1_0_ssld`时,需设置为训练过程中模型输入大小,格式为[W, H]。例如模型输入大小为[512, 512], 则`pooling_crop_size`应该设置为[512, 512]。在encoder模块中获取图像平均值时被用到,若为None,则直接求平均值;若为模型输入大小,则使用`avg_pool`算子得到平均值。默认值None。
> > - **input_channel** (int): 输入图像通道数。默认值3。
### train
@@ -95,7 +95,7 @@ predict(self, img_file, transforms=None):
### batch_predict
```
batch_predict(self, img_file_list, transforms=None):
```
> DeepLabv3p模型批量预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`DeepLabv3p.test_transforms`和`DeepLabv3p.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`batch_predict`接口时,用户需要再重新定义test_transforms传入给`batch_predict`接口。
@@ -104,13 +104,40 @@ batch_predict(self, img_file_list, transforms=None, thread_num=2):
> >
> > - **img_file_list** (list|tuple): 对列表(或元组)中的图像同时进行预测,列表中的元素可以是预测图像路径或numpy数组(HWC排列,BGR格式)。
> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。
> **返回值**
> >
> > - **list**: 各图像的预测结果组成的列表。每张图像的预测结果用字典表示,包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
### overlap_tile_predict
```
overlap_tile_predict(self, img_file, tile_size=[512, 512], pad_size=[64, 64], batch_size=32, transforms=None)
```
> DeepLabv3p模型的滑动预测接口, 支持有重叠和无重叠两种方式。
> **无重叠的滑动窗口预测**:在输入图片上以固定大小的窗口滑动,分别对每个窗口下的图像进行预测,最后将各窗口的预测结果拼接成输入图片的预测结果。**使用时需要把参数`pad_size`设置为`[0, 0]`**。
> **有重叠的滑动窗口预测**:在Unet论文中,作者提出一种有重叠的滑动窗口预测策略(Overlap-tile strategy)来消除拼接处的裂痕感。对各滑动窗口预测时,会向四周扩展一定的面积,对扩展后的窗口进行预测,例如下图中的蓝色部分区域,到拼接时只取各窗口中间部分的预测结果,例如下图中的黄色部分区域。位于输入图像边缘处的窗口,其扩展面积下的像素则通过将边缘部分像素镜像填补得到。
![](../../../examples/remote_sensing/images/overlap_tile.png)
> 需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`DeepLabv3p.test_transforms`和`DeepLabv3p.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`overlap_tile_predict`接口时,用户需要再重新定义test_transforms传入给`overlap_tile_predict`接口。
> **参数**
> >
> > - **img_file** (str|np.ndarray): 预测图像路径或numpy数组(HWC排列,BGR格式)。
> > - **tile_size** (list|tuple): 滑动窗口的大小,该区域内用于拼接预测结果,格式为(W,H)。默认值为[512, 512]。
> > - **pad_size** (list|tuple): 滑动窗口向四周扩展的大小,扩展区域内不用于拼接预测结果,格式为(W,H)。默认值为[64, 64]。
> > - **batch_size** (int):对窗口进行批量预测时的批量大小。默认值为32。
> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。
> **返回值**
> >
> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
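以下为该接口的示意性调用片段(模型与影像路径均为假设值):
```python
import paddlex as pdx

# 加载训练好的DeepLabv3p模型(路径为假设值)
model = pdx.load_model('output/deeplabv3p/best_model')

# 对大尺幅影像做有重叠的滑动窗口预测
result = model.overlap_tile_predict(
    img_file='large_image.tif',
    tile_size=[512, 512],
    pad_size=[64, 64],
    batch_size=32)
label_map = result['label_map']
```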
### tile_predict
@@ -155,7 +182,7 @@ batch_predict(self, img_file_list, transforms=None, thread_num=2):
## paddlex.seg.UNet
```python
paddlex.seg.UNet(num_classes=2, upsample_mode='bilinear', use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255, input_channel=3)
```
> 构建UNet分割器。
@@ -168,18 +195,18 @@ paddlex.seg.UNet(num_classes=2, upsample_mode='bilinear', use_bce_loss=False, us
> > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None时,各类的权重为1,即平时使用的交叉熵损失函数。
> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。
> > - **input_channel** (int): 输入图像通道数。默认值3。

> - train 训练接口说明同 [DeepLabv3p模型train接口](#train)
> - evaluate 评估接口说明同 [DeepLabv3p模型evaluate接口](#evaluate)
> - predict 预测接口说明同 [DeepLabv3p模型predict接口](#predict)
> - batch_predict 批量预测接口说明同 [DeepLabv3p模型predict接口](#batch-predict)
> - overlap_tile_predict 滑动窗口预测接口说明同 [DeepLabv3p模型overlap_tile_predict接口](#overlap-tile-predict)
## paddlex.seg.HRNet
```python
paddlex.seg.HRNet(num_classes=2, width=18, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255, input_channel=3)
```
> 构建HRNet分割器。
@@ -192,18 +219,18 @@ paddlex.seg.HRNet(num_classes=2, width=18, use_bce_loss=False, use_dice_loss=Fal
> > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
> > - **class_weight** (list|str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None时,各类的权重为1,即平时使用的交叉熵损失函数。
> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。
> > - **input_channel** (int): 输入图像通道数。默认值3。

> - train 训练接口说明同 [DeepLabv3p模型train接口](#train)
> - evaluate 评估接口说明同 [DeepLabv3p模型evaluate接口](#evaluate)
> - predict 预测接口说明同 [DeepLabv3p模型predict接口](#predict)
> - batch_predict 批量预测接口说明同 [DeepLabv3p模型predict接口](#batch-predict)
> - overlap_tile_predict 滑动窗口预测接口说明同 [DeepLabv3p模型overlap_tile_predict接口](#overlap-tile-predict)
## paddlex.seg.FastSCNN
```python
paddlex.seg.FastSCNN(num_classes=2, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255, multi_loss_weight=[1.0], input_channel=3)
```
> 构建FastSCNN分割器。
@@ -216,10 +243,10 @@ paddlex.seg.FastSCNN(num_classes=2, use_bce_loss=False, use_dice_loss=False, cla
> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None时,各类的权重为1,即平时使用的交叉熵损失函数。
> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。
> > - **multi_loss_weight** (list): 多分支上的loss权重。默认计算一个分支上的loss,即默认值为[1.0]。也支持计算两个分支或三个分支上的loss,权重按[fusion_branch_weight, higher_branch_weight, lower_branch_weight]排列,fusion_branch_weight为空间细节分支和全局上下文分支融合后的分支上的loss权重,higher_branch_weight为空间细节分支上的loss权重,lower_branch_weight为全局上下文分支上的loss权重,若higher_branch_weight和lower_branch_weight未设置则不会计算这两个分支上的loss。
> > - **input_channel** (int): 输入图像通道数。默认值3。

> - train 训练接口说明同 [DeepLabv3p模型train接口](#train)
> - evaluate 评估接口说明同 [DeepLabv3p模型evaluate接口](#evaluate)
> - predict 预测接口说明同 [DeepLabv3p模型predict接口](#predict)
> - batch_predict 批量预测接口说明同 [DeepLabv3p模型predict接口](#batch-predict)
> - overlap_tile_predict 滑动窗口预测接口说明同 [DeepLabv3p模型overlap_tile_predict接口](#overlap-tile-predict)
@@ -78,16 +78,19 @@ paddlex.seg.transforms.ResizeStepScaling(min_scale_factor=0.75, max_scale_factor
## Normalize
```python
paddlex.seg.transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], min_val=[0, 0, 0], max_val=[255.0, 255.0, 255.0])
```
对图像进行标准化。
1. 像素值减去min_val。
2. 像素值除以(max_val-min_val),归一化到区间 [0.0, 1.0]。
3. 对图像进行减均值除以标准差操作。

### 参数
* **mean** (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]。
* **std** (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。
* **min_val** (list): 图像数据集的最小值。默认值[0, 0, 0]。
* **max_val** (list): 图像数据集的最大值。默认值[255.0, 255.0, 255.0]。
## Padding
```python
@@ -167,6 +170,16 @@ paddlex.seg.transforms.RandomDistort(brightness_range=0.5, brightness_prob=0.5,
* **hue_range** (int): 色调因子的范围。默认为18。
* **hue_prob** (float): 随机调整色调的概率。默认为0.5。
## Clip
```python
paddlex.seg.transforms.Clip(min_val=[0, 0, 0], max_val=[255.0, 255.0, 255.0])
```
对图像上超出一定范围的数据进行截断。
### 参数
* **min_val** (list): 截断的下限,小于min_val的数值均设为min_val。默认值[0, 0, 0]。
* **max_val** (list): 截断的上限,大于max_val的数值均设为max_val。默认值[255.0, 255.0, 255.0]。
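以下为将Clip与Normalize组合使用的示意性片段(其中各通道的截断与归一化数值均为假设值,实际可由上文的数据分析接口统计得到):
```python
from paddlex.seg import transforms

# 先截断超出范围的像素值,再做归一化与标准化
train_transforms = transforms.Compose([
    transforms.Clip(min_val=[0, 0, 0], max_val=[255.0, 255.0, 255.0]),
    transforms.Normalize(
        mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5],
        min_val=[0, 0, 0], max_val=[255.0, 255.0, 255.0])
])
```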
<!--
## ComposedSegTransforms
```python
......
@@ -27,7 +27,7 @@ pdx.det.visualize('./xiaoduxiong_epoch_12/xiaoduxiong.jpeg', result, save_dir='.
## paddlex.seg.visualize
> **语义分割模型预测结果可视化**
```
paddlex.seg.visualize(image, result, weight=0.6, save_dir='./', color=None)
```
将语义分割模型预测得到的Mask在原图上进行可视化。
@@ -36,6 +36,7 @@ paddlex.seg.visualize(image, result, weight=0.6, save_dir='./')
> * **result** (dict): 模型预测结果。
> * **weight**(float): mask可视化结果与原图权重因子,weight表示原图的权重。默认0.6。
> * **save_dir**(str): 可视化结果保存路径。若为None,则表示不保存,该函数将可视化的结果以np.ndarray的形式返回;若设为目录路径,则将可视化结果保存至该目录下。默认值为'./'。
> * **color** (list): 各类别的BGR颜色值组成的列表。例如两类时可设置为[255, 255, 255, 0, 0, 0]。默认值为None,则使用默认生成的颜色列表。
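以下为使用`color`参数的示意性片段(模型与图片路径均为假设值):
```python
import paddlex as pdx

model = pdx.load_model('output/unet/best_model')  # 模型目录为假设值
result = model.predict('test.png')
# 两类分割时,各类别的BGR颜色值依次排列,共num_classes*3个数值
pdx.seg.visualize('test.png', result, weight=0.6, save_dir='./output',
                  color=[255, 255, 255, 0, 0, 0])
```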
### 使用示例
> 点击下载如下示例中的[模型](https://bj.bcebos.com/paddlex/models/cityscape_deeplab.tar.gz)和[测试图片](https://bj.bcebos.com/paddlex/datasets/city.png)
@@ -130,8 +131,8 @@ paddlex.transforms.visualize(dataset,
```
对数据预处理/增强中间结果进行可视化。
可使用VisualDL查看中间结果:
1. VisualDL启动方式: visualdl --logdir vdl_output/image_transforms --port 8001
2. 浏览器打开 http://0.0.0.0:8001 ,在页面上面点击`『样本数据-图像』`即可。
其中0.0.0.0为本机访问,如为远程服务,改成相应机器IP
### 参数
......
@@ -5,35 +5,29 @@ PaddleX在模型训练时,存在以下两种情况需要进行联网下载
> 2.模型裁剪训练时,用户没有配置自定义的参数敏感度信息文件`sensitivities_file`,并将`sensitivities_file`配置成了'DEFAULT'字符串,此时PaddleX会自动联网下载模型在标准数据集上计算得到的参数敏感度信息文件。

用户可以通过本文最末的代码,先将所有的预训练模型下载到指定的目录(在示例代码中我们下载到了`/home/work/paddlex_pretrain`目录)。

在训练模型时,需要配置paddlex全局预训练模型路径,将此路径指定到存放了所有预训练模型的目录即可,示例代码如下
```
import paddlex as pdx
# 在import paddlex后指定全局的预训练模型路径
# 模型训练时会跳过下载的过程,使用该目录下已下载好的模型
pdx.pretrain_dir = '/home/work/paddlex_pretrain'
```
按上述方式配置后,即可进行无联网模型训练

### 下载所有预训练模型代码

> 所有预训练模型下载解压后约为7.5G
```
from paddlex.cv.models.utils.pretrain_weights import image_pretrain
from paddlex.cv.models.utils.pretrain_weights import coco_pretrain
import paddlehub as hub

save_dir = '/home/work/paddlex_pretrain'
for name, url in image_pretrain.items():
    hub.download(name, save_dir)
for name, url in coco_pretrain.items():
    hub.download(name, save_dir)
```
# 更新日志
**v1.2.0** 2020.09.07
- 模型更新
> - 新增目标检测模型PPYOLO[详情链接](https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#paddlex-det-ppyolo)
> - FasterRCNN、MaskRCNN、YOLOv3、DeepLabv3p等模型新增内置COCO数据集预训练模型
> - 目标检测模型FasterRCNN和MaskRCNN新增backbone HRNet_W18[详情链接](https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#paddlex-det-fasterrcnn)
> - 语义分割模型DeepLabv3p新增backbone MobileNetV3_large_ssld[详情链接](https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#paddlex-seg-deeplabv3p)
- 模型部署更新
> - 新增模型通过OpenVINO的部署方案[详情链接](https://paddlex.readthedocs.io/zh_CN/develop/deploy/openvino/index.html)
> - 新增模型在树莓派上的部署方案[详情链接](https://paddlex.readthedocs.io/zh_CN/develop/deploy/raspberry/index.html)
> - 优化PaddleLite Android部署的数据预处理和后处理代码性能
> - 优化Paddle服务端C++部署代码,增加use_mkl等参数,通过mkldnn显著提升模型在CPU上的预测性能
- 产业案例更新
> - 新增RGB图像遥感分割案例[详情链接](https://paddlex.readthedocs.io/zh_CN/develop/examples/remote_sensing.html)
> - 新增多通道遥感分割案例[详情链接](https://paddlex.readthedocs.io/zh_CN/develop/examples/multi-channel_remote_sensing/README.html)
- 其它
> - 新增数据集切分功能,支持通过命令行切分ImageNet、PascalVOC、MSCOCO和语义分割数据集[详情链接](https://paddlex.readthedocs.io/zh_CN/develop/data/format/classification.html#id2)
**v1.1.0** 2020.07.12
@@ -13,8 +32,8 @@
> - 新增Jetson、Paddle Lite模型部署预测方案
> - C++部署代码新增batch批预测,并采用OpenMP对预处理进行并行加速
- 新增2个PaddleX产业案例
> - [人像分割案例](https://paddlex.readthedocs.io/zh_CN/develop/examples/human_segmentation.html)
> - [工业表计读数案例](https://paddlex.readthedocs.io/zh_CN/develop/examples/meter_reader.html)
- 新增数据格式转换功能,LabelMe、精灵标注助手和EasyData平台标注的数据转为PaddleX支持加载的数据格式
- PaddleX文档更新,优化文档结构
......
@@ -39,3 +39,29 @@ paddlex --data_conversion --source labelme --to PascalVOC --pics ./pics --annota
**注意**
1. 精灵标注的目标检测数据可以在工具内部导出为PascalVOC格式,因此paddlex未提供精灵标注数据到PascalVOC格式的转换
2. 在将LabelMe数据集转换为COCO数据集时,LabelMe的图像文件名和json文件名需要一一对应,才可正确转换
## 手机拍照标注说明
当您收集的样本图像来源于手机拍照时,请注意:手机照片的EXIF信息中通常记录了拍摄方向,这可能会使图像在标注和训练时出现方向不一致的问题。因此拍照后请先根据方向信息对照片进行旋转处理,使用如下函数即可解决
```
from PIL import Image, ExifTags
def rotate(im):
    try:
        for orientation in ExifTags.TAGS.keys():
            if ExifTags.TAGS[orientation] == 'Orientation':
                break
        exif = dict(im._getexif().items())
        if exif[orientation] == 3:
            im = im.rotate(180, expand=True)
        if exif[orientation] == 6:
            im = im.rotate(270, expand=True)
        if exif[orientation] == 8:
            im = im.rotate(90, expand=True)
    except Exception:
        pass
    return im  # PIL的rotate返回新图像,需将结果返回给调用方

img_file = '1.jpeg'
im = Image.open(img_file)
im = rotate(im)
im.save('new_1.jpeg')
```
@@ -10,12 +10,12 @@
下面,我们按照步骤,实现将一个图像分类模型[MobileNetV3_small_ssld](https://bj.bcebos.com/paddlex/models/mobilenetv3_small_ssld_imagenet.tar.gz)转换成`PaddleHub`的预训练模型,并利用`PaddleHub-Serving`实现一键部署。

## 模型部署

### 1 部署模型准备
部署模型的格式均为目录下包含`__model__`、`__params__`和`model.yml`三个文件,如若不然,则参照[部署模型导出文档](./export_model.md)进行导出。

### 2 模型转换
首先,我们将`PaddleX`的`Inference Model`转换成`PaddleHub`的预训练模型,使用命令`hub convert`即可一键转换,对此命令的说明如下:
```shell
@@ -45,7 +45,7 @@ $ The converted module is stored in `MobileNetV3_small_ssld_hub_1596077881.86850
```
等待生成成功的提示后,我们就在输出目录中得到了一个`PaddleHub`的预训练模型。

### 3 模型安装
在模型转换一步中,我们得到了一个`.tar.gz`格式的预训练模型压缩包,在进行部署之前需要先安装到本机,使用命令`hub install`即可一键安装,对此命令的说明如下:
```shell
$ hub install ${MODULE}
@@ -61,7 +61,7 @@ hub install MobileNetV3_small_ssld_hub_1596077881.868501/mobilenetv3_small_ssld_
$ Successfully installed mobilenetv3_small_ssld_imagenet_hub
```

### 4 模型部署
下面,我们只需要使用`hub serving`命令即可完成模型的一键部署,对此命令的说明如下:
```shell
$ hub serving start --modules/-m [Module1==Version1, Module2==Version2, ...] \
@@ -107,7 +107,7 @@ $ hub serving start -m mobilenetv3_small_ssld_imagenet_hub
|modules_info|PaddleHub Serving预安装模型,以字典列表形式列出,key为模型名称。其中:<br>`init_args`为模型加载时输入的参数,等同于`paddlehub.Module(**init_args)`<br>`predict_args`为模型预测时输入的参数,以`mobilenetv3_small_ssld_imagenet_hub`为例,等同于`mobilenetv3_small_ssld_imagenet_hub.batch_predict(**predict_args)`
|port|服务端口,默认为8866|

### 5 测试
在第三步模型安装的同时,会生成一个客户端请求示例,存放在模型安装目录,默认为`${HUB_HOME}/.paddlehub/modules`,对于此例,我们可以在`~/.paddlehub/modules/mobilenetv3_small_ssld_imagenet_hub`找到此客户端示例`serving_client_demo.py`,代码如下:
```python
......
@@ -11,3 +11,6 @@
server/index
nvidia-jetson.md
paddlelite/index
openvino/index
raspberry/index
opencv.md
@@ -14,7 +14,7 @@
`git clone https://github.com/PaddlePaddle/PaddleX.git`

**说明**:其中`C++`预测代码在`PaddleX/deploy/cpp` 目录,该目录不依赖任何`PaddleX`下其他目录。

### Step2: 下载PaddlePaddle C++ 预测库 paddle_inference
......
# 部署编译说明
目前PaddleX所有的模型部署测试环境为
- Ubuntu 16.04/18.04 / Windows 10
- gcc 4.8.5 / Microsoft Visual Studio 2019
如果切换至其它Linux系统环境(gcc版本不变),预期会遇到opencv的问题。
在Linux编译脚本(例如`deploy/cpp/script/build.sh`)中,会依赖`deploy/cpp/script/bootstrap.sh`自动下载预编译好的opencv库和加密库。而目前`bootstrap.sh`只提供了OpenCV在Ubuntu16.04/18.04两个系统环境下的预编译包,如果你的系统与此不同,可尝试按照如下方式解决。
## Linux下自编译OpenCV
### 1. 下载OpenCV Source Code
前往OpenCV官方网站下载OpenCV 3.4.6 Source Code,或者直接[点击这里](https://bj.bcebos.com/paddlex/deploy/opencv-3.4.6.zip)下载我们已经上传至服务器的源码压缩包。
### 2. 编译OpenCV
确认自己的gcc/g++版本为4.8.5版本,编译过程参考如下代码
当前opencv-3.4.6.zip存放路径为`/home/paddlex/opencv-3.4.6.zip`
```
unzip opencv-3.4.6.zip
cd opencv-3.4.6
mkdir build && cd build
mkdir opencv3.4.6gcc4.8ffmpeg
cmake -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_INSTALL_PREFIX=/home/paddlex/opencv-3.4.6/build/opencv3.4.6gcc4.8ffmpeg -D WITH_FFMPEG=ON ..
make -j5
make install
```
编译好的opencv会存放在设定的`/home/paddlex/opencv-3.4.6/build/opencv3.4.6gcc4.8ffmpeg`
### 3. 编译PaddleX预测代码依赖自己的opencv
修改`deploy/cpp/script/build.sh`
1. 注释或删除掉如下代码
```
{
bash $(pwd)/scripts/bootstrap.sh # 下载预编译版本的加密工具和opencv依赖库
} || {
echo "Fail to execute script/bootstrap.sh"
exit -1
}
```
2. 模型加密开关设置
如果您不需要用到PaddleX的模型加密功能,则将如下开关修改为OFF即可
```
WITH_ENCRYPTION=OFF
```
如果需要用到加密,则请手动下载加密库后解压,[点击下载](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip)
3. 设置依赖库路径
`OPENCV_DIR`设置为自己编译好的路径,如
```
OPENCV_DIR=/home/paddlex/opencv-3.4.6/build/opencv3.4.6gcc4.8ffmpeg
```
如果您还需要用到模型加密,已经将`WITH_ENCRYPTION`设为`ON`的前提下,也同时将`ENCRYPTION_DIR`设置为自己下载解压后的路径,如
```
ENCRYPTION_DIR=/home/paddlex/paddlex-encryption
```
4. 执行`sh script/build.sh`编译即可
## 反馈
如在使用中仍然存在问题,请前往PaddleX的Github提ISSUE反馈给我们。
- [PaddleX Issue](https://github.com/PaddlePaddle/PaddleX/issues)
# OpenVINO模型转换
将Paddle模型转换为OpenVINO的Inference Engine
## 环境依赖
* ONNX 1.5.0+
* PaddleX 1.0+
* OpenVINO 2020.4
**说明**:PaddleX安装请参考[PaddleX](https://paddlex.readthedocs.io/zh_CN/develop/install.html) , OpenVINO安装请参考[OpenVINO](https://docs.openvinotoolkit.org/latest/index.html),ONNX请安装1.5.0以上版本否则会出现转模型错误。
请确保系统已经安装好上述基本软件,**下面所有示例以工作目录 `/root/projects/`演示**
## 导出inference模型
paddle模型转openvino之前需要先把paddle模型导出为inference格式模型,导出的模型将包括__model__、__params__和model.yml三个文件,导出命令如下
```
paddlex --export_inference --model_dir=/path/to/paddle_model --save_dir=./inference_model --fixed_input_shape=[w,h]
```
## 导出OpenVINO模型
```
cd /root/projects/PaddleX/deploy/openvino/python
python convertor.py --model_dir /path/to/inference_model --save_dir /path/to/openvino_model --fixed_input_shape [w,h]
```
**转换成功后会在save_dir下出现后缀名为.xml、.bin、.mapping三个文件**
转换参数说明如下:
| 参数 | 说明 |
| ---- | ---- |
| --model_dir | Paddle模型路径,请确保__model__, __params__和model.yml在同一个目录|
| --save_dir | OpenVINO模型保存路径 |
| --fixed_input_shape | 模型输入的[W,H] |
| --data_type(option) | FP32、FP16,默认为FP32,VPU下的IR需要为FP16 |
**注意**
- 由于OpenVINO不支持ONNX的resize-11 OP的原因,目前还不支持Paddle的分割模型
- YOLOv3在通过OpenVINO部署时,由于OpenVINO对ONNX OP的支持限制,我们在将YOLOv3的Paddle模型导出时,对最后一层multiclass_nms进行了特殊处理,导出的ONNX模型,最终输出的Box结果包括背景类别(而Paddle模型不包含),此处在OpenVINO的部署代码中,我们通过后处理过滤了背景类别。
@@ -6,6 +6,8 @@ OpenVINO部署
:maxdepth: 2
:caption: 文档目录:

introduction.md
windows.md
linux.md
python.md
export_openvino_model.md
# OpenVINO部署简介
PaddleX支持将训练好的Paddle模型通过OpenVINO实现模型的预测加速,OpenVINO详细资料与安装流程请参考[OpenVINO](https://docs.openvinotoolkit.org/latest/index.html)
## 部署支持情况
下表提供了PaddleX在不同环境下对使用OpenVINO加速的支持情况
|硬件平台|Linux|Windows|Raspbian OS|c++|python |分类|检测|分割|
| ----| ---- | ---- | ----| ---- | ---- |---- | ---- |---- |
|CPU|支持|支持|不支持|支持|支持|支持|支持|不支持|
|VPU|支持|支持|支持|支持|支持|支持|不支持|不支持|
**注意**:其中Raspbian OS为树莓派操作系统。检测模型仅支持YOLOV3,由于OpenVINO不支持ONNX的resize-11 OP的原因,目前还不支持Paddle的分割模型
## 部署流程
**PaddleX到OpenVINO的部署流程可以分为如下两步**
* **模型转换**:将Paddle的模型转换为OpenVINO的Inference Engine
* **预测部署**:使用Inference Engine进行预测
## 模型转换
**模型转换请参考文档[模型转换](./export_openvino_model.md)**
**说明**:由于不同软硬件平台下OpenVINO模型转换方法一致,故如何转换模型后续文档中不再赘述。
## 预测部署
由于不同软硬下部署OpenVINO实现预测的方式不完全一致,具体请参考:
**[Linux](./linux.md)**:介绍了PaddleX在操作系统为Linux或者Raspbian OS,编程语言为C++,硬件平台为CPU或者VPU的情况下使用OpenVINO进行预测加速

**[Windows](./windows.md)**:介绍了PaddleX在操作系统为Windows,编程语言为C++,硬件平台为CPU或者VPU的情况下使用OpenVINO进行预测加速
**[Python](./python.md)**:介绍了PaddleX在python下使用OpenVINO进行预测加速
# Linux平台
## 前置条件
* OS: Ubuntu、Raspbian OS
* GCC 5.4.0
* CMake 3.0+
* PaddleX 1.0+
* OpenVINO 2020.4
* 硬件平台:CPU、VPU
**说明**:PaddleX安装请参考[PaddleX](https://paddlex.readthedocs.io/zh_CN/develop/install.html) , OpenVINO安装请根据相应的系统参考[OpenVINO-Linux](https://docs.openvinotoolkit.org/latest/_docs_install_guides_installing_openvino_linux.html)或者[OpenVINO-Raspbian](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_raspbian.html)
请确保系统已经安装好上述基本软件,并配置好相应环境,**下面所有示例以工作目录 `/root/projects/`演示**
## 预测部署
文档提供了c++下预测部署的方法,如果需要在python下预测部署请参考[python预测部署](./python.md)
### Step1 下载PaddleX预测代码
```
mkdir -p /root/projects
cd /root/projects
git clone https://github.com/PaddlePaddle/PaddleX.git
```
**说明**:其中C++预测代码在PaddleX/deploy/openvino 目录,该目录不依赖任何PaddleX下其他目录。
### Step2 软件依赖
提供了依赖软件的预编译包和一键编译脚本,用户不需要单独下载或编译第三方依赖软件。若需要自行编译第三方依赖软件请参考:
- gflags:编译请参考 [编译文档](https://gflags.github.io/gflags/#download)
- glog:编译请参考[编译文档](https://github.com/google/glog)
- opencv: 编译请参考 [编译文档](https://docs.opencv.org/master/d7/d9f/tutorial_linux_install.html)
### Step3: 编译
编译`cmake`的命令在`scripts/build.sh`中,若在树莓派(Raspbian OS)上编译,请将ARCH参数由x86修改为armv7;若自行编译了第三方依赖软件,请根据Step2中的实际情况修改主要参数,其主要内容说明如下:
```
# openvino预编译库的路径
OPENVINO_DIR=$INTEL_OPENVINO_DIR/inference_engine
# gflags预编译库的路径
GFLAGS_DIR=$(pwd)/deps/gflags
# glog预编译库的路径
GLOG_DIR=$(pwd)/deps/glog
# ngraph lib预编译库的路径
NGRAPH_LIB=$INTEL_OPENVINO_DIR/deployment_tools/ngraph/lib
# opencv预编译库的路径
OPENCV_DIR=$(pwd)/deps/opencv/
#cpu架构(x86或armv7)
ARCH=x86
```
执行`build`脚本:
```shell
sh ./scripts/build.sh
```
### Step4: 预测
编译成功后,分类任务的预测可执行程序为`classifier`,检测任务的预测可执行程序为`detector`,其主要命令参数说明如下:
| 参数 | 说明 |
| ---- | ---- |
| --model_dir | 模型转换生成的.xml文件路径,请保证模型转换生成的三个文件在同一路径下|
| --image | 要预测的图片文件路径 |
| --image_list | 按行存储图片路径的.txt文件 |
| --device | 运行的平台,可选项{"CPU","MYRIAD"},默认值为"CPU",如在VPU上请使用"MYRIAD"|
| --cfg_file | PaddleX model 的.yml配置文件 |
| --save_dir | 可视化结果图片保存地址,仅适用于检测任务,默认值为" ",即不保存可视化结果 |
### 样例
`样例一`
linux系统在CPU下做单张图片的分类任务预测
测试图片 `/path/to/test_img.jpeg`
```shell
./build/classifier --model_dir=/path/to/openvino_model --image=/path/to/test_img.jpeg --cfg_file=/path/to/PaddleX_model.yml
```
`样例二`:
linux系统在CPU下做多张图片的检测任务预测,并保存预测可视化结果
预测的多个图片`/path/to/image_list.txt`,image_list.txt内容的格式如下:
```
/path/to/images/test_img1.jpeg
/path/to/images/test_img2.jpeg
...
/path/to/images/test_imgn.jpeg
```
```shell
./build/detector --model_dir=/path/to/models/openvino_model --image_list=/root/projects/images_list.txt --cfg_file=/path/to/PaddleX_model.yml --save_dir ./output
```
`样例三`:
树莓派(Raspbian OS)在VPU下做单张图片分类任务预测
测试图片 `/path/to/test_img.jpeg`
```shell
./build/classifier --model_dir=/path/to/openvino_model --image=/path/to/test_img.jpeg --cfg_file=/path/to/PaddleX_model.yml --device=MYRIAD
```
## 性能测试
`测试一`
在服务器CPU下测试了OpenVINO对PaddleX部署的加速性能:
- CPU:Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz
- OpenVINO: 2020.4
- PaddleX:采用Paddle预测库(1.8),打开mkldnn加速,打开多线程。
- 模型来自PaddleX tutorials,Batch Size均为1,耗时单位为ms/image,只计算模型运行时间,不包括数据的预处理和后处理,20张图片warmup,100张图片测试性能。
|模型| PaddleX| OpenVINO | 图片输入大小|
|---|---|---|---|
|resnet-50 | 20.56 | 16.12 | 224*224 |
|mobilenet-V2 | 5.16 | 2.31 |224*224|
|yolov3-mobilenetv1 |76.63| 46.26|608*608 |
`测试二`:
在PC机上插入VPU架构的神经计算棒(NCS2),通过Openvino加速。
- CPU:Intel(R) Core(TM) i5-4300U 1.90GHz
- VPU:Movidius Neural Compute Stick2
- OpenVINO: 2020.4
- 模型来自PaddleX tutorials,Batch Size均为1,耗时单位为ms/image,只计算模型运行时间,不包括数据的预处理和后处理,20张图片warmup,100张图片测试性能。
|模型|OpenVINO|输入图片|
|---|---|---|
|mobilenetV2|24.00|224*224|
|resnet50_vd_ssld|58.53|224*224|
`测试三`:
在树莓派3B上插入VPU架构的神经计算棒(NCS2),通过Openvino加速。
- CPU :ARM Cortex-A72 1.2GHz 64bit
- VPU:Movidius Neural Compute Stick2
- OpenVINO 2020.4
- 模型来自paddleX tutorials,Batch Size均为1,耗时单位为ms/image,只计算模型运行时间,不包括数据的预处理和后处理,20张图片warmup,100张图片测试性能。
|模型|OpenVINO|输入图片大小|
|---|---|---|
|mobilenetV2|43.15|224*224|
|resnet50|82.66|224*224|
# Python预测部署
文档说明了在python下基于OpenVINO的预测部署,部署前需要先将paddle模型转换为OpenVINO的Inference Engine,请参考[模型转换](./export_openvino_model.md)。目前CPU硬件上支持PaddleX的分类、检测模型;VPU上支持PaddleX的分类模型。
## 前置条件
* Python 3.6+
* OpenVINO 2020.4
**说明**:OpenVINO安装请参考[OpenVINO](https://docs.openvinotoolkit.org/latest/index.html)
请确保系统已经安装好上述基本软件,**下面所有示例以工作目录 `/root/projects/`演示**
## 预测部署
运行/root/projects/PaddleX/deploy/openvino/python目录下demo.py文件可以进行预测,其命令参数说明如下:
| 参数 | 说明 |
| ---- | ---- |
| --model_dir | 模型转换生成的.xml文件路径,请保证模型转换生成的三个文件在同一路径下|
| --img | 要预测的图片文件路径 |
| --image_list | 按行存储图片路径的.txt文件 |
| --device | 运行的平台, 默认值为"CPU" |
| --cfg_file | PaddleX model 的.yml配置文件 |
### 样例
`样例一`
测试图片 `/path/to/test_img.jpeg`
```
cd /root/projects/PaddleX/deploy/openvino/python
python demo.py --model_dir /path/to/openvino_model --img /path/to/test_img.jpeg --cfg_file /path/to/PaddleX_model.yml
```
`样例二`:
预测多个图片`/path/to/image_list.txt`,image_list.txt内容的格式如下:
```
/path/to/images/test_img1.jpeg
/path/to/images/test_img2.jpeg
...
/path/to/images/test_imgn.jpeg
```
```
cd /root/projects/PaddleX/deploy/openvino/python
python demo.py --model_dir /path/to/models/openvino_model --image_list /root/projects/images_list.txt --cfg_file /path/to/PaddleX_model.yml
```
# Windows平台
## 说明
Windows 平台下,我们使用`Visual Studio 2019 Community` 进行了测试。微软从`Visual Studio 2017`开始即支持直接管理`CMake`跨平台编译项目,但是直到`2019`才提供了稳定和完全的支持,所以如果你想使用CMake管理项目编译构建,我们推荐你使用`Visual Studio 2019`环境下构建。
## 前置条件
* Visual Studio 2019
* OpenVINO 2020.4
* CMake 3.0+
**说明**:PaddleX安装请参考[PaddleX](https://paddlex.readthedocs.io/zh_CN/develop/install.html) , OpenVINO安装请参考[OpenVINO-Windows](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_windows.html)
**注意**:安装完OpenVINO后需要手动添加OpenVINO目录到系统环境变量,否则在运行程序时会出现找不到dll的情况。以安装OpenVINO时不改变OpenVINO安装目录情况下为示例,流程如下
- 我的电脑->属性->高级系统设置->环境变量
- 在系统变量中找到Path(如没有,自行创建),并双击编辑
- 新建,分别将OpenVINO以下路径填入并保存:
`C:\Program Files (x86)\IntelSWTools\openvino\inference_engine\bin\intel64\Release`
`C:\Program Files (x86)\IntelSWTools\openvino\inference_engine\external\tbb\bin`
`C:\Program Files (x86)\IntelSWTools\openvino\deployment_tools\ngraph\lib`
请确保系统已经安装好上述基本软件,并配置好相应环境,**下面所有示例以工作目录为 `D:\projects`演示。**
## 预测部署
文档提供了c++下预测部署的方法,如果需要在python下预测部署请参考[python预测部署](./python.md)
### Step1: 下载PaddleX预测代码
```shell
d:
mkdir projects
cd projects
git clone https://github.com/PaddlePaddle/PaddleX.git
```
**说明**:其中`C++`预测代码在`PaddleX\deploy\openvino` 目录,该目录不依赖任何`PaddleX`下其他目录。
### Step2 软件依赖
提供了依赖软件预编译库:
- [gflags-glog](https://bj.bcebos.com/paddlex/deploy/windows/third-parts.zip)
- [opencv](https://bj.bcebos.com/paddleseg/deploy/opencv-3.4.6-vc14_vc15.exe)
请下载上面两个链接的预编译库。若需要自行下载请参考:
- gflags:[下载地址](https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/gflags)
- glog:[编译文档](https://github.com/google/glog)
- opencv:[下载地址](https://opencv.org/releases/)
下载完opencv后需要配置环境变量,如下流程所示
- 我的电脑->属性->高级系统设置->环境变量
- 在系统变量中找到Path(如没有,自行创建),并双击编辑
- 新建,将opencv路径填入并保存,如`D:\projects\opencv\build\x64\vc14\bin`
### Step3: 使用Visual Studio 2019直接编译CMake
1. 打开Visual Studio 2019 Community,点击`继续但无需代码`
2. 点击: `文件`->`打开`->`CMake` 选择C++预测代码所在路径(例如`D:\projects\PaddleX\deploy\openvino`),并打开`CMakeLists.txt`
3. 点击:`项目`->`CMake设置`
4. 点击`浏览`,分别设置编译选项指定`OpenVINO`、`Gflags`、`GLOG`、`NGRAPH`、`OPENCV`的路径
| 参数名 | 含义 |
| ---- | ---- |
| OPENCV_DIR | opencv库路径 |
| OPENVINO_DIR | OpenVINO推理库路径,在OpenVINO安装目录下的deployment_tools/inference_engine目录,若未修改OpenVINO默认安装目录可以不用修改 |
| NGRAPH_LIB | OpenVINO的ngraph库路径,在OpenVINO安装目录下的deployment_tools/ngraph/lib目录,若未修改OpenVINO默认安装目录可以不用修改 |
| GFLAGS_DIR | gflags库路径 |
| GLOG_DIR | glog库路径 |
| WITH_STATIC_LIB | 是否静态编译,默认为True |
**设置完成后**, 点击`保存并生成CMake缓存以加载变量`
5. 点击`生成`->`全部生成`
### Step4: 预测
上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录:
```
D:
cd D:\projects\PaddleX\deploy\openvino\out\build\x64-Release
```
* 编译成功后,图片预测demo的入口程序为`detector.exe`与`classifier.exe`,用户可根据自己的模型类型选择,其主要命令参数说明如下:
| 参数 | 说明 |
| ---- | ---- |
| --model_dir | 模型转换生成的.xml文件路径,请保证模型转换生成的三个文件在同一路径下|
| --image | 要预测的图片文件路径 |
| --image_list | 按行存储图片路径的.txt文件 |
| --device | 运行的平台,可选项{"CPU","MYRIAD"},默认值为"CPU",如在VPU上请使用"MYRIAD"|
| --cfg_file | PaddleX model 的.yml配置文件 |
| --save_dir | 可视化结果图片保存地址,仅适用于检测任务,默认值为" ",即不保存可视化结果 |
### 样例
`样例一`
在CPU下做单张图片的分类任务预测
测试图片 `/path/to/test_img.jpeg`
```shell
./classifier.exe --model_dir=/path/to/openvino_model --image=/path/to/test_img.jpeg --cfg_file=/path/to/PaddleX_model.yml
```
`样例二`:
在CPU下做多张图片的检测任务预测,并保存预测可视化结果
预测多个图片`/path/to/image_list.txt`,image_list.txt内容的格式如下:
```
/path/to/images/test_img1.jpeg
/path/to/images/test_img2.jpeg
...
/path/to/images/test_imgn.jpeg
```
```shell
./detector.exe --model_dir=/path/to/models/openvino_model --image_list=/root/projects/images_list.txt --cfg_file=/path/to/PaddleX_model.yml --save_dir ./output
```
`样例三`:
在VPU下做单张图片分类任务预测
测试图片 `/path/to/test_img.jpeg`
```shell
./classifier.exe --model_dir=/path/to/openvino_model --image=/path/to/test_img.jpeg --cfg_file=/path/to/PaddleX_model.yml --device=MYRIAD
```
# 树莓派
PaddleX支持通过Paddle-Lite和基于OpenVINO的神经计算棒(NCS2)这两种方式在树莓派上完成预测部署。
## 硬件环境配置
对于尚未安装系统的树莓派,首先需要进行系统安装、环境配置等步骤来初始化硬件环境,过程中需要的软硬件如下:
- 硬件:micro SD,显示器,键盘,鼠标
- 软件:Raspbian OS
### Step1:系统安装
- 格式化micro SD卡为FAT格式,Windows和Mac下建议使用[SD Memory Card Formatter](https://www.sdcard.org/downloads/formatter/)工具,Linux下请参考[NOOBS For Raspberry Pi](http://qdosmsq.dunbar-it.co.uk/blog/2013/06/noobs-for-raspberry-pi/)
- 下载NOOBS版本的Raspbian OS [下载地址](https://www.raspberrypi.org/downloads/)并将解压后的文件复制到SD卡中,插入SD卡后给树莓派通电,系统将自动安装
### Step2:环境配置
- 启用VNC和SSH服务:打开LX终端,输入如下命令,选择Interfacing Options,然后分别选择P2 SSH和P3 VNC打开SSH与VNC。打开后就可以通过SSH或者VNC的方式连接树莓派
```
sudo raspi-config
```
- 更换源:由于树莓派官方源速度很慢,建议在官网查询国内源 [树莓派软件源](https://www.jianshu.com/p/67b9e6ebf8a0)。更换后执行
```
sudo apt-get update
sudo apt-get upgrade
```
## Paddle-Lite部署
基于Paddle-Lite的部署目前可以支持PaddleX的分类、分割与检测模型,其中检测模型仅支持YOLOv3
部署的流程包括:PaddleX模型转换与转换后的模型部署
**说明**:PaddleX安装请参考[PaddleX](https://paddlex.readthedocs.io/zh_CN/develop/install.html),Paddle-Lite详细资料请参考[Paddle-Lite](https://paddle-lite.readthedocs.io/zh/latest/index.html)
请确保系统已经安装好上述基本软件,并配置好相应环境,**下面所有示例以工作目录 `/root/projects/`演示**
## Paddle-Lite模型转换
将PaddleX模型转换为Paddle-Lite模型,具体请参考[Paddle-Lite模型转换](./export_nb_model.md)
## Paddle-Lite 预测
### Step1: 下载PaddleX预测代码
```
mkdir -p /root/projects
cd /root/projects
git clone https://github.com/PaddlePaddle/PaddleX.git
```
**说明**:其中C++预测代码在PaddleX/deploy/raspberry 目录,该目录不依赖任何PaddleX下其他目录,如果需要在python下预测部署请参考[Python预测部署](./python.md)
### Step2:Paddle-Lite预编译库下载
提供了与下载的opt工具对应的Paddle-Lite在ArmLinux下的预编译库:[Paddle-Lite(ArmLinux)预编译库](https://bj.bcebos.com/paddlex/deploy/lite/inference_lite_2.6.1_armlinux.tar.bz2)
建议用户使用预编译库,若需要自行编译,在树莓派上LX终端输入
```
git clone https://github.com/PaddlePaddle/Paddle-Lite.git
cd Paddle-Lite
sudo ./lite/tools/build.sh --arm_os=armlinux --arm_abi=armv7hf --arm_lang=gcc --build_extra=ON full_publish
```
预编译库位置:`./build.lite.armlinux.armv7hf.gcc/inference_lite_lib.armlinux.armv7hf/cxx`
**注意**:预测库版本需要跟opt版本一致,更多Paddle-Lite编译内容请参考[Paddle-Lite编译](https://paddle-lite.readthedocs.io/zh/latest/user_guides/source_compile.html);更多预编译Paddle-Lite预测库请参考[Paddle-Lite Release Note](https://github.com/PaddlePaddle/Paddle-Lite/releases)
### Step3: 软件依赖
提供了依赖软件的预编译包或者一键编译,用户不需要单独下载或编译第三方依赖软件。若需要自行编译第三方依赖软件请参考:
- gflags:编译请参考 [编译文档](https://gflags.github.io/gflags/#download)
- glog:编译请参考[编译文档](https://github.com/google/glog)
- opencv: 编译请参考
[编译文档](https://docs.opencv.org/master/d7/d9f/tutorial_linux_install.html)
### Step4: 编译
编译`cmake`的命令在`scripts/build.sh`中,修改LITE_DIR为Paddle-Lite预测库目录,若自行编译第三方依赖软件请根据Step3中编译软件的实际情况修改主要参数,其主要内容说明如下:
```
# Paddle-Lite预编译库的路径
LITE_DIR=/path/to/Paddle-Lite/inference/lib
# gflags预编译库的路径
GFLAGS_DIR=$(pwd)/deps/gflags
# glog预编译库的路径
GLOG_DIR=$(pwd)/deps/glog
# opencv预编译库的路径
OPENCV_DIR=$(pwd)/deps/opencv/
```
执行`build`脚本:
```shell
sh ./scripts/build.sh
```
### Step5: 预测
编译成功后,分类任务的预测可执行程序为`classifier`,分割任务的预测可执行程序为`segmenter`,检测任务的预测可执行程序为`detector`,其主要命令参数说明如下:
| 参数 | 说明 |
| ---- | ---- |
| --model_dir | 模型转换生成的.nb模型文件路径 |
| --image | 要预测的图片文件路径 |
| --image_list | 按行存储图片路径的.txt文件 |
| --thread_num | 预测的线程数,默认值为1 |
| --cfg_file | PaddleX model 的.yml配置文件 |
| --save_dir | 可视化结果图片保存地址,仅适用于检测和分割任务,默认值为" ",即不保存可视化结果 |
### 样例
`样例一`
单张图片分类任务
测试图片 `/path/to/test_img.jpeg`
```shell
./build/classifier --model_dir=/path/to/nb_model \
                   --image=/path/to/test_img.jpeg --cfg_file=/path/to/PaddleX_model.yml --thread_num=4
```
`样例二`:
多张图片分割任务
预测多个图片`/path/to/image_list.txt`,image_list.txt内容的格式如下:
```
/path/to/images/test_img1.jpeg
/path/to/images/test_img2.jpeg
...
/path/to/images/test_imgn.jpeg
```
```shell
./build/segmenter --model_dir=/path/to/models/nb_model --image_list=/root/projects/images_list.txt --cfg_file=/path/to/PaddleX_model.yml --save_dir ./output --thread_num=4
```
## 性能测试
### 测试环境:
硬件:Raspberry Pi 3 Model B
系统:raspbian OS
软件:Paddle-Lite 2.6.1
### 测试结果
耗时单位为ms,num表示Paddle-Lite使用的线程数
|模型|lite(num=4)|输入图片大小|
| ----| ---- | ----|
|mobilenet-v2|136.19|224*224|
|resnet-50|1131.42|224*224|
|deeplabv3|2162.03|512*512|
|hrnet|6118.23|512*512|
|yolov3-darknet53|4741.15|320*320|
|yolov3-mobilenet|1424.01|320*320|
|densenet121|1144.92|224*224|
|densenet161|2751.57|224*224|
|densenet201|1847.06|224*224|
|HRNet_W18|1753.06|224*224|
|MobileNetV1|177.63|224*224|
|MobileNetV3_large_ssld|133.99|224*224|
|MobileNetV3_small_ssld|53.99|224*224|
|ResNet101|2290.56|224*224|
|ResNet101_vd|2337.51|224*224|
|ResNet101_vd_ssld|3124.49|224*224|
|ShuffleNetV2|115.97|224*224|
|Xception41|1418.29|224*224|
|Xception65|2094.7|224*224|
从测试结果看,建议用户在树莓派上使用MobileNetV1~V3、ShuffleNetV2这类小型网络
## NCS2部署
树莓派支持通过OpenVINO在NCS2上跑PaddleX模型预测,目前仅支持PaddleX的分类网络,基于NCS2的方式包含Paddle模型转OpenVINO IR以及将IR部署到NCS2上进行预测两个步骤。
- 模型转换请参考:[PaddleX模型转换为OpenVINO IR](./openvino/export_openvino_model.md),raspbian OS上的OpenVINO不支持模型转换,需要先在host侧转换FP16的IR。
- 预测部署请参考[OpenVINO部署](./openvino/linux.md)中VPU在raspbian OS部署的部分
# Paddle-Lite模型转换
将PaddleX模型转换为Paddle-Lite的nb模型,模型转换主要包括PaddleX转inference model和inference model转Paddle-Lite nb模型
### Step1:导出inference模型
PaddleX模型转Paddle-Lite模型之前需要先把PaddleX模型导出为inference格式模型,导出的模型将包括`__model__`、`__params__`和`model.yml`三个文件。具体方法请参考[Inference模型导出](../export_model.md)
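导出命令的用法大致如下(其中`./output/best_model`为假设的训练输出路径,具体参数请以上述文档为准):
```
paddlex --export_inference --model_dir=./output/best_model --save_dir=./inference_model
```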
### Step2:导出Paddle-Lite模型
Paddle-Lite模型需要通过Paddle-Lite的opt工具转出模型,下载并解压: [模型优化工具opt(2.6.1-linux)](https://bj.bcebos.com/paddlex/deploy/Rasoberry/opt.zip),在Linux系统下运行:
``` bash
./opt --model_file=<model_path> \
--param_file=<param_path> \
--valid_targets=arm \
--optimize_out_type=naive_buffer \
--optimize_out=model_output_name
```
| 参数 | 说明 |
| ---- | ---- |
| --model_file | 导出inference模型中包含的网络结构文件:`__model__`所在的路径|
| --param_file | 导出inference模型中包含的参数文件:`__params__`所在的路径|
| --valid_targets |指定模型可执行的backend,这里请指定为`arm`|
| --optimize_out_type | 输出模型类型,目前支持两种类型:protobuf和naive_buffer,其中naive_buffer是一种更轻量级的序列化/反序列化,这里请指定为`naive_buffer`|
| --optimize_out | 输出nb模型的名称(可包含路径前缀)|
若安装了python版本的Paddle-Lite也可以通过如下方式转换
```
paddle_lite_opt --model_file=<model_path> \
--param_file=<param_path> \
--valid_targets=arm \
--optimize_out_type=naive_buffer \
--optimize_out=model_output_name
```
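Python版本的Paddle-Lite也提供了opt的Python接口,可以在脚本中完成同样的转换。大致写法如下,接口名基于Paddle-Lite文档,具体以所安装版本为准:

```python
from paddlelite.lite import Opt  # pip安装paddlelite后提供的opt接口

opt = Opt()
opt.set_model_file("/path/to/__model__")    # inference模型的网络结构文件
opt.set_param_file("/path/to/__params__")   # inference模型的参数文件
opt.set_valid_places("arm")                 # 对应--valid_targets
opt.set_model_type("naive_buffer")          # 对应--optimize_out_type
opt.set_optimize_out("model_output_name")   # 输出nb模型的名称
opt.run()
```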
更多详细的使用方法和参数含义请参考: [使用opt转化模型](https://paddle-lite.readthedocs.io/zh/latest/user_guides/opt/opt_bin.html),更多opt预编译版本请参考[Paddle-Lite Release Note](https://github.com/PaddlePaddle/Paddle-Lite/releases)
**注意**:opt版本需要跟预测库版本保持一致,如使用2.6.0版本预测库,请从上面的Release Note中下载2.6.0版本的opt转换模型
树莓派部署
=======================================
.. toctree::
   :maxdepth: 2
   :caption: 文档目录:

   Raspberry.md
   python.md
   export_nb_model.md
# Python预测部署
文档说明了在树莓派上使用Python版本的Paddle-Lite进行PaddleX模型预测部署的方法。可根据下面的命令安装Python版本的Paddle-Lite预测库;若安装不成功,也可以下载whl文件进行安装:[Paddle-Lite_2.6.0_python](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.6.0/armlinux_python_installer.zip),更多版本请参考[Paddle-Lite Release Note](https://github.com/PaddlePaddle/Paddle-Lite/releases)
```
python -m pip install paddlelite
```
部署前需要先将PaddleX模型转换为Paddle-Lite的nb模型,具体请参考[Paddle-Lite模型转换](./export_nb_model.md)
**注意**:若用户使用2.6.0的Python预测库,请下载2.6.0版本的opt转换工具转换模型
## 前置条件
* Python 3.6+
* Paddle-Lite_python 2.6.0+
请确保系统已经安装好上述基本软件,**下面所有示例以工作目录 `/root/projects/`演示**
## 预测部署
运行/root/projects/PaddleX/deploy/raspberry/python目录下demo.py文件可以进行预测,其命令参数说明如下:
| 参数 | 说明 |
| ---- | ---- |
| --model_dir | 模型转换生成的.nb模型文件路径 |
| --img | 要预测的图片文件路径 |
| --image_list | 按行存储图片路径的.txt文件 |
| --cfg_file | PaddleX model 的.yml配置文件 |
| --thread_num | 预测的线程数, 默认值为1 |
| --input_shape | 模型输入中图片输入的大小[N,C,H,W] |
### 样例
`样例一`
测试图片 `/path/to/test_img.jpeg`
```
cd /root/projects/PaddleX/deploy/raspberry/python
python demo.py --model_dir /path/to/nb_model --img /path/to/test_img.jpeg --cfg_file /path/to/PaddleX_model.yml --thread_num 4 --input_shape [1,3,224,224]
```
`样例二`:
预测多个图片`/path/to/image_list.txt`,image_list.txt内容的格式如下:
```
/path/to/images/test_img1.jpeg
/path/to/images/test_img2.jpeg
...
/path/to/images/test_imgn.jpeg
```
```
cd /root/projects/PaddleX/deploy/raspberry/python
python demo.py --model_dir /path/to/models/nb_model --image_list /root/projects/images_list.txt --cfg_file=/path/to/PaddleX_model.yml --thread_num 4 --input_shape [1,3,224,224]
```
...@@ -19,16 +19,15 @@

### Step2: 下载PaddlePaddle C++ 预测库 paddle_inference

PaddlePaddle C++ 预测库针对不同的`CPU`、`CUDA`,以及是否支持TensorRT,提供了不同的预编译版本,目前PaddleX依赖于Paddle1.8.4版本,以下提供了多个不同版本的Paddle预测库:

| 版本说明 | 预测库(1.8.4版本) |
| ---- | ---- |
| ubuntu14.04_cpu_avx_mkl | [paddle_inference](https://paddle-inference-lib.bj.bcebos.com/latest-cpu-avx-mkl/fluid_inference.tgz) |
| ubuntu14.04_cpu_avx_openblas | [paddle_inference](https://paddle-inference-lib.bj.bcebos.com/latest-cpu-avx-openblas/fluid_inference.tgz) |
| ubuntu14.04_cpu_noavx_openblas | [paddle_inference](https://paddle-inference-lib.bj.bcebos.com/latest-cpu-noavx-openblas/fluid_inference.tgz) |
| ubuntu14.04_cuda9.0_cudnn7_avx_mkl | [paddle_inference](https://paddle-inference-lib.bj.bcebos.com/latest-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz) |
| ubuntu14.04_cuda10.0_cudnn7_avx_mkl | [paddle_inference](https://paddle-inference-lib.bj.bcebos.com/latest-gpu-cuda10-cudnn7-avx-mkl/fluid_inference.tgz) |

更多和更新的版本,请根据实际情况下载: [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)

...@@ -97,8 +96,8 @@ make
```
**注意:** Linux环境下编译会自动下载OPENCV, PaddleX-Encryption和YAML,如果编译环境无法访问外网,可手动下载:

- [opencv3.4.6gcc4.8ffmpeg.tar.gz2](https://bj.bcebos.com/paddleseg/deploy/opencv3.4.6gcc4.8ffmpeg.tar.gz2)
- [paddlex-encryption.zip](https://bj.bcebos.com/paddlex/tools/1.2.0/paddlex-encryption.zip)
- [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip)

opencv预编译包下载后解压,然后在script/build.sh中指定`OPENCV_DIR`为解压后的路径。

...@@ -125,6 +124,8 @@ yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://

| image_list | 按行存储图片路径的.txt文件 |
| use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) |
| use_trt | 是否使用 TensorRT 预测, 支持值为0或1(默认值为0) |
| use_mkl | 是否使用 MKL加速CPU预测, 支持值为0或1(默认值为1) |
| mkl_thread_num | MKL推理的线程数,默认为cpu处理器个数 |
| gpu_id | GPU 设备ID, 默认值为0 |
| save_dir | 保存可视化结果的路径, 默认值为"output",**classifier无该参数** |
| key | 加密过程中产生的密钥信息,默认值为""表示加载的是未加密的模型 |

...@@ -141,6 +142,8 @@ yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://

| video_path | 视频文件的路径 |
| use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) |
| use_trt | 是否使用 TensorRT 预测, 支持值为0或1(默认值为0) |
| use_mkl | 是否使用 MKL加速CPU预测, 支持值为0或1(默认值为1) |
| mkl_thread_num | MKL推理的线程数,默认为cpu处理器个数 |
| gpu_id | GPU 设备ID, 默认值为0 |
| show_result | 对视频文件做预测时,是否在屏幕上实时显示预测可视化结果(因加入了延迟处理,故显示结果不能反映真实的帧率),支持值为0或1(默认值为0) |
| save_result | 是否将每帧的预测可视结果保存为视频文件,支持值为0或1(默认值为1) |

...
...@@ -26,15 +26,15 @@ git clone https://github.com/PaddlePaddle/PaddleX.git

### Step2: 下载PaddlePaddle C++ 预测库 paddle_inference

PaddlePaddle C++ 预测库针对是否使用GPU、是否支持TensorRT、以及不同的CUDA版本提供了已经编译好的预测库,目前PaddleX依赖于Paddle 1.8.4,基于Paddle 1.8.4的Paddle预测库下载链接如下所示:

| 版本说明 | 预测库(1.8.4版本) | 编译器 | 构建工具| cuDNN | CUDA |
| ---- | ---- | ---- | ---- | ---- | ---- |
| cpu_avx_mkl | [paddle_inference](https://paddle-wheel.bj.bcebos.com/1.8.4/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
| cpu_avx_openblas | [paddle_inference](https://paddle-wheel.bj.bcebos.com/1.8.4/win-infer/open/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
| cuda9.0_cudnn7_avx_mkl | [paddle_inference](https://paddle-wheel.bj.bcebos.com/1.8.4/win-infer/mkl/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
| cuda9.0_cudnn7_avx_openblas | [paddle_inference](https://paddle-wheel.bj.bcebos.com/1.8.4/win-infer/open/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
| cuda10.0_cudnn7_avx_mkl | [paddle_inference](https://paddle-wheel.bj.bcebos.com/1.8.4/win-infer/mkl/post107/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.5.0 | 10.0 |

请根据实际情况选择下载,如若以上版本不满足您的需求,请至[C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/windows_cpp_inference.html)选择符合的版本。

...@@ -82,7 +82,7 @@ PaddlePaddle C++ 预测库针对是否使用GPU、是否支持TensorRT、以及

1. 如果使用`CPU`版预测库,请把`WITH_GPU`的`√`去掉勾
2. 如果使用的是`openblas`版本,请把`WITH_MKL`的`√`去掉勾
3. Windows环境下编译会自动下载YAML,如果编译环境无法访问外网,可手动下载: [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip)。YAML文件下载后无需解压,在`cmake/yaml.cmake`中将`URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` 中的网址,改为下载文件的路径。
4. 如果需要使用模型加密功能,需要手动下载[Windows预测模型加密工具](https://bj.bcebos.com/paddlex/tools/win/1.2.0/paddlex-encryption.zip)。例如解压到`D:/projects`,解压后目录为`D:/projects/paddlex-encryption`。编译时需勾选`WITH_ENCRYPTION`并且在`ENCRYPTION_DIR`填入`D:/projects/paddlex-encryption`。

![](../../images/vs2019_step_encryption.png)
![](../../images/vs2019_step6.png)

**设置完成后**, 点击上图中`保存并生成CMake缓存以加载变量`。

...@@ -109,6 +109,8 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release

| image | 要预测的图片文件路径 |
| image_list | 按行存储图片路径的.txt文件 |
| use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) |
| use_mkl | 是否使用 MKL加速CPU预测, 支持值为0或1(默认值为1) |
| mkl_thread_num | MKL推理的线程数,默认为cpu处理器个数 |
| gpu_id | GPU 设备ID, 默认值为0 |
| save_dir | 保存可视化结果的路径, 默认值为"output",classifier无该参数 |
| key | 加密过程中产生的密钥信息,默认值为""表示加载的是未加密的模型 |

...@@ -124,6 +126,8 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release

| camera_id | 摄像头设备ID,默认值为0 |
| video_path | 视频文件的路径 |
| use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) |
| use_mkl | 是否使用 MKL加速CPU预测, 支持值为0或1(默认值为1) |
| mkl_thread_num | MKL推理的线程数,默认为cpu处理器个数 |
| gpu_id | GPU 设备ID, 默认值为0 |
| show_result | 对视频文件做预测时,是否在屏幕上实时显示预测可视化结果(因加入了延迟处理,故显示结果不能反映真实的帧率),支持值为0或1(默认值为0) |
| save_result | 是否将每帧的预测可视结果保存为视频文件,支持值为0或1(默认值为1) |

...
...@@ -40,9 +40,9 @@ PaddleX提供一个轻量级的模型加密部署方案,通过PaddleX内置的

### 1.2 加密工具

[Linux版本 PaddleX模型加密工具](https://bj.bcebos.com/paddlex/tools/1.2.0/paddlex-encryption.zip),编译脚本会自动下载该版本加密工具,您也可以选择手动下载。

[Windows版本 PaddleX模型加密工具](https://bj.bcebos.com/paddlex/tools/win/1.2.0/paddlex-encryption.zip),该版本加密工具需手动下载,如果您在使用Visual Studio 2019编译C++预测代码的过程中已经下载过该工具,此处可不必重复下载。

Linux加密工具包含内容为:
```
...
```
...@@ -12,4 +12,5 @@ PaddleX精选飞桨视觉开发套件在产业实践中的成熟模型结构,

   solutions.md
   meter_reader.md
   human_segmentation.md
   multi-channel_remote_sensing/README.md
   remote_sensing.md
...@@ -70,7 +70,6 @@ cd PaddleX/examples/meter_reader/

| save_dir | 保存可视化结果的路径, 默认值为"output"|
| score_threshold | 检测模型输出结果中,预测得分低于该阈值的框将被滤除,默认值为0.5|
| seg_batch_size | 分割的批量大小,默认为2 |
| use_camera | 是否使用摄像头采集图片,默认为False |
| camera_id | 摄像头设备ID,默认值为0 |
| use_erode | 是否使用图像腐蚀对分割预测图进行细分,默认为False |

...@@ -91,19 +90,19 @@ export CUDA_VISIBLE_DEVICES=

* 预测单张图片

```shell
python reader_infer.py --detector_dir /path/to/det_inference_model --segmenter_dir /path/to/seg_inference_model --image /path/to/meter_test/20190822_168.jpg --save_dir ./output --use_erode
```

* 预测多张图片

```shell
python reader_infer.py --detector_dir /path/to/det_inference_model --segmenter_dir /path/to/seg_inference_model --image_dir /path/to/meter_test --save_dir ./output --use_erode
```

* 开启摄像头预测

```shell
python reader_infer.py --detector_dir /path/to/det_inference_model --segmenter_dir /path/to/seg_inference_model --save_dir ./output --use_erode --use_camera
```

## 推理部署

...@@ -260,12 +259,12 @@ step 5. 推理预测:

* 表盘检测的训练

```
python /path/to/PaddleX/examples/meter_reader/train_detection.py
```

* 指针和刻度分割的训练

```
python /path/to/PaddleX/examples/meter_reader/train_segmentation.py
```

...
# 多通道遥感影像分割
遥感影像分割是图像分割领域中的重要应用场景,广泛应用于土地测绘、环境监测、城市建设等领域。遥感影像分割的目标多种多样,有诸如积雪、农作物、道路、建筑、水源等地物目标,也有例如云层的空中目标。
本案例基于PaddleX实现多通道遥感影像分割,涵盖数据分析、模型训练、模型预测等流程,旨在帮助用户利用深度学习技术解决多通道遥感影像分割问题。
## 前置依赖
* PaddlePaddle >= 1.8.4
* Python >= 3.5
* PaddleX >= 1.1.4
安装的相关问题参考[PaddleX安装](../../install.md)
**另外还需安装gdal**, 使用pip安装gdal可能出错,推荐使用conda进行安装:
```
conda install gdal
```
下载PaddleX源码:
```
git clone https://github.com/PaddlePaddle/PaddleX
```
该案例所有脚本均位于`PaddleX/examples/channel_remote_sensing/`,进入该目录:
```
cd PaddleX/examples/channel_remote_sensing/
```
## 数据准备
遥感影像的格式多种多样,不同传感器产生的数据格式也可能不同。PaddleX现已兼容以下4种格式图片读取:
- `tif`
- `png`
- `img`
- `npy`
标注图要求必须为单通道的png格式图像,像素值即为对应的类别,像素标注类别需要从0开始递增。例如0,1,2,3表示有4种类别,255用于指定不参与训练和评估的像素,标注类别最多为256类。
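制作好标注图后,可以用类似下面的脚本粗略检查其是否满足上述要求(文件路径仅为示意):

```python
import numpy as np
from PIL import Image

mask = np.asarray(Image.open("mask/example_mask.png"))  # 路径仅为示意
assert mask.ndim == 2, "标注图必须为单通道"
values = np.unique(mask)
labels = values[values != 255]  # 255为不参与训练和评估的像素
print("标注中出现的类别值:", labels)
assert labels.min() == 0, "类别值需要从0开始递增"
```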
本案例使用[L8 SPARCS公开数据集](https://www.usgs.gov/land-resources/nli/landsat/spatial-procedures-automated-removal-cloud-and-shadow-sparcs-validation)进行云雪分割,该数据集包含80张卫星影像,涵盖10个波段。原始标注图片包含7个类别,分别是`cloud`, `cloud shadow`, `shadow over water`, `snow/ice`, `water`, `land``flooded`。由于`flooded``shadow over water`2个类别占比仅为`1.8%``0.24%`,我们将其进行合并,`flooded`归为`land``shadow over water`归为`shadow`,合并后标注包含5个类别。
数值、类别、颜色对应表:
|Pixel value|Class|Color|
|---|---|---|
|0|cloud|white|
|1|shadow|black|
|2|snow/ice|cyan|
|3|water|blue|
|4|land|grey|
![](../../../examples/multi-channel_remote_sensing/docs/images/dataset.png)
执行以下命令下载并解压经过类别合并后的数据集:
```shell script
mkdir dataset && cd dataset
wget https://paddleseg.bj.bcebos.com/dataset/remote_sensing_seg.zip
unzip remote_sensing_seg.zip
cd ..
```
其中`data`目录存放遥感影像,`data_vis`目录存放彩色合成预览图,`mask`目录存放标注图。
## 数据分析
遥感影像往往由许多波段组成,不同波段数据分布可能大相径庭,例如可见光波段和热红外波段分布十分不同。为了更深入了解数据的分布来优化模型训练效果,需要对数据进行分析。
参考文档[数据分析](./analysis.md)对训练集进行统计分析,确定图像像素值的截断范围,并统计截断后的均值和方差。
## 模型训练
本案例选择`UNet`语义分割模型完成云雪分割,运行以下步骤完成模型训练,模型的最优精度`miou``78.38%`
* 设置GPU卡号
```shell script
export CUDA_VISIBLE_DEVICES=0
```
* 运行以下脚本开始训练
```shell script
python train.py --data_dir dataset/remote_sensing_seg \
--train_file_list dataset/remote_sensing_seg/train.txt \
--eval_file_list dataset/remote_sensing_seg/val.txt \
--label_list dataset/remote_sensing_seg/labels.txt \
--save_dir saved_model/remote_sensing_unet \
--num_classes 5 \
--channel 10 \
--lr 0.01 \
--clip_min_value 7172 6561 5777 5103 4291 4000 4000 4232 6934 7199 \
--clip_max_value 50000 50000 50000 50000 50000 40000 30000 18000 40000 36000 \
--mean 0.15163569 0.15142828 0.15574491 0.1716084 0.2799778 0.27652043 0.28195933 0.07853807 0.56333154 0.5477584 \
--std 0.09301891 0.09818967 0.09831126 0.1057784 0.10842132 0.11062996 0.12791838 0.02637859 0.0675052 0.06168227 \
--num_epochs 500 \
--train_batch_size 3
```
也可以跳过模型训练步骤,下载预训练模型直接进行模型预测:
```
wget https://bj.bcebos.com/paddlex/examples/multi-channel_remote_sensing/models/l8sparcs_remote_model.tar.gz
tar -xvf l8sparcs_remote_model.tar.gz
```
## 模型预测
运行以下脚本,对遥感图像进行预测并可视化预测结果,相应地也将对应的标注文件进行可视化,以比较预测效果。
```shell script
export CUDA_VISIBLE_DEVICES=0
python predict.py
```
可视化效果如下所示:
![](../../../examples/multi-channel_remote_sensing/docs/images/prediction.jpg)
数值、类别、颜色对应表:
|Pixel value|Class|Color|
|---|---|---|
|0|cloud|white|
|1|shadow|black|
|2|snow/ice|cyan|
|3|water|blue|
|4|land|grey|
# 数据分析
遥感影像往往由许多波段组成,不同波段数据分布可能大相径庭,例如可见光波段和热红外波段分布十分不同。为了更深入了解数据的分布来优化模型训练效果,需要对数据进行分析。
## 1 统计分析
执行以下脚本,对训练集进行统计分析,屏幕会输出分析结果,同时结果也会保存至文件`train_information.pkl`中:
```
python tools/analysis.py
```
数据统计分析内容如下:
* 图像数量
例如统计出训练集中有64张图片:
```
64 samples in file dataset/remote_sensing_seg/train.txt
```
* 图像最大和最小的尺寸
例如统计出训练集中最大的高宽和最小的高宽分别是(1000, 1000)和(1000, 1000):
```
Minimal image height: 1000 Minimal image width: 1000.
Maximal image height: 1000 Maximal image width: 1000.
```
* 图像通道数量
例如统计出图像的通道数量为10:
```
Image channel is 10.
```
* 图像各通道的最小值和最大值
最小值和最大值分别以列表的形式输出,按照通道从小到大排列。例如:
```
Minimal image value: [7.172e+03 6.561e+03 5.777e+03 5.103e+03 4.291e+03 1.000e+00 1.000e+00 4.232e+03 6.934e+03 7.199e+03]
Maximal image value: [65535. 65535. 65535. 65535. 65535. 65535. 65535. 56534. 65535. 63215.]
```
* 图像各通道的像素值分布
针对各个通道,统计出各像素值的数量,并以柱状图的形式呈现在以'distribute.png'结尾的图片中。**需要注意的是,为便于观察,纵坐标为对数坐标**。用户可以查看这些图片来选择是否需要对分布在头部和尾部的像素值进行截断。
```
Image pixel distribution of each channel is saved with 'distribute.png' in the dataset/remote_sensing_seg
```
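该柱状图的绘制方式大致如下(`band`为假设的某一通道像素值数组,仅演示对数坐标的画法,具体以`tools/analysis.py`为准):

```python
import numpy as np
import matplotlib.pyplot as plt

band = np.random.randint(4000, 60000, size=(1000, 1000))  # 示意用的假设数据
values, counts = np.unique(band.ravel(), return_counts=True)  # 统计各像素值数量
plt.bar(values, counts)
plt.yscale('log')  # 纵坐标取对数,便于观察头部和尾部的少量像素
plt.savefig('channel_0_distribute.png')
```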
* 图像各通道归一化后的均值和方差
各通道归一化系数为各通道最大值与最小值之差,均值和方差以列表形式输出,按照通道从小到大排列。例如:
```
Image mean value: [0.23417574 0.22283101 0.2119595 0.2119887 0.27910388 0.21294892 0.17294037 0.10158925 0.43623915 0.41019192]
Image standard deviation: [0.06831269 0.07243951 0.07284761 0.07875261 0.08120818 0.0609302 0.05110716 0.00696064 0.03849307 0.03205579]
```
* 标注图中各类别的数量及比重
统计各类别的像素数量和在数据集全部像素的占比,以(类别值,该类别的数量,该类别的占比)的格式输出。例如:
```
Label pixel information is shown in a format of (label_id, the number of label_id, the ratio of label_id):
(0, 13302870, 0.20785734374999995)
(1, 4577005, 0.07151570312500002)
(2, 3955012, 0.0617970625)
(3, 2814243, 0.04397254687499999)
(4, 39350870, 0.6148573437500001)
```
## 2 确定像素值截断范围
遥感影像数据分布范围广,往往存在一些异常值,这会影响算法对实际数据分布的拟合效果。为更好地对数据进行归一化,可以抑制遥感影像中少量的异常值。根据`图像各通道的像素值分布`来确定像素值的截断范围,并在后续图像预处理过程中对超出范围的像素值通过截断进行校正,从而去除异常值带来的干扰。**注意:该步骤是否执行根据数据集实际分布来决定。**
例如各通道的像素值分布可视化效果如下:
![](../../../examples/multi-channel_remote_sensing/docs/images/image_pixel_distribution.png)
**需要注意的是,为便于观察,纵坐标为对数坐标。**
对于上述分布,我们选取的截断范围是(按照通道从小到大排列):
```
截断范围最小值: clip_min_value = [7172, 6561, 5777, 5103, 4291, 4000, 4000, 4232, 6934, 7199]
截断范围最大值: clip_max_value = [50000, 50000, 50000, 50000, 50000, 40000, 30000, 18000, 40000, 36000]
```
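截断像素占比与截断后归一化的均值、方差的计算逻辑,可以用如下numpy代码示意(以单张形状为(H, W, C)的影像`im`为例;函数与变量名均为示意,实际统计请使用下文的`tools/cal_clipped_mean_std.py`脚本):

```python
import numpy as np

def clipped_stats(im, clip_min_value, clip_max_value):
    """逐通道统计被截断像素占比,以及截断后按(max-min)归一化的均值和标准差。"""
    ratios, means, stds = [], [], []
    for c in range(im.shape[-1]):
        band = im[..., c].astype('float64')
        clipped = (band < clip_min_value[c]) | (band > clip_max_value[c])
        ratios.append(clipped.mean())  # 被截断像素的占比
        band = np.clip(band, clip_min_value[c], clip_max_value[c])
        band = (band - clip_min_value[c]) / (clip_max_value[c] - clip_min_value[c])
        means.append(band.mean())
        stds.append(band.std())
    return ratios, means, stds
```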
## 3 统计截断后的均值和方差
为避免数据截断范围选取不当带来的影响,应该统计异常值像素占比,确保受影响的像素比例不要过高。接着对截断后的数据计算归一化后的均值和方差,**用于后续模型训练时的图像预处理参数设置**
执行以下脚本:
```
python tools/cal_clipped_mean_std.py
```
截断像素占比统计结果如下:
```
Channel 0, the ratio of pixels to be clipped = 0.00054778125
Channel 1, the ratio of pixels to be clipped = 0.0011129375
Channel 2, the ratio of pixels to be clipped = 0.000843703125
Channel 3, the ratio of pixels to be clipped = 0.00127125
Channel 4, the ratio of pixels to be clipped = 0.001330140625
Channel 5, the ratio of pixels to be clipped = 8.1375e-05
Channel 6, the ratio of pixels to be clipped = 0.0007348125
Channel 7, the ratio of pixels to be clipped = 6.5625e-07
Channel 8, the ratio of pixels to be clipped = 0.000185921875
Channel 9, the ratio of pixels to be clipped = 0.000139671875
```
可看出,被截断像素占比均不超过0.2%。
裁剪后数据的归一化系数如下:
```
Image mean value: [0.15163569 0.15142828 0.15574491 0.1716084 0.2799778 0.27652043 0.28195933 0.07853807 0.56333154 0.5477584 ]
Image standard deviation: [0.09301891 0.09818967 0.09831126 0.1057784 0.10842132 0.11062996 0.12791838 0.02637859 0.0675052 0.06168227]
(normalized by (clip_max_value - clip_min_value), arranged in 0-10 channel order)
```
# RGB遥感影像分割

本案例基于PaddleX实现遥感影像分割,提供滑动窗口预测方式,以避免直接对大尺寸图片进行预测时显存不足的问题。此外,滑动窗口之间的重叠程度可配置,以此消除最终预测结果中各窗口拼接处的裂痕感。

## 前置依赖

* PaddlePaddle >= 1.8.4
* Python >= 3.5
* PaddleX >= 1.1.4

安装的相关问题参考[PaddleX安装](../install.md)

...@@ -26,51 +26,57 @@ cd PaddleX/examples/remote_sensing/

本案例使用2015 CCF大数据比赛提供的高清遥感影像,包含5张带标注的RGB图像,图像尺寸最大有7969 × 7939、最小有4011 × 2470。该数据集共标注了5类物体,分别是背景(标记为0)、植被(标记为1)、建筑(标记为2)、水体(标记为3)、道路 (标记为4)。

本案例将前4张图片划分入训练集,第5张图片作为验证集。为增加训练时的批量大小,以滑动窗口为(1024,1024)、步长为(512, 512)对前4张图片进行切分,加上原本的4张大尺寸图片,训练集一共有688张图片。在训练过程中直接对大图片进行验证会导致显存不足,为避免此类问题的出现,针对验证集以滑动窗口为(769, 769)、步长为(769,769)对第5张图片进行切分,得到40张子图片。

运行以下脚本,下载原始数据集,并完成数据集的切分:

```
python prepare_data.py
```

## 模型训练

分割模型选择Backbone为MobileNetv3_large_ssld的Deeplabv3模型,该模型兼备高性能高精度的优点。运行以下脚本,进行模型训练:

```
python train.py
```

也可以跳过模型训练步骤,直接下载预训练模型进行后续的模型预测和评估:

```
wget https://bj.bcebos.com/paddlex/examples/remote_sensing/models/ccf_remote_model.tar.gz
tar -xvf ccf_remote_model.tar.gz
```

## 模型预测

直接对大尺寸图片进行预测会导致显存不足,为避免此类问题的出现,本案例提供了滑动窗口预测接口,支持有重叠和无重叠两种方式。

* 无重叠的滑动窗口预测

在输入图片上以固定大小的窗口滑动,分别对每个窗口下的图像进行预测,最后将各窗口的预测结果拼接成输入图片的预测结果。由于每个窗口边缘部分的预测效果会比中间部分的差,因此每个窗口拼接处可能会有明显的裂痕感。

该预测方式的API接口详见[overlap_tile_predict](https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#overlap-tile-predict),**使用时需要把参数`pad_size`设置为`[0, 0]`**。

* 有重叠的滑动窗口预测

在Unet论文中,作者提出一种有重叠的滑动窗口预测策略(Overlap-tile strategy)来消除拼接处的裂痕感。对各滑动窗口预测时,会向四周扩展一定的面积,对扩展后的窗口进行预测,例如下图中的蓝色部分区域,到拼接时只取各窗口中间部分的预测结果,例如下图中的黄色部分区域。位于输入图像边缘处的窗口,其扩展面积下的像素则通过将边缘部分像素镜像填补得到。

该预测方式的API接口说明详见[overlap_tile_predict](https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#overlap-tile-predict)。

![](../../examples/remote_sensing/images/overlap_tile.png)
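Overlap-tile策略中“镜像填补、预测扩展窗口、只取中间区域拼接”的核心逻辑,可以用如下简化的numpy代码示意(`predict_fn`为任意单窗口预测函数,属于示意用的假设名称;实际使用请直接调用上述`overlap_tile_predict`接口):

```python
import numpy as np

def overlap_tile(image, predict_fn, tile=512, pad=64):
    """image: (H, W, C)的输入影像;predict_fn: 输入(h, w, C)返回(h, w)类别图。"""
    h, w = image.shape[:2]
    # 四周做镜像填补,使边缘处的窗口也能取到完整的扩展上下文
    padded = np.pad(image, ((pad, pad), (pad, pad), (0, 0)), mode='reflect')
    out = np.zeros((h, w), dtype=np.int64)
    for y in range(0, h, tile):
        for x in range(0, w, tile):
            th, tw = min(tile, h - y), min(tile, w - x)
            # 扩展窗口:在目标区域四周各多取pad个像素(对应图中蓝色区域)
            window = padded[y:y + th + 2 * pad, x:x + tw + 2 * pad]
            pred = predict_fn(window)
            # 只保留中间部分的预测结果(对应图中黄色区域)
            out[y:y + th, x:x + tw] = pred[pad:pad + th, pad:pad + tw]
    return out
```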
相比无重叠的滑动窗口预测,有重叠的滑动窗口预测策略将本案例的模型精度miou从80.58%提升至81.52%,并且将预测可视化结果中裂痕感显著消除,可见下图中两种预测方式的效果对比。

![](../../examples/remote_sensing/images/visualize_compare.jpg)

运行以下脚本使用有重叠的滑动窗口进行预测:

```
python predict.py
```

## 模型评估

在训练过程中,每隔10个迭代轮数会评估一次模型在验证集的精度。由于已事先将原始大尺寸图片切分成小块,此时相当于使用无重叠的滑动窗口预测方式,最优模型精度miou为80.58%。运行以下脚本,将采用有重叠的滑动窗口预测方式,重新评估原始大尺寸图片的模型精度,此时miou为81.52%。

```
python eval.py
```
文件已添加
...@@ -24,5 +24,5 @@

* **内存**:建议8G以上
* **硬盘空间**:建议SSD剩余空间1T以上(非必须)

***注:PaddleX在Mac OS系统只支持单卡模型。Windows系统暂不支持NCCL。***

## FAQ

1. **为什么训练速度这么慢?**

...@@ -28,8 +28,18 @@

5. **如何调用后端代码?**

PaddleX 团队为您整理了相关的API接口文档,方便您学习和使用。具体请参见[PaddleX API说明文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/index.html)

6. **如何在离线环境下使用PaddleX?**

PaddleX是支撑用户在本地离线环境中训练模型的,但是如果大家希望使用PaddleX团队为大家准备好的在标准数据集上训练的预训练模型,则需要在线环境进行下载。大家可以参照完整的无联网情况下进行模型训练的[文档](https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/how_to_offline_run.md)查看如何一键快速下载所有预训练模型。

7. **有没有行业应用案例,或者实现好的工程实例?**

有的,PaddleX提供丰富的行业应用案例和完整的示例项目,请参考[PaddleX产业案例集](https://paddlex.readthedocs.io/zh_CN/develop/examples/index.html)

**如果您有任何问题或建议,欢迎以issue的形式,或加入PaddleX官方QQ群(1045148026)直接反馈您的问题和需求**

...
# PaddleX GUI使用教程

*注:如果你的系统是 Mac OS 10.15.5及以上,在双击客户端icon后,需要在Terminal中执行 `sudo xattr -r -d com.apple.quarantine /Users/username/PaddleX`,并稍等几秒来启动客户端,其中 `/Users/username/PaddleX` 为您保存PaddleX的文件夹路径*

**第一步:准备数据**

在开始模型训练前,您需要根据不同的任务类型,将数据标注为相应的格式。目前PaddleX支持【图像分类】、【目标检测】、【语义分割】、【实例分割】四种任务类型。不同类型任务的数据处理方式可查看[数据标注方式](https://paddlex.readthedocs.io/zh_CN/latest/appendix/datasets.html)

...
...@@ -79,7 +79,6 @@ cd PaddleX/examples/meter_reader/

| save_dir | 保存可视化结果的路径, 默认值为"output"|
| score_threshold | 检测模型输出结果中,预测得分低于该阈值的框将被滤除,默认值为0.5|
| seg_batch_size | 分割的批量大小,默认为2 |
| use_camera | 是否使用摄像头采集图片,默认为False |
| camera_id | 摄像头设备ID,默认值为0 |
| use_erode | 是否使用图像腐蚀对分割预测图进行细分,默认为False |

...@@ -100,19 +99,19 @@ export CUDA_VISIBLE_DEVICES=

* 预测单张图片

```shell
python reader_infer.py --detector_dir /path/to/det_inference_model --segmenter_dir /path/to/seg_inference_model --image /path/to/meter_test/20190822_168.jpg --save_dir ./output --use_erode
```

* 预测多张图片

```shell
python reader_infer.py --detector_dir /path/to/det_inference_model --segmenter_dir /path/to/seg_inference_model --image_dir /path/to/meter_test --save_dir ./output --use_erode
```

* 开启摄像头预测

```shell
python reader_infer.py --detector_dir /path/to/det_inference_model --segmenter_dir /path/to/seg_inference_model --save_dir ./output --use_erode --use_camera
```

## <h2 id="4">推理部署</h2>

...@@ -270,12 +269,12 @@ git clone https://github.com/PaddlePaddle/PaddleX

* 表盘检测的训练

```
python /path/to/PaddleX/examples/meter_reader/train_detection.py
```

* 指针和刻度分割的训练

```
python /path/to/PaddleX/examples/meter_reader/train_segmentation.py
```

...
...@@ -105,12 +105,6 @@ def parse_args():
        help="Segmentation batch size",
        type=int,
        default=2)
    return parser.parse_args()
...@@ -143,8 +137,7 @@ class MeterReader:
                use_erode=True,
                erode_kernel=4,
                score_threshold=0.5,
                seg_batch_size=2):
        if isinstance(im_file, str):
            im = cv2.imread(im_file).astype('float32')
        else:
...@@ -190,8 +183,7 @@ class MeterReader:
            meter_images.append(resized_meters[j - i])
        result = self.segmenter.batch_predict(
            transforms=self.seg_transforms,
            img_file_list=meter_images)
        if use_erode:
            kernel = np.ones((erode_kernel, erode_kernel), np.uint8)
            for i in range(len(result)):
...@@ -334,7 +326,7 @@ def infer(args):
        for im_file in image_lists:
            meter_reader.predict(im_file, args.save_dir, args.use_erode,
                                 args.erode_kernel, args.score_threshold,
                                 args.seg_batch_size)
    elif args.use_camera:
        cap_video = cv2.VideoCapture(args.camera_id)
        if not cap_video.isOpened():
...@@ -347,7 +339,7 @@ def infer(args):
            if ret:
                meter_reader.predict(frame, args.save_dir, args.use_erode,
                                     args.erode_kernel, args.score_threshold,
                                     args.seg_batch_size)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        else:
...
...@@ -105,12 +105,6 @@ def parse_args():
        help="Segmentation batch size",
        type=int,
        default=2)
    return parser.parse_args()
...@@ -143,8 +137,7 @@ class MeterReader:
                use_erode=True,
                erode_kernel=4,
                score_threshold=0.5,
                seg_batch_size=2):
        if isinstance(im_file, str):
            im = cv2.imread(im_file).astype('float32')
        else:
...@@ -190,8 +183,7 @@ class MeterReader:
            meter_images.append(resized_meters[j - i])
        result = self.segmenter.batch_predict(
            transforms=self.seg_transforms,
            img_file_list=meter_images)
        if use_erode:
            kernel = np.ones((erode_kernel, erode_kernel), np.uint8)
            for i in range(len(result)):
...@@ -334,7 +326,7 @@ def infer(args):
        for im_file in image_lists:
            meter_reader.predict(im_file, args.save_dir, args.use_erode,
                                 args.erode_kernel, args.score_threshold,
                                 args.seg_batch_size)
    elif args.use_camera:
        cap_video = cv2.VideoCapture(args.camera_id)
        if not cap_video.isOpened():
...@@ -347,7 +339,7 @@ def infer(args):
            if ret:
                meter_reader.predict(frame, args.save_dir, args.use_erode,
                                     args.erode_kernel, args.score_threshold,
                                     args.seg_batch_size)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        else:
...
# 多通道遥感影像分割
遥感影像分割是图像分割领域中的重要应用场景,广泛应用于土地测绘、环境监测、城市建设等领域。遥感影像分割的目标多种多样,有诸如积雪、农作物、道路、建筑、水源等地物目标,也有例如云层的空中目标。
本案例基于PaddleX实现多通道遥感影像分割,涵盖数据分析、模型训练、模型预测等流程,旨在帮助用户利用深度学习技术解决多通道遥感影像分割问题。
## 目录
* [前置依赖](#1)
* [数据准备](#2)
* [数据分析](#3)
* [模型训练](#4)
* [模型预测](#5)
## <h2 id="1">前置依赖</h2>
* PaddlePaddle >= 1.8.4
* Python >= 3.5
* PaddleX >= 1.1.4
安装的相关问题参考[PaddleX安装](../../docs/install.md)
**另外还需安装gdal**, 使用pip安装gdal可能出错,推荐使用conda进行安装:
```
conda install gdal
```
下载PaddleX源码:
```
git clone https://github.com/PaddlePaddle/PaddleX
```
该案例所有脚本均位于`PaddleX/examples/channel_remote_sensing/`,进入该目录:
```
cd PaddleX/examples/channel_remote_sensing/
```
## <h2 id="2">数据准备</h2>
遥感影像的格式多种多样,不同传感器产生的数据格式也可能不同。PaddleX现已兼容以下4种格式图片读取:
- `tif`
- `png`, `jpeg`, `bmp`
- `img`
- `npy`
标注图要求必须为单通道的png格式图像,像素值即为对应的类别,像素标注类别需要从0开始递增。例如0,1,2,3表示有4种类别,255用于指定不参与训练和评估的像素,标注类别最多为256类。
本案例使用[L8 SPARCS公开数据集](https://www.usgs.gov/land-resources/nli/landsat/spatial-procedures-automated-removal-cloud-and-shadow-sparcs-validation)进行云雪分割,该数据集包含80张卫星影像,涵盖10个波段。原始标注图片包含7个类别,分别是`cloud`, `cloud shadow`, `shadow over water`, `snow/ice`, `water`, `land``flooded`。由于`flooded``shadow over water`2个类别占比仅为`1.8%``0.24%`,我们将其进行合并,`flooded`归为`land``shadow over water`归为`shadow`,合并后标注包含5个类别。
数值、类别、颜色对应表:
|Pixel value|Class|Color|
|---|---|---|
|0|cloud|white|
|1|shadow|black|
|2|snow/ice|cyan|
|3|water|blue|
|4|land|grey|
<p align="center">
<img src="./docs/images/dataset.png" align="middle"
</p>
<p align='center'>
L8 SPARCS数据集示例
</p>
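上述类别合并可以用numpy的映射表完成,思路如下(原始7类的编号顺序此处为假设,且未考虑忽略值255,仅演示合并方法;本案例提供下载的数据集已完成合并,无需再执行):

```python
import numpy as np
from PIL import Image

# 假设原始标注编号为:0 cloud, 1 cloud shadow, 2 shadow over water,
# 3 snow/ice, 4 water, 5 land, 6 flooded(编号仅为示意)
mapping = np.array([0, 1, 1, 2, 3, 4, 4], dtype='uint8')  # 合并为5类

old_mask = np.asarray(Image.open('old_mask.png'))  # 路径仅为示意
new_mask = mapping[old_mask]  # 按映射表重写每个像素的类别值
Image.fromarray(new_mask).save('merged_mask.png')
```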
执行以下命令下载并解压经过类别合并后的数据集:
```shell script
mkdir dataset && cd dataset
wget https://paddleseg.bj.bcebos.com/dataset/remote_sensing_seg.zip
unzip remote_sensing_seg.zip
cd ..
```
其中`data`目录存放遥感影像,`data_vis`目录存放彩色合成预览图,`mask`目录存放标注图。
## <h2 id="2">数据分析</h2>
遥感影像往往由许多波段组成,不同波段数据分布可能大相径庭,例如可见光波段和热红外波段分布十分不同。为了更深入了解数据的分布来优化模型训练效果,需要对数据进行分析。
参考文档[数据分析](./docs/analysis.md)对训练集进行统计分析,确定图像像素值的截断范围,并统计截断后的均值和方差。
## <h2 id="2">模型训练</h2>
本案例选择`UNet`语义分割模型完成云雪分割,运行以下步骤完成模型训练,模型的最优精度`miou``78.38%`
* 设置GPU卡号
```shell script
export CUDA_VISIBLE_DEVICES=0
```
* 运行以下脚本开始训练
```shell script
python train.py --data_dir dataset/remote_sensing_seg \
--train_file_list dataset/remote_sensing_seg/train.txt \
--eval_file_list dataset/remote_sensing_seg/val.txt \
--label_list dataset/remote_sensing_seg/labels.txt \
--save_dir saved_model/remote_sensing_unet \
--num_classes 5 \
--channel 10 \
--lr 0.01 \
--clip_min_value 7172 6561 5777 5103 4291 4000 4000 4232 6934 7199 \
--clip_max_value 50000 50000 50000 50000 50000 40000 30000 18000 40000 36000 \
--mean 0.15163569 0.15142828 0.15574491 0.1716084 0.2799778 0.27652043 0.28195933 0.07853807 0.56333154 0.5477584 \
--std 0.09301891 0.09818967 0.09831126 0.1057784 0.10842132 0.11062996 0.12791838 0.02637859 0.0675052 0.06168227 \
--num_epochs 500 \
--train_batch_size 3
```
也可以跳过模型训练步骤,下载预训练模型直接进行模型预测:
```
wget https://bj.bcebos.com/paddlex/examples/multi-channel_remote_sensing/models/l8sparcs_remote_model.tar.gz
tar -xvf l8sparcs_remote_model.tar.gz
```
## <h2 id="2">模型预测</h2>
运行以下脚本,对遥感图像进行预测并可视化预测结果,相应地也将对应的标注文件进行可视化,以比较预测效果。
```shell script
export CUDA_VISIBLE_DEVICES=0
python predict.py
```
可视化效果如下所示:
<img src="./docs/images/prediction.jpg" alt="预测图" align=center />
数值、类别、颜色对应表:
|Pixel value|Class|Color|
|---|---|---|
|0|cloud|white|
|1|shadow|black|
|2|snow/ice|cyan|
|3|water|blue|
|4|land|grey|
# 数据分析
遥感影像往往由许多波段组成,不同波段数据分布可能大相径庭,例如可见光波段和热红外波段分布十分不同。为了更深入了解数据的分布来优化模型训练效果,需要对数据进行分析。
## 目录
* [1. 统计分析](#1)
* [2. 确定像素值截断范围](#2)
* [3. 统计截断后的均值和方差](#3)
## <h2 id="1">统计分析</h2>
执行以下脚本,对训练集进行统计分析,屏幕会输出分析结果,同时结果也会保存至文件`train_information.pkl`中:
```
python tools/analysis.py
```
数据统计分析内容如下:
* 图像数量
例如统计出训练集中有64张图片:
```
64 samples in file dataset/remote_sensing_seg/train.txt
```
* 图像最大和最小的尺寸
例如统计出训练集中最大的高宽和最小的高宽分别是(1000, 1000)和(1000, 1000):
```
Minimal image height: 1000 Minimal image width: 1000.
Maximal image height: 1000 Maximal image width: 1000.
```
* 图像通道数量
例如统计出图像的通道数量为10:
```
Image channel is 10.
```
* 图像各通道的最小值和最大值
最小值和最大值分别以列表的形式输出,按照通道从小到大排列。例如:
```
Minimal image value: [7.172e+03 6.561e+03 5.777e+03 5.103e+03 4.291e+03 1.000e+00 1.000e+00 4.232e+03 6.934e+03 7.199e+03]
Maximal image value: [65535. 65535. 65535. 65535. 65535. 65535. 65535. 56534. 65535. 63215.]
```
* 图像各通道的像素值分布
针对各个通道,统计出各像素值的数量,并以柱状图的形式呈现在以'distribute.png'结尾的图片中。**需要注意的是,为便于观察,纵坐标为对数坐标**。用户可以查看这些图片来选择是否需要对分布在头部和尾部的像素值进行截断。
```
Image pixel distribution of each channel is saved with 'distribute.png' in the dataset/remote_sensing_seg
```
* 图像各通道归一化后的均值和方差
各通道归一化系数为各通道最大值与最小值之差,均值和方差以列表形式输出,按照通道从小到大排列。例如:
```
Image mean value: [0.23417574 0.22283101 0.2119595 0.2119887 0.27910388 0.21294892 0.17294037 0.10158925 0.43623915 0.41019192]
Image standard deviation: [0.06831269 0.07243951 0.07284761 0.07875261 0.08120818 0.0609302 0.05110716 0.00696064 0.03849307 0.03205579]
```
* 标注图中各类别的数量及比重
统计各类别的像素数量和在数据集全部像素的占比,以(类别值,该类别的数量,该类别的占比)的格式输出。例如:
```
Label pixel information is shown in a format of (label_id, the number of label_id, the ratio of label_id):
(0, 13302870, 0.20785734374999995)
(1, 4577005, 0.07151570312500002)
(2, 3955012, 0.0617970625)
(3, 2814243, 0.04397254687499999)
(4, 39350870, 0.6148573437500001)
```
## <h2 id="2">2 确定像素值截断范围</h2>
遥感影像数据分布范围广,往往存在一些异常值,这会影响算法对实际数据分布的拟合效果。为更好地对数据进行归一化,可以抑制遥感影像中少量的异常值。根据`图像各通道的像素值分布`来确定像素值的截断范围,并在后续图像预处理过程中对超出范围的像素值通过截断进行校正,从而去除异常值带来的干扰。**注意:该步骤是否执行根据数据集实际分布来决定。**
例如各通道的像素值分布可视化效果如下:
<img src="./images/image_pixel_distribution.png" width = "600" height = "600" alt="像素值分布图" align=center />
对于上述分布,我们选取的截断范围是(按照通道从小到大排列):
```
截断范围最小值: clip_min_value = [7172, 6561, 5777, 5103, 4291, 4000, 4000, 4232, 6934, 7199]
截断范围最大值: clip_max_value = [50000, 50000, 50000, 50000, 50000, 40000, 30000, 18000, 40000, 36000]
```
## <h2 id="3">3 确定像素值截断范围</h2>
为避免数据截断范围选取不当带来的影响,应该统计异常值像素占比,确保受影响的像素比例不要过高。接着对截断后的数据计算归一化后的均值和方差,**用于后续模型训练时的图像预处理参数设置**
执行以下脚本:
```
python tools/cal_clipped_mean_std.py
```
截断像素占比统计结果如下:
```
Channel 0, the ratio of pixels to be clipped = 0.00054778125
Channel 1, the ratio of pixels to be clipped = 0.0011129375
Channel 2, the ratio of pixels to be clipped = 0.000843703125
Channel 3, the ratio of pixels to be clipped = 0.00127125
Channel 4, the ratio of pixels to be clipped = 0.001330140625
Channel 5, the ratio of pixels to be clipped = 8.1375e-05
Channel 6, the ratio of pixels to be clipped = 0.0007348125
Channel 7, the ratio of pixels to be clipped = 6.5625e-07
Channel 8, the ratio of pixels to be clipped = 0.000185921875
Channel 9, the ratio of pixels to be clipped = 0.000139671875
```
可看出,被截断像素占比均不超过0.2%。
裁剪后数据的归一化系数如下:
```
Image mean value: [0.15163569 0.15142828 0.15574491 0.1716084 0.2799778 0.27652043 0.28195933 0.07853807 0.56333154 0.5477584 ]
Image standard deviation: [0.09301891 0.09818967 0.09831126 0.1057784 0.10842132 0.11062996 0.12791838 0.02637859 0.0675052 0.06168227]
(normalized by (clip_max_value - clip_min_value), arranged in 0-10 channel order)
```
import numpy as np
from PIL import Image
import paddlex as pdx
model_dir = "l8sparcs_remote_model/"
img_file = "dataset/remote_sensing_seg/data/LC80150242014146LGN00_23_data.tif"
label_file = "dataset/remote_sensing_seg/mask/LC80150242014146LGN00_23_mask.png"
color = [255, 255, 255, 0, 0, 0, 255, 255, 0, 255, 0, 0, 150, 150, 150]
# 预测并可视化预测结果
model = pdx.load_model(model_dir)
pred = model.predict(img_file)
#pred = model.overlap_tile_predict(img_file, tile_size=[512, 512], pad_size=[64, 64], batch_size=32)
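# 对大尺寸遥感影像,建议改用上一行的overlap_tile_predict滑动窗口预测,以避免显存不足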
pdx.seg.visualize(
img_file, pred, weight=0., save_dir='./output/pred', color=color)
# 可视化标注文件
label = np.asarray(Image.open(label_file))
pred = {'label_map': label}
pdx.seg.visualize(
img_file, pred, weight=0., save_dir='./output/gt', color=color)
import paddlex as pdx
train_analysis = pdx.datasets.analysis.Seg(
data_dir='dataset/remote_sensing_seg',
file_list='dataset/remote_sensing_seg/train.txt',
label_list='dataset/remote_sensing_seg/labels.txt')
train_analysis.analysis()
import paddlex as pdx
clip_min_value = [7172, 6561, 5777, 5103, 4291, 4000, 4000, 4232, 6934, 7199]
clip_max_value = [
50000, 50000, 50000, 50000, 50000, 40000, 30000, 18000, 40000, 36000
]
data_info_file = 'dataset/remote_sensing_seg/train_infomation.pkl'
train_analysis = pdx.datasets.analysis.Seg(
data_dir='dataset/remote_sensing_seg',
file_list='dataset/remote_sensing_seg/train.txt',
label_list='dataset/remote_sensing_seg/labels.txt')
train_analysis.cal_clipped_mean_std(clip_min_value, clip_max_value,
data_info_file)
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os.path as osp
import argparse
from paddlex.seg import transforms
import paddlex as pdx
def parse_args():
parser = argparse.ArgumentParser(description='RemoteSensing training')
parser.add_argument(
'--data_dir',
dest='data_dir',
help='dataset directory',
default=None,
type=str)
parser.add_argument(
'--train_file_list',
dest='train_file_list',
help='train file_list',
default=None,
type=str)
parser.add_argument(
'--eval_file_list',
dest='eval_file_list',
help='eval file_list',
default=None,
type=str)
parser.add_argument(
'--label_list',
dest='label_list',
help='label_list file',
default=None,
type=str)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='model save directory',
default=None,
type=str)
parser.add_argument(
'--num_classes',
dest='num_classes',
help='Number of classes',
default=None,
type=int)
parser.add_argument(
'--channel',
dest='channel',
help='number of data channel',
default=3,
type=int)
parser.add_argument(
'--clip_min_value',
dest='clip_min_value',
help='Min values for clipping data',
nargs='+',
default=None,
type=int)
parser.add_argument(
'--clip_max_value',
dest='clip_max_value',
help='Max values for clipping data',
nargs='+',
default=None,
type=int)
parser.add_argument(
'--mean',
dest='mean',
help='Data means',
nargs='+',
default=None,
type=float)
parser.add_argument(
'--std',
dest='std',
help='Data standard deviation',
nargs='+',
default=None,
type=float)
parser.add_argument(
'--num_epochs',
dest='num_epochs',
help='number of training epochs',
default=100,
type=int)
parser.add_argument(
'--train_batch_size',
dest='train_batch_size',
help='training batch size',
default=4,
type=int)
parser.add_argument(
'--lr', dest='lr', help='learning rate', default=0.01, type=float)
return parser.parse_args()
args = parse_args()
data_dir = args.data_dir
train_list = args.train_file_list
val_list = args.eval_file_list
label_list = args.label_list
save_dir = args.save_dir
num_classes = args.num_classes
channel = args.channel
clip_min_value = args.clip_min_value
clip_max_value = args.clip_max_value
mean = args.mean
std = args.std
num_epochs = args.num_epochs
train_batch_size = args.train_batch_size
lr = args.lr
# 定义训练和验证时的transforms
train_transforms = transforms.Compose([
transforms.RandomVerticalFlip(0.5),
transforms.RandomHorizontalFlip(0.5),
transforms.ResizeStepScaling(0.5, 2.0, 0.25),
transforms.RandomPaddingCrop(im_padding_value=[1000] * channel),
transforms.Clip(
min_val=clip_min_value, max_val=clip_max_value),
transforms.Normalize(
min_val=clip_min_value, max_val=clip_max_value, mean=mean, std=std),
])
eval_transforms = transforms.Compose([
transforms.Clip(
min_val=clip_min_value, max_val=clip_max_value),
transforms.Normalize(
min_val=clip_min_value, max_val=clip_max_value, mean=mean, std=std),
])
train_dataset = pdx.datasets.SegDataset(
data_dir=data_dir,
file_list=train_list,
label_list=label_list,
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
data_dir=data_dir,
file_list=val_list,
label_list=label_list,
transforms=eval_transforms)
model = pdx.seg.UNet(num_classes=num_classes, input_channel=channel)
model.train(
num_epochs=num_epochs,
train_dataset=train_dataset,
train_batch_size=train_batch_size,
eval_dataset=eval_dataset,
save_interval_epochs=5,
log_interval_steps=10,
save_dir=save_dir,
learning_rate=lr,
use_vdl=True)
# RGB Remote Sensing Image Segmentation

This example implements remote sensing image segmentation with PaddleX and provides sliding-window prediction to avoid running out of GPU memory when predicting directly on large images. The overlap between sliding windows is configurable, which removes the visible seams at window boundaries in the final prediction.

## Contents

* [Data Preparation](#1)

...@@ -12,7 +12,7 @@

* PaddlePaddle >= 1.8.4
* Python >= 3.5
* PaddleX >= 1.1.4

For installation issues, see [PaddleX Installation](../install.md).

...@@ -32,51 +32,57 @@ cd PaddleX/examples/remote_sensing/

This example uses the high-resolution remote sensing imagery released for the 2015 CCF Big Data Competition: 5 annotated RGB images, ranging in size from 4011 × 2470 to 7969 × 7939. Five classes are annotated: background (0), vegetation (1), building (2), water (3), and road (4).

The first 4 images form the training set and the 5th the validation set. To allow a larger training batch size, the first 4 images are cropped with a (1024, 1024) sliding window and a (512, 512) stride; together with the 4 original large images, the training set contains 688 images. Validating directly on the large image during training would exhaust GPU memory, so the 5th image is cropped with a (769, 769) window and a (769, 769) stride, yielding 40 sub-images. A toy sketch of this cropping is shown below.
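As a minimal sketch of that cropping (hypothetical file name; the real logic lives in `prepare_data.py`):

```
import cv2

image = cv2.imread('ccf_remote_dataset/1.png')  # hypothetical path
h, w = image.shape[:2]
tiles = []
# (1024, 1024) window with a (512, 512) stride, as described above;
# tiles at the bottom/right borders are simply smaller than 1024 x 1024
for top in range(0, h, 512):
    for left in range(0, w, 512):
        tiles.append(image[top:top + 1024, left:left + 1024])
```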
Run the following script to download the raw dataset and crop it:
```
python prepare_data.py
```

## <h2 id="2">Model Training</h2>

For segmentation we use a DeepLabv3p model with a MobileNetv3_large_ssld backbone, which combines high speed with high accuracy. Run the following script to train the model:
```
python train.py
```
Alternatively, skip training and download the pretrained model for the prediction and evaluation steps below:
```
wget https://bj.bcebos.com/paddlex/examples/remote_sensing/models/ccf_remote_model.tar.gz
tar -xvf ccf_remote_model.tar.gz
```

## <h2 id="3">Model Prediction</h2>

Predicting directly on a large image can exhaust GPU memory, so this example provides a sliding-window prediction interface that supports both non-overlapping and overlapping windows.

* Non-overlapping sliding-window prediction

A fixed-size window slides over the input image, the model predicts each window separately, and the per-window results are stitched into a prediction for the whole image. Because predictions near a window's edges are worse than those at its center, visible seams can appear where windows meet.

See [overlap_tile_predict](https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#overlap-tile-predict) for the API. **Set the parameter `pad_size` to `[0, 0]` for this mode**, as in the sketch below.
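A minimal usage sketch of this mode (assuming a model loaded with `pdx.load_model`):

```
pred = model.overlap_tile_predict(
    img_file='dataset/JPEGImages/5.png',
    tile_size=(769, 769),
    pad_size=[0, 0])  # zero margin: plain non-overlapping tiling
```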
* Overlapping sliding-window prediction

In the U-Net paper the authors propose an overlapping sliding-window strategy (the overlap-tile strategy) to remove stitching seams. Each window is expanded outward by a margin before prediction (the blue region in the figure below), and only the central part of each window's prediction (the yellow region) is kept when stitching. For windows at the image border, the pixels in the expanded margin are obtained by mirroring the border pixels.

See [overlap_tile_predict](https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#overlap-tile-predict) for the API of this mode.

![](images/overlap_tile.png)
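The mirror filling at the border can be sketched with NumPy (a toy illustration, not the library's implementation):

```
import numpy as np

pad = 64  # margin added on every side of each window
image = np.zeros((1500, 2000, 3), dtype='float32')  # toy H x W x C input
# reflect-pad so that windows at the image border also have a full margin
padded = np.pad(image, ((pad, pad), (pad, pad), (0, 0)), mode='reflect')
# each window fed to the model is (tile + 2 * pad) wide and high; only the
# central tile-sized part of its prediction is kept when stitching
```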
Compared with non-overlapping prediction, the overlap-tile strategy raises the model's mIoU on this example from 80.58% to 81.52% and visibly removes the seams; the figure below compares the two modes.

![](images/visualize_compare.jpg)

Run the following script to predict with overlapping sliding windows:
```
python predict.py
```

## <h2 id="4">Model Evaluation</h2>

During training, the model is evaluated on the validation set every 10 epochs. Because the original large image was cropped into small tiles beforehand, this amounts to non-overlapping sliding-window prediction, and the best model reaches an mIoU of 80.58%. Run the following script to re-evaluate the model on the original large image with overlapping sliding-window prediction, which raises the mIoU to 81.52%:
```
python eval.py
```
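For reference, the reported mIoU averages the per-class intersection-over-union, where $TP_c$, $FP_c$ and $FN_c$ count the true-positive, false-positive and false-negative pixels of class $c$:

$$\mathrm{IoU}_c = \frac{TP_c}{TP_c + FP_c + FN_c}, \qquad \mathrm{mIoU} = \frac{1}{C}\sum_{c=1}^{C} \mathrm{IoU}_c$$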
...@@ -2,6 +2,7 @@
# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import numpy as np
import cv2
from PIL import Image
...@@ -11,38 +12,31 @@ import paddlex as pdx
import paddlex.utils.logging as logging
from paddlex.cv.models.utils.seg_eval import ConfusionMatrix


def update_confusion_matrix(confusion_matrix, prediction, label):
    pred = prediction["label_map"]
    pred = pred[np.newaxis, :, :, np.newaxis]
    pred = pred.astype(np.int64)
    label = label[np.newaxis, np.newaxis, :, :]
    mask = label != model.ignore_index
    confusion_matrix.calculate(pred=pred, label=label, ignore=mask)
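# `mask` is True exactly where the label differs from model.ignore_index, i.e.
# the pixels that should count toward the metrics; note the helper reads the
# module-level `model` defined below.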
model_dir = 'output/deeplabv3p_mobilenetv3_large_ssld/best_model'
img_file = "dataset/JPEGImages/5.png"
label_file = "dataset/Annotations/5_class.png"

model = pdx.load_model(model_dir)

conf_mat = ConfusionMatrix(model.num_classes, streaming=True)

# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#overlap-tile-predict
overlap_tile_predict = model.overlap_tile_predict(
    img_file=img_file, tile_size=(769, 769), pad_size=[64, 64], batch_size=32)
label = np.asarray(Image.open(label_file))

update_confusion_matrix(conf_mat, overlap_tile_predict, label)

category_iou, miou = conf_mat.mean_iou()
category_acc, macc = conf_mat.accuracy()
logging.info(
......
...@@ -5,26 +5,14 @@ os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx

model_dir = 'output/deeplabv3p_mobilenetv3_large_ssld/best_model'
img_file = "dataset/JPEGImages/5.png"
save_dir = 'output/deeplabv3p_mobilenetv3_large_ssld/'

model = pdx.load_model(model_dir)

# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/semantic_segmentation.html#overlap-tile-predict
pred = model.overlap_tile_predict(
    img_file=img_file, tile_size=(769, 769), pad_size=[64, 64], batch_size=32)
pdx.seg.visualize(img_file, pred, weight=0., save_dir=save_dir)
...@@ -44,8 +44,8 @@ except:
    )
import paddlehub as hub
if hub.version.hub_version < '1.8.2':
    raise Exception("[ERROR] paddlehub >= 1.8.2 is required")
env_info = get_environ_info()
load_model = cv.models.load_model
...@@ -56,4 +56,4 @@ log_level = 2
from . import interpret
__version__ = '1.2.1'
...@@ -51,6 +51,12 @@ def arg_parser():
        action="store_true",
        default=False,
        help="export onnx model for deployment")
    parser.add_argument(
        "--onnx_opset",
        "-oo",
        type=int,
        default=10,
        help="the ONNX opset version to use when exporting with paddle2onnx")
    parser.add_argument(
        "--data_conversion",
        "-dc",
...@@ -162,7 +168,7 @@ def main():
        logging.error(
            "paddlex --export_inference --model_dir model_path --save_dir infer_model"
        )
    pdx.convertor.export_onnx_model(model, args.save_dir, args.onnx_opset)
    if args.data_conversion:
        assert args.source is not None, "--source should be defined while converting dataset"
...@@ -183,7 +189,7 @@ def main():
    if args.split_dataset:
        assert args.dataset_dir is not None, "--dataset_dir should be defined while splitting dataset"
        assert args.format is not None, "--format should be defined while splitting dataset"
        assert args.val_value is not None, "--val_value should be defined while splitting dataset"
        dataset_dir = args.dataset_dir
......
...@@ -29,10 +29,12 @@ def export_onnx(model_dir, save_dir, fixed_input_shape):
    export_onnx_model(model, save_dir)


def export_onnx_model(model, save_dir, opset_version=10):
    if model.__class__.__name__ == "FastSCNN" or (
            model.model_type == "detector" and
            model.__class__.__name__ != "YOLOv3"):
        logging.error(
            "Only image classifier models, YOLOv3 detection models and semantic segmentation models (except FastSCNN) can be exported to ONNX"
        )
    try:
        import x2paddle
...@@ -41,6 +43,406 @@ def export_onnx_model(model, save_dir):
    except:
        logging.error(
            "You need to install x2paddle first, pip install x2paddle>=0.7.4")
    if opset_version == 10 and model.__class__.__name__ == "YOLOv3":
        logging.warning(
            "Exporting for OpenVINO by default; the ONNX output of multiclass_nms will contain the background class. If you need ONNX fully consistent with Paddle, please export with X2Paddle instead."
        )
        x2paddle.op_mapper.paddle2onnx.opset10.paddle_custom_layer.multiclass_nms.multiclass_nms = multiclass_nms_for_openvino
    from x2paddle.op_mapper.paddle2onnx.paddle_op_mapper import PaddleOpMapper
    mapper = PaddleOpMapper()
    mapper.convert(
        model.test_prog,
        save_dir,
        scope=model.scope,
        opset_version=opset_version)
def multiclass_nms_for_openvino(op, block):
    """
    Convert the Paddle multiclass_nms op into ONNX ops.
    It selects the kept boxes out of the original boxes.
    This variant targets OpenVINO, which doesn't support dynamic shapes.
    """
import math
import sys
import numpy as np
import paddle.fluid.core as core
import paddle.fluid as fluid
import onnx
import warnings
from onnx import helper, onnx_pb
inputs = dict()
outputs = dict()
attrs = dict()
for name in op.input_names:
inputs[name] = op.input(name)
for name in op.output_names:
outputs[name] = op.output(name)
for name in op.attr_names:
attrs[name] = op.attr(name)
result_name = outputs['Out'][0]
background = attrs['background_label']
normalized = attrs['normalized']
if normalized == False:
warnings.warn(
'The parameter normalized of the multiclass_nms OP of Paddle is False, which differs from ONNX. \
Please set normalized=True in multiclass_nms of Paddle'
)
# convert the Paddle attributes to ONNX tensors
name_score_threshold = [outputs['Out'][0] + "@score_threshold"]
name_iou_threshold = [outputs['Out'][0] + "@iou_threshold"]
name_keep_top_k = [outputs['Out'][0] + '@keep_top_k']
name_keep_top_k_2D = [outputs['Out'][0] + '@keep_top_k_1D']
node_score_threshold = onnx.helper.make_node(
'Constant',
inputs=[],
outputs=name_score_threshold,
value=onnx.helper.make_tensor(
name=name_score_threshold[0] + "@const",
data_type=onnx.TensorProto.FLOAT,
dims=(),
vals=[float(attrs['score_threshold'])]))
node_iou_threshold = onnx.helper.make_node(
'Constant',
inputs=[],
outputs=name_iou_threshold,
value=onnx.helper.make_tensor(
name=name_iou_threshold[0] + "@const",
data_type=onnx.TensorProto.FLOAT,
dims=(),
vals=[float(attrs['nms_threshold'])]))
node_keep_top_k = onnx.helper.make_node(
'Constant',
inputs=[],
outputs=name_keep_top_k,
value=onnx.helper.make_tensor(
name=name_keep_top_k[0] + "@const",
data_type=onnx.TensorProto.INT64,
dims=(),
vals=[np.int64(attrs['keep_top_k'])]))
node_keep_top_k_2D = onnx.helper.make_node(
'Constant',
inputs=[],
outputs=name_keep_top_k_2D,
value=onnx.helper.make_tensor(
name=name_keep_top_k_2D[0] + "@const",
data_type=onnx.TensorProto.INT64,
dims=[1, 1],
vals=[np.int64(attrs['keep_top_k'])]))
# the paddle data format is x1,y1,x2,y2
kwargs = {'center_point_box': 0}
name_select_nms = [outputs['Out'][0] + "@select_index"]
node_select_nms = onnx.helper.make_node(
'NonMaxSuppression',
inputs=inputs['BBoxes'] + inputs['Scores'] + name_keep_top_k +\
name_iou_threshold + name_score_threshold,
outputs=name_select_nms)
# step 1: nodes that run NonMaxSuppression and select the kept indices
node_list = [
node_score_threshold, node_iou_threshold, node_keep_top_k,
node_keep_top_k_2D, node_select_nms
]
# create some constant values for later use
name_const_value = [result_name+"@const_0",
result_name+"@const_1",\
result_name+"@const_2",\
result_name+"@const_-1"]
value_const_value = [0, 1, 2, -1]
for name, value in zip(name_const_value, value_const_value):
node = onnx.helper.make_node(
'Constant',
inputs=[],
outputs=[name],
value=onnx.helper.make_tensor(
name=name + "@const",
data_type=onnx.TensorProto.INT64,
dims=[1],
vals=[value]))
node_list.append(node)
# In this code block, we decode the raw score data, reshaping N * C * M to 1 * N*C*M,
# and at the same time decode the select indices to 1 * D, then gather the select_indices
outputs_gather_1_ = [result_name + "@gather_1_"]
node_gather_1_ = onnx.helper.make_node(
'Gather',
inputs=name_select_nms + [result_name + "@const_1"],
outputs=outputs_gather_1_,
axis=1)
node_list.append(node_gather_1_)
outputs_gather_1 = [result_name + "@gather_1"]
node_gather_1 = onnx.helper.make_node(
'Unsqueeze',
inputs=outputs_gather_1_,
outputs=outputs_gather_1,
axes=[0])
node_list.append(node_gather_1)
outputs_gather_2_ = [result_name + "@gather_2_"]
node_gather_2_ = onnx.helper.make_node(
'Gather',
inputs=name_select_nms + [result_name + "@const_2"],
outputs=outputs_gather_2_,
axis=1)
node_list.append(node_gather_2_)
outputs_gather_2 = [result_name + "@gather_2"]
node_gather_2 = onnx.helper.make_node(
'Unsqueeze',
inputs=outputs_gather_2_,
outputs=outputs_gather_2,
axes=[0])
node_list.append(node_gather_2)
# reshape scores N * C * M to (N*C*M) * 1
outputs_reshape_scores_rank1 = [result_name + "@reshape_scores_rank1"]
node_reshape_scores_rank1 = onnx.helper.make_node(
"Reshape",
inputs=inputs['Scores'] + [result_name + "@const_-1"],
outputs=outputs_reshape_scores_rank1)
node_list.append(node_reshape_scores_rank1)
# get the shape of scores
outputs_shape_scores = [result_name + "@shape_scores"]
node_shape_scores = onnx.helper.make_node(
'Shape', inputs=inputs['Scores'], outputs=outputs_shape_scores)
node_list.append(node_shape_scores)
# gather index 2 of the scores shape (the number of boxes M)
outputs_gather_scores_dim1 = [result_name + "@gather_scores_dim1"]
node_gather_scores_dim1 = onnx.helper.make_node(
'Gather',
inputs=outputs_shape_scores + [result_name + "@const_2"],
outputs=outputs_gather_scores_dim1,
axis=0)
node_list.append(node_gather_scores_dim1)
# multiply class id by M (the number of boxes)
outputs_mul_classnum_boxnum = [result_name + "@mul_classnum_boxnum"]
node_mul_classnum_boxnum = onnx.helper.make_node(
'Mul',
inputs=outputs_gather_1 + outputs_gather_scores_dim1,
outputs=outputs_mul_classnum_boxnum)
node_list.append(node_mul_classnum_boxnum)
# add: class * M + box_index, indexing into the flattened scores
outputs_add_class_M_index = [result_name + "@add_class_M_index"]
node_add_class_M_index = onnx.helper.make_node(
'Add',
inputs=outputs_mul_classnum_boxnum + outputs_gather_2,
outputs=outputs_add_class_M_index)
node_list.append(node_add_class_M_index)
# Squeeze the indices to 1 dim
outputs_squeeze_select_index = [result_name + "@squeeze_select_index"]
node_squeeze_select_index = onnx.helper.make_node(
'Squeeze',
inputs=outputs_add_class_M_index,
outputs=outputs_squeeze_select_index,
axes=[0, 2])
node_list.append(node_squeeze_select_index)
# gather the data from flatten scores
outputs_gather_select_scores = [result_name + "@gather_select_scores"]
node_gather_select_scores = onnx.helper.make_node('Gather',
inputs=outputs_reshape_scores_rank1 + \
outputs_squeeze_select_index,
outputs=outputs_gather_select_scores,
axis=0)
node_list.append(node_gather_select_scores)
# get nums to input TopK
outputs_shape_select_num = [result_name + "@shape_select_num"]
node_shape_select_num = onnx.helper.make_node(
'Shape',
inputs=outputs_gather_select_scores,
outputs=outputs_shape_select_num)
node_list.append(node_shape_select_num)
outputs_gather_select_num = [result_name + "@gather_select_num"]
node_gather_select_num = onnx.helper.make_node(
'Gather',
inputs=outputs_shape_select_num + [result_name + "@const_0"],
outputs=outputs_gather_select_num,
axis=0)
node_list.append(node_gather_select_num)
outputs_unsqueeze_select_num = [result_name + "@unsqueeze_select_num"]
node_unsqueeze_select_num = onnx.helper.make_node(
'Unsqueeze',
inputs=outputs_gather_select_num,
outputs=outputs_unsqueeze_select_num,
axes=[0])
node_list.append(node_unsqueeze_select_num)
outputs_concat_topK_select_num = [result_name + "@concat_topK_select_num"]
node_concat_topK_select_num = onnx.helper.make_node(
'Concat',
inputs=outputs_unsqueeze_select_num + name_keep_top_k_2D,
outputs=outputs_concat_topK_select_num,
axis=0)
node_list.append(node_concat_topK_select_num)
outputs_cast_concat_topK_select_num = [
result_name + "@concat_topK_select_num"
]
node_outputs_cast_concat_topK_select_num = onnx.helper.make_node(
'Cast',
inputs=outputs_concat_topK_select_num,
outputs=outputs_cast_concat_topK_select_num,
to=6)
node_list.append(node_outputs_cast_concat_topK_select_num)
# get min(topK, num_select)
outputs_compare_topk_num_select = [
result_name + "@compare_topk_num_select"
]
node_compare_topk_num_select = onnx.helper.make_node(
'ReduceMin',
inputs=outputs_cast_concat_topK_select_num,
outputs=outputs_compare_topk_num_select,
keepdims=0)
node_list.append(node_compare_topk_num_select)
# unsqueeze the indices to 1D tensor
outputs_unsqueeze_topk_select_indices = [
result_name + "@unsqueeze_topk_select_indices"
]
node_unsqueeze_topk_select_indices = onnx.helper.make_node(
'Unsqueeze',
inputs=outputs_compare_topk_num_select,
outputs=outputs_unsqueeze_topk_select_indices,
axes=[0])
node_list.append(node_unsqueeze_topk_select_indices)
# cast the indices to INT64
outputs_cast_topk_indices = [result_name + "@cast_topk_indices"]
node_cast_topk_indices = onnx.helper.make_node(
'Cast',
inputs=outputs_unsqueeze_topk_select_indices,
outputs=outputs_cast_topk_indices,
to=7)
node_list.append(node_cast_topk_indices)
# select topk scores indices
outputs_topk_select_topk_indices = [result_name + "@topk_select_topk_values",\
result_name + "@topk_select_topk_indices"]
node_topk_select_topk_indices = onnx.helper.make_node(
'TopK',
inputs=outputs_gather_select_scores + outputs_cast_topk_indices,
outputs=outputs_topk_select_topk_indices)
node_list.append(node_topk_select_topk_indices)
# gather topk label, scores, boxes
outputs_gather_topk_scores = [result_name + "@gather_topk_scores"]
node_gather_topk_scores = onnx.helper.make_node(
'Gather',
inputs=outputs_gather_select_scores +
[outputs_topk_select_topk_indices[1]],
outputs=outputs_gather_topk_scores,
axis=0)
node_list.append(node_gather_topk_scores)
outputs_gather_topk_class = [result_name + "@gather_topk_class"]
node_gather_topk_class = onnx.helper.make_node(
'Gather',
inputs=outputs_gather_1 + [outputs_topk_select_topk_indices[1]],
outputs=outputs_gather_topk_class,
axis=1)
node_list.append(node_gather_topk_class)
# gather the boxes need to gather the boxes id, then get boxes
outputs_gather_topk_boxes_id = [result_name + "@gather_topk_boxes_id"]
node_gather_topk_boxes_id = onnx.helper.make_node(
'Gather',
inputs=outputs_gather_2 + [outputs_topk_select_topk_indices[1]],
outputs=outputs_gather_topk_boxes_id,
axis=1)
node_list.append(node_gather_topk_boxes_id)
# squeeze the gather_topk_boxes_id to 1 dim
outputs_squeeze_topk_boxes_id = [result_name + "@squeeze_topk_boxes_id"]
node_squeeze_topk_boxes_id = onnx.helper.make_node(
'Squeeze',
inputs=outputs_gather_topk_boxes_id,
outputs=outputs_squeeze_topk_boxes_id,
axes=[0, 2])
node_list.append(node_squeeze_topk_boxes_id)
outputs_gather_select_boxes = [result_name + "@gather_select_boxes"]
node_gather_select_boxes = onnx.helper.make_node(
'Gather',
inputs=inputs['BBoxes'] + outputs_squeeze_topk_boxes_id,
outputs=outputs_gather_select_boxes,
axis=1)
node_list.append(node_gather_select_boxes)
# concat the final result
# before concat need to cast the class to float
outputs_cast_topk_class = [result_name + "@cast_topk_class"]
node_cast_topk_class = onnx.helper.make_node(
'Cast',
inputs=outputs_gather_topk_class,
outputs=outputs_cast_topk_class,
to=1)
node_list.append(node_cast_topk_class)
outputs_unsqueeze_topk_scores = [result_name + "@unsqueeze_topk_scores"]
node_unsqueeze_topk_scores = onnx.helper.make_node(
'Unsqueeze',
inputs=outputs_gather_topk_scores,
outputs=outputs_unsqueeze_topk_scores,
axes=[0, 2])
node_list.append(node_unsqueeze_topk_scores)
inputs_concat_final_results = outputs_cast_topk_class + outputs_unsqueeze_topk_scores +\
outputs_gather_select_boxes
outputs_sort_by_score_results = [result_name + "@concat_topk_scores"]
node_sort_by_score_results = onnx.helper.make_node(
'Concat',
inputs=inputs_concat_final_results,
outputs=outputs_sort_by_score_results,
axis=2)
node_list.append(node_sort_by_score_results)
# select topk classes indices
outputs_squeeze_cast_topk_class = [
result_name + "@squeeze_cast_topk_class"
]
node_squeeze_cast_topk_class = onnx.helper.make_node(
'Squeeze',
inputs=outputs_cast_topk_class,
outputs=outputs_squeeze_cast_topk_class,
axes=[0, 2])
node_list.append(node_squeeze_cast_topk_class)
outputs_neg_squeeze_cast_topk_class = [
result_name + "@neg_squeeze_cast_topk_class"
]
node_neg_squeeze_cast_topk_class = onnx.helper.make_node(
'Neg',
inputs=outputs_squeeze_cast_topk_class,
outputs=outputs_neg_squeeze_cast_topk_class)
node_list.append(node_neg_squeeze_cast_topk_class)
outputs_topk_select_classes_indices = [result_name + "@topk_select_topk_classes_scores",\
result_name + "@topk_select_topk_classes_indices"]
node_topk_select_topk_indices = onnx.helper.make_node(
'TopK',
inputs=outputs_neg_squeeze_cast_topk_class + outputs_cast_topk_indices,
outputs=outputs_topk_select_classes_indices)
node_list.append(node_topk_select_topk_indices)
outputs_concat_final_results = outputs['Out']
node_concat_final_results = onnx.helper.make_node(
'Gather',
inputs=outputs_sort_by_score_results +
[outputs_topk_select_classes_indices[1]],
outputs=outputs_concat_final_results,
axis=1)
node_list.append(node_concat_final_results)
return node_list
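# Usage sketch (hypothetical paths): export a trained model to ONNX through the
# entry point used by the CLI above; opset_version=11 skips the
# OpenVINO-specific NMS patch applied for opset 10.
# import paddlex as pdx
# model = pdx.load_model('output/yolov3_darknet53/best_model')
# pdx.convertor.export_onnx_model(model, save_dir='onnx_model', opset_version=11)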
...@@ -20,3 +20,4 @@ from .easydata_cls import EasyDataCls
from .easydata_det import EasyDataDet
from .easydata_seg import EasyDataSeg
from .dataset import generate_minibatch
from .analysis import Seg
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import numpy as np
import os.path as osp
import cv2
from PIL import Image
import pickle
import threading
import multiprocessing as mp
import paddlex.utils.logging as logging
from paddlex.utils import path_normalization
from paddlex.cv.transforms.seg_transforms import Compose
from .dataset import get_encoding
class Seg:
def __init__(self, data_dir, file_list, label_list):
self.data_dir = data_dir
self.file_list_path = file_list
self.file_list = list()
self.labels = list()
with open(label_list, encoding=get_encoding(label_list)) as f:
for line in f:
item = line.strip()
self.labels.append(item)
with open(file_list, encoding=get_encoding(file_list)) as f:
for line in f:
items = line.strip().split()
if len(items) > 2:
raise Exception(
"A space is defined as the separator, but it exists in image or label name {}."
.format(line))
items[0] = path_normalization(items[0])
items[1] = path_normalization(items[1])
full_path_im = osp.join(data_dir, items[0])
full_path_label = osp.join(data_dir, items[1])
if not osp.exists(full_path_im):
raise IOError('The image file {} does not exist!'.format(
full_path_im))
if not osp.exists(full_path_label):
raise IOError('The label file {} does not exist!'.format(
full_path_label))
self.file_list.append([full_path_im, full_path_label])
self.num_samples = len(self.file_list)
def _get_shape(self):
max_height = max(self.im_height_list)
max_width = max(self.im_width_list)
min_height = min(self.im_height_list)
min_width = min(self.im_width_list)
shape_info = {
'max_height': max_height,
'max_width': max_width,
'min_height': min_height,
'min_width': min_width,
}
return shape_info
def _get_label_pixel_info(self):
pixel_num = np.dot(self.im_height_list, self.im_width_list)
label_pixel_info = dict()
for label_value, label_value_num in zip(self.label_value_list,
self.label_value_num_list):
for v, n in zip(label_value, label_value_num):
if v not in label_pixel_info.keys():
label_pixel_info[v] = [n, float(n) / float(pixel_num)]
else:
label_pixel_info[v][0] += n
label_pixel_info[v][1] += float(n) / float(pixel_num)
return label_pixel_info
def _get_image_pixel_info(self):
channel = max([len(im_value) for im_value in self.im_value_list])
im_pixel_info = [dict() for c in range(channel)]
for im_value, im_value_num in zip(self.im_value_list,
self.im_value_num_list):
for c in range(channel):
for v, n in zip(im_value[c], im_value_num[c]):
if v not in im_pixel_info[c].keys():
im_pixel_info[c][v] = n
else:
im_pixel_info[c][v] += n
return im_pixel_info
def _get_mean_std(self):
im_mean = np.asarray(self.im_mean_list)
im_mean = im_mean.sum(axis=0)
im_mean = im_mean / len(self.file_list)
im_mean /= self.max_im_value - self.min_im_value
im_std = np.asarray(self.im_std_list)
im_std = im_std.sum(axis=0)
im_std = im_std / len(self.file_list)
im_std /= self.max_im_value - self.min_im_value
return (im_mean, im_std)
def _get_image_info(self, start, end):
for id in range(start, end):
full_path_im, full_path_label = self.file_list[id]
image, label = Compose.decode_image(full_path_im, full_path_label)
height, width, channel = image.shape
self.im_height_list[id] = height
self.im_width_list[id] = width
self.im_channel_list[id] = channel
self.im_mean_list[
id] = [image[:, :, c].mean() for c in range(channel)]
self.im_std_list[
id] = [image[:, :, c].std() for c in range(channel)]
for c in range(channel):
unique, counts = np.unique(image[:, :, c], return_counts=True)
self.im_value_list[id].extend([unique])
self.im_value_num_list[id].extend([counts])
unique, counts = np.unique(label, return_counts=True)
self.label_value_list[id] = unique
self.label_value_num_list[id] = counts
def _get_clipped_mean_std(self, start, end, clip_min_value,
clip_max_value):
for id in range(start, end):
full_path_im, full_path_label = self.file_list[id]
image, label = Compose.decode_image(full_path_im, full_path_label)
for c in range(self.channel_num):
np.clip(
image[:, :, c],
clip_min_value[c],
clip_max_value[c],
out=image[:, :, c])
image[:, :, c] -= clip_min_value[c]
image[:, :, c] /= clip_max_value[c] - clip_min_value[c]
self.clipped_im_mean_list[id] = [
image[:, :, c].mean() for c in range(self.channel_num)
]
self.clipped_im_std_list[
id] = [image[:, :, c].std() for c in range(self.channel_num)]
def analysis(self):
self.im_mean_list = [[] for i in range(len(self.file_list))]
self.im_std_list = [[] for i in range(len(self.file_list))]
self.im_value_list = [[] for i in range(len(self.file_list))]
self.im_value_num_list = [[] for i in range(len(self.file_list))]
self.im_height_list = np.zeros(len(self.file_list), dtype='int32')
self.im_width_list = np.zeros(len(self.file_list), dtype='int32')
self.im_channel_list = np.zeros(len(self.file_list), dtype='int32')
self.label_value_list = [[] for i in range(len(self.file_list))]
self.label_value_num_list = [[] for i in range(len(self.file_list))]
num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 8 else 8
threads = []
one_worker_file = len(self.file_list) // num_workers
for i in range(num_workers):
start = one_worker_file * i
end = one_worker_file * (
i + 1) if i < num_workers - 1 else len(self.file_list)
t = threading.Thread(
target=self._get_image_info, args=(start, end))
threads.append(t)
for t in threads:
t.start()
for t in threads:
t.join()
unique, counts = np.unique(self.im_channel_list, return_counts=True)
if len(unique) > 1:
raise Exception("There are {} kinds of image channels: {}.".format(
len(unique), unique[:]))
self.channel_num = unique[0]
shape_info = self._get_shape()
self.max_height = shape_info['max_height']
self.max_width = shape_info['max_width']
self.min_height = shape_info['min_height']
self.min_width = shape_info['min_width']
self.label_pixel_info = self._get_label_pixel_info()
self.im_pixel_info = self._get_image_pixel_info()
mode = osp.split(self.file_list_path)[-1].split('.')[0]
import matplotlib.pyplot as plt
for c in range(self.channel_num):
plt.figure()
plt.bar(self.im_pixel_info[c].keys(),
self.im_pixel_info[c].values(),
width=1,
log=True)
plt.xlabel('image pixel value')
plt.ylabel('number')
plt.title('channel={}'.format(c))
plt.savefig(
osp.join(self.data_dir,
'{}_channel{}_distribute.png'.format(mode, c)),
dpi=100)
plt.close()
max_im_value = list()
min_im_value = list()
for c in range(self.channel_num):
max_im_value.append(max(self.im_pixel_info[c].keys()))
min_im_value.append(min(self.im_pixel_info[c].keys()))
self.max_im_value = np.asarray(max_im_value)
self.min_im_value = np.asarray(min_im_value)
im_mean, im_std = self._get_mean_std()
info = {
'channel_num': self.channel_num,
'image_pixel': self.im_pixel_info,
'label_pixel': self.label_pixel_info,
'file_num': len(self.file_list),
'max_height': self.max_height,
'max_width': self.max_width,
'min_height': self.min_height,
'min_width': self.min_width,
'max_image_value': self.max_im_value,
'min_image_value': self.min_im_value
}
saved_pkl_file = osp.join(self.data_dir,
'{}_infomation.pkl'.format(mode))
with open(osp.join(saved_pkl_file), 'wb') as f:
pickle.dump(info, f)
logging.info(
"############## The analysis results are as follows ##############\n"
)
logging.info("{} samples in file {}\n".format(
len(self.file_list), self.file_list_path))
logging.info("Minimal image height: {} Minimal image width: {}.\n".
format(self.min_height, self.min_width))
logging.info("Maximal image height: {} Maximal image width: {}.\n".
format(self.max_height, self.max_width))
logging.info("Image channel is {}.\n".format(self.channel_num))
logging.info(
"Minimal image value: {} Maximal image value: {} (arranged in 0-{} channel order) \n".
format(self.min_im_value, self.max_im_value, self.channel_num))
logging.info(
"Image pixel distribution of each channel is saved with 'distribute.png' in the {}"
.format(self.data_dir))
logging.info(
"Image mean value: {} Image standard deviation: {} (normalized by the (max_im_value - min_im_value), arranged in 0-{} channel order).\n".
format(im_mean, im_std, self.channel_num))
logging.info(
"Label pixel information is shown in a format of (label_id, the number of label_id, the ratio of label_id):"
)
for v, (n, r) in self.label_pixel_info.items():
logging.info("({}, {}, {})".format(v, n, r))
logging.info("Dataset information is saved in {}".format(
saved_pkl_file))
def cal_clipped_mean_std(self, clip_min_value, clip_max_value,
data_info_file):
if not osp.exists(data_info_file):
raise Exception("Dataset information file {} does not exist.".
format(data_info_file))
with open(data_info_file, 'rb') as f:
im_info = pickle.load(f)
channel_num = im_info['channel_num']
min_im_value = im_info['min_image_value']
max_im_value = im_info['max_image_value']
im_pixel_info = im_info['image_pixel']
if len(clip_min_value) != channel_num or len(
clip_max_value) != channel_num:
raise Exception(
"The length of clip_min_value or clip_max_value should be equal to the number of image channel {}."
.format(channel_num))
for c in range(channel_num):
if clip_min_value[c] < min_im_value[c] or clip_min_value[
c] > max_im_value[c]:
raise Exception(
"Clip_min_value of the channel {} is not in [{}, {}]".
format(c, min_im_value[c], max_im_value[c]))
if clip_max_value[c] < min_im_value[c] or clip_max_value[
c] > max_im_value[c]:
raise Exception(
"Clip_max_value of the channel {} is not in [{}, {}]".
format(c, min_im_value[c], max_im_value[c]))
self.clipped_im_mean_list = [[] for i in range(len(self.file_list))]
self.clipped_im_std_list = [[] for i in range(len(self.file_list))]
num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 8 else 8
threads = []
one_worker_file = len(self.file_list) // num_workers
self.channel_num = channel_num
for i in range(num_workers):
start = one_worker_file * i
end = one_worker_file * (
i + 1) if i < num_workers - 1 else len(self.file_list)
t = threading.Thread(
target=self._get_clipped_mean_std,
args=(start, end, clip_min_value, clip_max_value))
threads.append(t)
for t in threads:
t.start()
for t in threads:
t.join()
im_mean = np.asarray(self.clipped_im_mean_list)
im_mean = im_mean.sum(axis=0)
im_mean = im_mean / len(self.file_list)
im_std = np.asarray(self.clipped_im_std_list)
im_std = im_std.sum(axis=0)
im_std = im_std / len(self.file_list)
for c in range(channel_num):
clip_pixel_num = 0
pixel_num = sum(im_pixel_info[c].values())
for v, n in im_pixel_info[c].items():
if v < clip_min_value[c] or v > clip_max_value[c]:
clip_pixel_num += n
logging.info("Channel {}, the ratio of pixels to be clipped = {}".
format(c, clip_pixel_num / pixel_num))
logging.info(
"Image mean value: {} Image standard deviation: {} (normalized by (clip_max_value - clip_min_value), arranged in 0-{} channel order).\n".
format(im_mean, im_std, self.channel_num))
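# Usage sketch (hypothetical paths; a 3-channel dataset is assumed):
# analyzer = Seg('dataset', 'dataset/train_list.txt', 'dataset/labels.txt')
# analyzer.analysis()  # writes train_list_infomation.pkl (spelling follows the
#                      # code) and per-channel pixel histograms under data_dir
# analyzer.cal_clipped_mean_std([0, 0, 0], [255, 255, 255],
#                               'dataset/train_list_infomation.pkl')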
...@@ -239,9 +239,8 @@ def generate_minibatch(batch_data, label_padding_value=255, mapper=None):
            _, label_h, label_w = data[1].shape
            padding_label[:, :label_h, :label_w] = data[1]
            padding_batch.append((padding_im, padding_label))
        elif len(data[1]) == 0 or isinstance(data[1][0], tuple) and data[
                1][0][0] in ['origin_shape', 'resize', 'padding']:
            # pad the image and insert 'padding' into `im_info`
            # of segmentation during the inference phase
            if len(data[1]) == 0 or 'padding' not in [
......
...@@ -67,6 +67,10 @@ class ImageNet(Dataset):
        with open(file_list, encoding=get_encoding(file_list)) as f:
            for line in f:
                items = line.strip().split()
                if len(items) > 2:
                    raise Exception(
                        "A space is used as the separator, but it also appears in the image or label name {}."
                        .format(line))
                items[0] = path_normalization(items[0])
                if not is_pic(items[0]):
                    continue
......
...@@ -20,7 +20,6 @@ import paddlex.utils.logging as logging
from paddlex.utils import path_normalization
from .dataset import Dataset
from .dataset import get_encoding


class SegDataset(Dataset):
...@@ -65,10 +64,12 @@ class SegDataset(Dataset):
        with open(file_list, encoding=get_encoding(file_list)) as f:
            for line in f:
                items = line.strip().split()
                if len(items) > 2:
                    raise Exception(
                        "A space is used as the separator, but it also appears in the image or label name {}."
                        .format(line))
                items[0] = path_normalization(items[0])
                items[1] = path_normalization(items[1])
                full_path_im = osp.join(data_dir, items[0])
                full_path_label = osp.join(data_dir, items[1])
                if not osp.exists(full_path_im):
......
...@@ -91,6 +91,10 @@ class VOCDetection(Dataset):
                line = fr.readline()
                if not line:
                    break
                if len(line.strip().split()) > 2:
                    raise Exception(
                        "A space is used as the separator, but it also appears in the image or label name {}."
                        .format(line))
                img_file, xml_file = [osp.join(data_dir, x) \
                    for x in line.strip().split()[:2]]
                img_file = path_normalization(img_file)
......
...@@ -23,6 +23,7 @@ import yaml
import copy
import json
import functools
import multiprocessing as mp
import paddlex.utils.logging as logging
from paddlex.utils import seconds_to_hms
from paddlex.utils.utils import EarlyStop
...@@ -76,6 +77,16 @@ class BaseAPI:
        self.completed_epochs = 0
        self.scope = fluid.global_scope()
        # Thread pool used at prediction time to preprocess the input data in
        # parallel, one image per task; used mainly by the batch_predict APIs
        thread_num = mp.cpu_count() if mp.cpu_count() < 8 else 8
        self.thread_pool = mp.pool.ThreadPool(thread_num)

    def reset_thread_pool(self, thread_num):
        self.thread_pool.close()
        self.thread_pool.join()
        self.thread_pool = mp.pool.ThreadPool(thread_num)
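
    # Note: predict()/batch_predict() reuse self.thread_pool across calls; use
    # reset_thread_pool(n) to resize it, e.g. model.reset_thread_pool(4).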

    def _get_single_card_bs(self, batch_size):
        if batch_size % len(self.places) == 0:
            return int(batch_size // len(self.places))
...@@ -356,23 +367,13 @@ class BaseAPI:
        ]
        test_outputs = list(self.test_outputs.values())
        with fluid.scope_guard(self.scope):
            fluid.io.save_inference_model(
                dirname=save_dir,
                executor=self.exe,
                params_filename='__params__',
                feeded_var_names=test_input_names,
                target_vars=test_outputs,
                main_program=self.test_prog)

        model_info = self.get_model_info()
        model_info['status'] = 'Infer'
......
...@@ -279,16 +279,18 @@ class BaseClassifier(BaseAPI):
        return metrics

    @staticmethod
    def _preprocess(images, transforms, model_type, class_name,
                    thread_pool=None):
        arrange_transforms(
            model_type=model_type,
            class_name=class_name,
            transforms=transforms,
            mode='test')
        if thread_pool is not None:
            batch_data = thread_pool.map(transforms, images)
        else:
            batch_data = list()
            for image in images:
                batch_data.append(transforms(image))
        padding_batch = generate_minibatch(batch_data)
        im = np.array([data[0] for data in padding_batch])
...@@ -344,15 +346,13 @@ class BaseClassifier(BaseAPI):
    def batch_predict(self,
                      img_file_list,
                      transforms=None,
                      topk=1):
        """Predict a batch of images.

        Args:
            img_file_list (list|tuple): images to predict together; each element can be an image path
                or a decoded array of shape (H, W, C), float32, in BGR format.
            transforms (paddlex.cls.transforms): preprocessing transforms.
            topk (int): number of top predictions to keep.

        Returns:
            list: one list per image; each element of a per-image list is a dict with keys
                'category_id', 'category' and 'score', giving the predicted class id, class label
                and confidence.
...@@ -367,7 +367,7 @@ class BaseClassifier(BaseAPI):
            transforms = self.test_transforms
        im = BaseClassifier._preprocess(img_file_list, transforms,
                                        self.model_type,
                                        self.__class__.__name__,
                                        self.thread_pool)

        with fluid.scope_guard(self.scope):
            result = self.exe.run(self.test_prog,
......
...@@ -24,6 +24,7 @@ import paddlex.utils.logging as logging
import paddlex
from paddlex.cv.transforms import arrange_transforms
from paddlex.cv.datasets import generate_minibatch
from paddlex.cv.transforms.seg_transforms import Compose
from collections import OrderedDict
from .base import BaseAPI
from .utils.seg_eval import ConfusionMatrix
...@@ -54,6 +55,8 @@ class DeepLabv3p(BaseAPI):
        pooling_crop_size (list): when the backbone is MobileNetV3_large_x1_0_ssld, set this to the model input size used during training, in [W, H] format.
            It is used when taking the image average in the encoder module; if None, a plain mean is taken, and if set to the model input size, the 'pool' operator computes the mean.
            Defaults to None.
        input_channel (int): number of input image channels. Defaults to 3.

    Raises:
        ValueError: use_bce_loss or use_dice_loss is True while num_classes > 2.
        ValueError: backbone is not one of ['Xception65', 'Xception41', 'MobileNetV2_x0.25',
...@@ -75,7 +78,8 @@ class DeepLabv3p(BaseAPI):
                 use_dice_loss=False,
                 class_weight=None,
                 ignore_index=255,
                 pooling_crop_size=None,
                 input_channel=3):
        self.init_params = locals()
        super(DeepLabv3p, self).__init__('segmenter')
        # dice_loss and bce_loss are only applicable to binary segmentation
...@@ -149,6 +153,7 @@ class DeepLabv3p(BaseAPI):
        if self.output_is_logits:
            self.conv_filters = self.num_classes
        self.backbone_lr_mult_list = [0.15, 0.35, 0.65, 0.85, 1]
        self.input_channel = input_channel

    def _get_backbone(self, backbone):
        def mobilenetv2(backbone):
...@@ -236,7 +241,8 @@ class DeepLabv3p(BaseAPI):
            add_image_level_feature=self.add_image_level_feature,
            use_sum_merge=self.use_sum_merge,
            conv_filters=self.conv_filters,
            output_is_logits=self.output_is_logits,
            input_channel=self.input_channel)
        inputs = model.generate_inputs()
        model_out = model.build_net(inputs)
        outputs = OrderedDict()
......
...@@ -443,16 +449,22 @@ class DeepLabv3p(BaseAPI):
        return metrics

    @staticmethod
    def _preprocess(images,
                    transforms,
                    model_type,
                    class_name,
                    thread_pool=None):
        arrange_transforms(
            model_type=model_type,
            class_name=class_name,
            transforms=transforms,
            mode='test')
        if thread_pool is not None:
            batch_data = thread_pool.map(transforms, images)
        else:
            batch_data = list()
            for image in images:
                batch_data.append(transforms(image))
        padding_batch = generate_minibatch(batch_data)
        im = np.array(
            [data[0] for data in padding_batch],
...@@ -517,13 +529,12 @@ class DeepLabv3p(BaseAPI):
        preds = DeepLabv3p._postprocess(result, im_info)
        return preds[0]

    def batch_predict(self, img_file_list, transforms=None):
        """Predict a batch of images.

        Args:
            img_file_list (list|tuple): images to predict together; each element can be an image path
                or a decoded array of shape (H, W, C), float32, in BGR format.
            transforms (paddlex.cv.transforms): preprocessing transforms.

        Returns:
            list: one prediction dict per image, with keys 'label_map' (grayscale prediction, where
                each pixel value is the predicted class id) and 'score_map' (per-class probabilities),
...@@ -538,7 +549,7 @@ class DeepLabv3p(BaseAPI):
            transforms = self.test_transforms
        im, im_info = DeepLabv3p._preprocess(
            img_file_list, transforms, self.model_type,
            self.__class__.__name__, self.thread_pool)

        with fluid.scope_guard(self.scope):
            result = self.exe.run(self.test_prog,
...@@ -549,86 +560,18 @@ class DeepLabv3p(BaseAPI):
        preds = DeepLabv3p._postprocess(result, im_info)
        return preds
def tile_predict(self,
img_file,
tile_size=[512, 512],
batch_size=32,
thread_num=8,
transforms=None):
"""无重叠的大图切小图预测。
Args:
img_file(str|np.ndarray): 预测图像路径,或者是解码后的排列格式为(H, W, C)且类型为float32且为BGR格式的数组。
tile_size(list|tuple): 切分小块的大小,格式为(W,H)。默认值为[512, 512]。
batch_size(int):对小块进行批量预测时的批量大小。默认值为32。
thread_num (int): 并发执行各小块预处理时的线程数。默认值为8。
transforms(paddlex.cv.transforms): 数据预处理操作。
Returns:
dict: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,
像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)
"""
if transforms is None and not hasattr(self, 'test_transforms'):
raise Exception("transforms need to be defined, now is None.")
if isinstance(img_file, str):
image = cv2.imread(img_file)
elif isinstance(img_file, np.ndarray):
image = img_file.copy()
else:
raise Exception("im_file must be list/tuple")
height, width, channel = image.shape
image_tile_list = list()
# crop the image into tile pieces
for h in range(0, height, tile_size[1]):
for w in range(0, width, tile_size[0]):
left = w
upper = h
right = min(w + tile_size[0], width)
lower = min(h + tile_size[1], height)
image_tile = image[upper:lower, left:right, :]
image_tile_list.append(image_tile)
# predict
label_map = np.zeros((height, width), dtype=np.uint8)
score_map = np.zeros(
(height, width, self.num_classes), dtype=np.float32)
num_tiles = len(image_tile_list)
for i in range(0, num_tiles, batch_size):
begin = i
end = min(i + batch_size, num_tiles)
res = self.batch_predict(
img_file_list=image_tile_list[begin:end],
thread_num=thread_num,
transforms=transforms)
for j in range(begin, end):
h_id = j // (width // tile_size[0] + 1)
w_id = j % (width // tile_size[0] + 1)
left = w_id * tile_size[0]
upper = h_id * tile_size[1]
right = min((w_id + 1) * tile_size[0], width)
lower = min((h_id + 1) * tile_size[1], height)
label_map[upper:lower, left:right] = res[j - begin][
"label_map"]
score_map[upper:lower, left:right, :] = res[j - begin][
"score_map"]
result = {"label_map": label_map, "score_map": score_map}
return result
def overlap_tile_predict(self, def overlap_tile_predict(self,
img_file, img_file,
tile_size=[512, 512], tile_size=[512, 512],
pad_size=[64, 64], pad_size=[64, 64],
batch_size=32, batch_size=32,
thread_num=8): transforms=None):
"""有重叠的大图切小图预测。 """有重叠的大图切小图预测。
Args: Args:
img_file(str|np.ndarray): 预测图像路径,或者是解码后的排列格式为(H, W, C)且类型为float32且为BGR格式的数组。 img_file(str|np.ndarray): 预测图像路径,或者是解码后的排列格式为(H, W, C)且类型为float32且为BGR格式的数组。
tile_size(list|tuple): 切分小块中间部分用于拼接预测结果的大小,格式为(W,H)。默认值为[512, 512]。 tile_size(list|tuple): 滑动窗口的大小,该区域内用于拼接预测结果,格式为(W,H)。默认值为[512, 512]。
pad_size(list|tuple): 切分小块向四周扩展的大小,格式为(W,H)。默认值为[64,64]。 pad_size(list|tuple): 滑动窗口向四周扩展的大小,扩展区域内不用于拼接预测结果,格式为(W,H)。默认值为[64,64]。
batch_size(int):对小块进行批量预测时的批量大小。默认值为32 batch_size(int):对窗口进行批量预测时的批量大小。默认值为32
thread_num (int): 并发执行各小块预处理时的线程数。默认值为8。
transforms(paddlex.cv.transforms): 数据预处理操作。 transforms(paddlex.cv.transforms): 数据预处理操作。
...@@ -641,7 +584,7 @@ class DeepLabv3p(BaseAPI): ...@@ -641,7 +584,7 @@ class DeepLabv3p(BaseAPI):
raise Exception("transforms need to be defined, now is None.") raise Exception("transforms need to be defined, now is None.")
if isinstance(img_file, str): if isinstance(img_file, str):
image = cv2.imread(img_file) image, _ = Compose.decode_image(img_file, None)
elif isinstance(img_file, np.ndarray): elif isinstance(img_file, np.ndarray):
image = img_file.copy() image = img_file.copy()
else: else:
...@@ -651,28 +594,37 @@ class DeepLabv3p(BaseAPI): ...@@ -651,28 +594,37 @@ class DeepLabv3p(BaseAPI):
 image_tile_list = list()
 # Padding along the left and right sides
-left_pad = cv2.flip(image[0:height, 0:pad_size[0], :], 1)
-right_pad = cv2.flip(image[0:height, -pad_size[0]:width, :], 1)
-padding_image = cv2.hconcat([left_pad, image])
-padding_image = cv2.hconcat([padding_image, right_pad])
+if pad_size[0] > 0:
+    left_pad = cv2.flip(image[0:height, 0:pad_size[0], :], 1)
+    right_pad = cv2.flip(image[0:height, -pad_size[0]:width, :], 1)
+    padding_image = cv2.hconcat([left_pad, image])
+    padding_image = cv2.hconcat([padding_image, right_pad])
+else:
+    import copy
+    padding_image = copy.deepcopy(image)
 # Padding along the upper and lower sides
 padding_height, padding_width, _ = padding_image.shape
-upper_pad = cv2.flip(padding_image[0:pad_size[1], 0:padding_width, :],
-                     0)
-lower_pad = cv2.flip(
-    padding_image[-pad_size[1]:padding_height, 0:padding_width, :], 0)
-padding_image = cv2.vconcat([upper_pad, padding_image])
-padding_image = cv2.vconcat([padding_image, lower_pad])
-padding_height, padding_width, _ = padding_image.shape
+if pad_size[1] > 0:
+    upper_pad = cv2.flip(
+        padding_image[0:pad_size[1], 0:padding_width, :], 0)
+    lower_pad = cv2.flip(
+        padding_image[-pad_size[1]:padding_height, 0:padding_width, :],
+        0)
+    padding_image = cv2.vconcat([upper_pad, padding_image])
+    padding_image = cv2.vconcat([padding_image, lower_pad])
 # crop the padding image into tile pieces
-for h in range(0, padding_height, tile_size[1]):
-    for w in range(0, padding_width, tile_size[0]):
-        left = w
-        upper = h
-        right = min(w + tile_size[0] + pad_size[0] * 2, padding_width)
-        lower = min(h + tile_size[1] + pad_size[1] * 2, padding_height)
+padding_height, padding_width, _ = padding_image.shape
+for h_id in range(0, height // tile_size[1] + 1):
+    for w_id in range(0, width // tile_size[0] + 1):
+        left = w_id * tile_size[0]
+        upper = h_id * tile_size[1]
+        right = min(left + tile_size[0] + pad_size[0] * 2,
+                    padding_width)
+        lower = min(upper + tile_size[1] + pad_size[1] * 2,
+                    padding_height)
         image_tile = padding_image[upper:lower, left:right, :]
         image_tile_list.append(image_tile)
@@ -686,7 +638,6 @@ class DeepLabv3p(BaseAPI):
             end = min(i + batch_size, num_tiles)
             res = self.batch_predict(
                 img_file_list=image_tile_list[begin:end],
-                thread_num=thread_num,
                 transforms=transforms)
             for j in range(begin, end):
                 h_id = j // (width // tile_size[0] + 1)
@@ -697,9 +648,13 @@ class DeepLabv3p(BaseAPI):
                 lower = min((h_id + 1) * tile_size[1], height)
                 tile_label_map = res[j - begin]["label_map"]
                 tile_score_map = res[j - begin]["score_map"]
+                tile_upper = pad_size[1]
+                tile_lower = tile_label_map.shape[0] - pad_size[1]
+                tile_left = pad_size[0]
+                tile_right = tile_label_map.shape[1] - pad_size[0]
                 label_map[upper:lower, left:right] = \
-                    tile_label_map[pad_size[1]:-pad_size[1], pad_size[0]:-pad_size[0]]
+                    tile_label_map[tile_upper:tile_lower, tile_left:tile_right]
                 score_map[upper:lower, left:right, :] = \
-                    tile_score_map[pad_size[1]:-pad_size[1], pad_size[0]:-pad_size[0], :]
+                    tile_score_map[tile_upper:tile_lower, tile_left:tile_right, :]
         result = {"label_map": label_map, "score_map": score_map}
         return result
@@ -36,6 +36,8 @@ class FastSCNN(DeepLabv3p):
         The loss can also be computed on two or three branches, with weights ordered as
         [fusion_branch_weight, higher_branch_weight, lower_branch_weight]; fusion_branch_weight is the loss
         weight on the branch fusing the spatial-detail and global-context branches, higher_branch_weight is
         the loss weight on the spatial-detail branch, and lower_branch_weight is the loss weight on the
         global-context branch. If higher_branch_weight and lower_branch_weight are not set, the loss on
         those two branches is not computed.
+        input_channel (int): Number of input image channels. Defaults to 3.

     Raises:
         ValueError: use_bce_loss or use_dice_loss is True while num_classes > 2.
@@ -52,7 +54,8 @@ class FastSCNN(DeepLabv3p):
                  use_dice_loss=False,
                  class_weight=None,
                  ignore_index=255,
-                 multi_loss_weight=[1.0]):
+                 multi_loss_weight=[1.0],
+                 input_channel=3):
         self.init_params = locals()
         super(DeepLabv3p, self).__init__('segmenter')
         # dice_loss and bce_loss only apply to two-class segmentation
@@ -93,6 +96,7 @@ class FastSCNN(DeepLabv3p):
         self.ignore_index = ignore_index
         self.labels = None
         self.fixed_input_shape = None
+        self.input_channel = input_channel

     def build_net(self, mode='train'):
         model = paddlex.cv.nets.segmentation.FastSCNN(
@@ -103,7 +107,8 @@ class FastSCNN(DeepLabv3p):
             class_weight=self.class_weight,
             ignore_index=self.ignore_index,
             multi_loss_weight=self.multi_loss_weight,
-            fixed_input_shape=self.fixed_input_shape)
+            fixed_input_shape=self.fixed_input_shape,
+            input_channel=self.input_channel)
         inputs = model.generate_inputs()
         model_out = model.build_net(inputs)
         outputs = OrderedDict()
...
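With input_channel plumbed through the constructor, a segmenter can be built for imagery with more or fewer than three bands. A minimal sketch, assuming the public alias paddlex.seg.FastSCNN forwards to this class:

import paddlex as pdx

# 4-band input, e.g. RGB plus near-infrared; num_classes is task-specific
model = pdx.seg.FastSCNN(num_classes=2, input_channel=4)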
@@ -376,16 +376,18 @@ class FasterRCNN(BaseAPI):
         return metrics

     @staticmethod
-    def _preprocess(images, transforms, model_type, class_name, thread_num=1):
+    def _preprocess(images, transforms, model_type, class_name, thread_pool=None):
         arrange_transforms(
             model_type=model_type,
             class_name=class_name,
             transforms=transforms,
             mode='test')
-        pool = ThreadPool(thread_num)
-        batch_data = pool.map(transforms, images)
-        pool.close()
-        pool.join()
+        if thread_pool is not None:
+            batch_data = thread_pool.map(transforms, images)
+        else:
+            batch_data = list()
+            for image in images:
+                batch_data.append(transforms(image))
         padding_batch = generate_minibatch(batch_data)
         im = np.array([data[0] for data in padding_batch])
         im_resize_info = np.array([data[1] for data in padding_batch])
@@ -453,14 +455,13 @@ class FasterRCNN(BaseAPI):
         return preds[0]

-    def batch_predict(self, img_file_list, transforms=None, thread_num=2):
+    def batch_predict(self, img_file_list, transforms=None):
         """Predict a batch of images.

         Args:
             img_file_list(list|tuple): Predict the images in the list (or tuple) together. Each element
                 can be an image path or a decoded array of shape (H, W, C), float32 type, in BGR format.
             transforms (paddlex.det.transforms): Data preprocessing transforms.
-            thread_num (int): Number of threads used to preprocess the images concurrently.

         Returns:
             list: Each element is a list holding one image's predictions, where every prediction
                 consists of the box class label,
@@ -477,7 +478,7 @@ class FasterRCNN(BaseAPI):
             transforms = self.test_transforms
         im, im_resize_info, im_shape = FasterRCNN._preprocess(
             img_file_list, transforms, self.model_type,
-            self.__class__.__name__, thread_num)
+            self.__class__.__name__, self.thread_pool)
         with fluid.scope_guard(self.scope):
             result = self.exe.run(self.test_prog,
...
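The change replaces a ThreadPool created and torn down on every call with an optional caller-owned pool, falling back to a serial loop when none is supplied. The pattern in isolation (a pure-Python sketch; the names are illustrative):

from multiprocessing.pool import ThreadPool

def preprocess(images, transform, thread_pool=None):
    # Reuse a shared pool when the caller provides one; otherwise stay serial
    if thread_pool is not None:
        return thread_pool.map(transform, images)
    return [transform(img) for img in images]

pool = ThreadPool(4)          # created once, reused across batches
batch = preprocess([1, 2, 3], lambda x: x * 2, pool)
pool.close()
pool.join()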
@@ -34,6 +34,7 @@ class HRNet(DeepLabv3p):
         the weights are computed automatically, each class weighted as its ratio * num_classes. When
         class_weight keeps its default None, every class has weight 1, i.e. the ordinary
         cross-entropy loss.
         ignore_index (int): Value ignored on the label; pixels whose label equals ignore_index do not
             contribute to the loss. Defaults to 255.
+        input_channel (int): Number of input image channels. Defaults to 3.

     Raises:
         ValueError: use_bce_loss or use_dice_loss is True while num_classes > 2.
@@ -48,7 +49,8 @@ class HRNet(DeepLabv3p):
                  use_bce_loss=False,
                  use_dice_loss=False,
                  class_weight=None,
-                 ignore_index=255):
+                 ignore_index=255,
+                 input_channel=3):
         self.init_params = locals()
         super(DeepLabv3p, self).__init__('segmenter')
         # dice_loss and bce_loss only apply to two-class segmentation
@@ -79,6 +81,7 @@ class HRNet(DeepLabv3p):
         self.ignore_index = ignore_index
         self.labels = None
         self.fixed_input_shape = None
+        self.input_channel = input_channel

     def build_net(self, mode='train'):
         model = paddlex.cv.nets.segmentation.HRNet(
@@ -89,7 +92,8 @@ class HRNet(DeepLabv3p):
             use_dice_loss=self.use_dice_loss,
             class_weight=self.class_weight,
             ignore_index=self.ignore_index,
-            fixed_input_shape=self.fixed_input_shape)
+            fixed_input_shape=self.fixed_input_shape,
+            input_channel=self.input_channel)
         inputs = model.generate_inputs()
         model_out = model.build_net(inputs)
         outputs = OrderedDict()
...
@@ -408,14 +408,13 @@ class MaskRCNN(FasterRCNN):
         return preds[0]

-    def batch_predict(self, img_file_list, transforms=None, thread_num=2):
+    def batch_predict(self, img_file_list, transforms=None):
         """Predict a batch of images.

         Args:
             img_file_list(list|tuple): Predict the images in the list (or tuple) together. Each element
                 can be an image path or a decoded array of shape (H, W, C), float32 type, in BGR format.
             transforms (paddlex.det.transforms): Data preprocessing transforms.
-            thread_num (int): Number of threads used to preprocess the images concurrently.

         Returns:
             dict: Each element is a list holding one image's predictions: box class label, class name,
                 box coordinates in [xmin, ymin, w, h] format,
@@ -432,7 +431,7 @@ class MaskRCNN(FasterRCNN):
             transforms = self.test_transforms
         im, im_resize_info, im_shape = FasterRCNN._preprocess(
             img_file_list, transforms, self.model_type,
-            self.__class__.__name__, thread_num)
+            self.__class__.__name__, self.thread_pool)
         with fluid.scope_guard(self.scope):
             result = self.exe.run(self.test_prog,
...
@@ -18,6 +18,7 @@ import tqdm
 import os.path as osp
 import numpy as np
 from multiprocessing.pool import ThreadPool
+import paddle
 import paddle.fluid as fluid
 from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
 from paddle.fluid.optimizer import ExponentialMovingAverage
@@ -36,7 +37,7 @@ class PPYOLO(BaseAPI):
     Args:
         num_classes (int): Number of classes. Defaults to 80.
-        backbone (str): PPYOLO backbone network, chosen from ['ResNet50_vd']. Defaults to 'ResNet50_vd'.
+        backbone (str): PPYOLO backbone network, chosen from ['ResNet50_vd_ssld']. Defaults to 'ResNet50_vd_ssld'.
         with_dcn_v2 (bool): Whether the backbone uses the DCNv2 structure. Defaults to True.
         anchors (list|tuple): Widths and heights of the anchor boxes; None means using the defaults
             [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
@@ -123,6 +124,9 @@ class PPYOLO(BaseAPI):
         self.use_ema = False
         self.with_dcn_v2 = with_dcn_v2
+        if paddle.__version__ < '1.8.4' and paddle.__version__ != '0.0.0':
+            raise Exception(
+                "PPYOLO requires paddlepaddle or paddlepaddle-gpu >= 1.8.4")

     def _get_backbone(self, backbone_name):
         if backbone_name.startswith('ResNet50_vd'):
             backbone = paddlex.cv.nets.ResNet(
@@ -447,16 +451,18 @@ class PPYOLO(BaseAPI):
         return evaluate_metrics

     @staticmethod
-    def _preprocess(images, transforms, model_type, class_name, thread_num=1):
+    def _preprocess(images, transforms, model_type, class_name, thread_pool=None):
         arrange_transforms(
             model_type=model_type,
             class_name=class_name,
             transforms=transforms,
             mode='test')
-        pool = ThreadPool(thread_num)
-        batch_data = pool.map(transforms, images)
-        pool.close()
-        pool.join()
+        if thread_pool is not None:
+            batch_data = thread_pool.map(transforms, images)
+        else:
+            batch_data = list()
+            for image in images:
+                batch_data.append(transforms(image))
         padding_batch = generate_minibatch(batch_data)
         im = np.array(
             [data[0] for data in padding_batch],
@@ -520,14 +526,13 @@ class PPYOLO(BaseAPI):
                                     len(images), self.num_classes, self.labels)
         return preds[0]

-    def batch_predict(self, img_file_list, transforms=None, thread_num=2):
+    def batch_predict(self, img_file_list, transforms=None):
         """Predict a batch of images.

         Args:
             img_file_list (list|tuple): Predict the images in the list (or tuple) together. Each element
                 can be an image path or a decoded array of shape (H, W, C), float32 type, in BGR format.
             transforms (paddlex.det.transforms): Data preprocessing transforms.
-            thread_num (int): Number of threads used to preprocess the images concurrently.

         Returns:
             list: Each element is a list holding one image's predictions: box class label, class name,
                 box coordinates in [xmin, ymin, w, h] format,
@@ -543,7 +548,7 @@ class PPYOLO(BaseAPI):
             transforms = self.test_transforms
         im, im_size = PPYOLO._preprocess(img_file_list, transforms,
                                          self.model_type,
-                                         self.__class__.__name__, thread_num)
+                                         self.__class__.__name__, self.thread_pool)
         with fluid.scope_guard(self.scope):
             result = self.exe.run(self.test_prog,
...
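The constructor guard compares version strings lexicographically, which works within the 1.x line it targets ('0.0.0' marks a develop build and is exempted) but would misorder a hypothetical '1.10.0'. A tuple-based variant of the same check (a sketch, not code from this commit; it assumes plain 'major.minor.patch' version strings):

import paddle

def version_tuple(version):
    # '1.8.4' -> (1, 8, 4); only the first three numeric components are used
    return tuple(int(p) for p in version.split('.')[:3])

if paddle.__version__ != '0.0.0' and version_tuple(
        paddle.__version__) < (1, 8, 4):
    raise Exception("PPYOLO requires paddlepaddle(-gpu) >= 1.8.4")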
@@ -91,7 +91,23 @@ sensitivities_data = {
     'DeepLabv3p_Xception65_aspp_decoder':
     'https://bj.bcebos.com/paddlex/slim_prune/deeplab_xception65_with_aspp_decoder.sensitivities',
     'DeepLabv3p_Xception41_aspp_decoder':
-    'https://bj.bcebos.com/paddlex/slim_prune/deeplab_xception41_with_aspp_decoder.sensitivities'
+    'https://bj.bcebos.com/paddlex/slim_prune/deeplab_xception41_with_aspp_decoder.sensitivities',
+    'HRNet_W18_Seg':
+    'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w18.sensitivities',
+    'HRNet_W30_Seg':
+    'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w30.sensitivities',
+    'HRNet_W32_Seg':
+    'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w32.sensitivities',
+    'HRNet_W40_Seg':
+    'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w40.sensitivities',
+    'HRNet_W44_Seg':
+    'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w44.sensitivities',
+    'HRNet_W48_Seg':
+    'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w48.sensitivities',
+    'HRNet_W64_Seg':
+    'https://bj.bcebos.com/paddlex/slim_prune/hrnet_w64.sensitivities',
+    'FastSCNN':
+    'https://bj.bcebos.com/paddlex/slim_prune/fast_scnn.sensitivities'
 }
@@ -105,6 +121,8 @@ def get_sensitivities(flag, model, save_dir):
     elif hasattr(model, 'encoder_with_aspp') or hasattr(model,
                                                         'enable_decoder'):
         model_type = model_type + '_' + 'aspp' + '_' + 'decoder'
+    if model_type.startswith('HRNet') and model.model_type == 'segmenter':
+        model_type = '{}_W{}_Seg'.format(model_type, model.width)
     if osp.isfile(flag):
         return flag
     elif flag == 'DEFAULT':
@@ -244,6 +262,28 @@ def get_prune_params(model):
             if i in prune_names:
                 prune_names.remove(i)
+    elif model_type.startswith('HRNet') and model.model_type == 'segmenter':
+        for param in program.global_block().all_parameters():
+            if 'weight' not in param.name:
+                continue
+            prune_names.append(param.name)
+        params_not_prune = ['conv-1_weights']
+        for i in params_not_prune:
+            if i in prune_names:
+                prune_names.remove(i)
+    elif model_type.startswith('FastSCNN'):
+        for param in program.global_block().all_parameters():
+            if 'weight' not in param.name:
+                continue
+            if 'dwise' in param.name or 'depthwise' in param.name or 'logit' in param.name:
+                continue
+            prune_names.append(param.name)
+        params_not_prune = ['classifier/weights']
+        for i in params_not_prune:
+            if i in prune_names:
+                prune_names.remove(i)
     elif model_type.startswith('DeepLabv3p'):
         for param in program.global_block().all_parameters():
             if 'weight' not in param.name:
...
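Both new branches follow the same selection recipe: collect every parameter whose name marks it as a weight, skip layers that should never be pruned (depthwise and logit layers in the FastSCNN case), then strip an explicit blocklist. Reduced to plain Python with made-up parameter names:

all_params = ['conv-1_weights', 'conv2_weights', 'dwise_conv_weights',
              'logit_weights', 'fc_bias', 'classifier/weights']
params_not_prune = ['classifier/weights']

prune_names = [
    name for name in all_params
    if 'weight' in name
    and not any(tag in name for tag in ('dwise', 'depthwise', 'logit'))
    and name not in params_not_prune
]
print(prune_names)   # ['conv-1_weights', 'conv2_weights']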
@@ -33,6 +33,7 @@ class UNet(DeepLabv3p):
         the weights are computed automatically, each class weighted as its ratio * num_classes. When
         class_weight keeps its default None, every class has weight 1, i.e. the ordinary
         cross-entropy loss.
         ignore_index (int): Value ignored on the label; pixels whose label equals ignore_index do not
             contribute to the loss. Defaults to 255.
+        input_channel (int): Number of input image channels. Defaults to 3.

     Raises:
         ValueError: use_bce_loss or use_dice_loss is True while num_classes > 2.
@@ -47,7 +48,8 @@ class UNet(DeepLabv3p):
                  use_bce_loss=False,
                  use_dice_loss=False,
                  class_weight=None,
-                 ignore_index=255):
+                 ignore_index=255,
+                 input_channel=3):
         self.init_params = locals()
         super(DeepLabv3p, self).__init__('segmenter')
         # dice_loss and bce_loss only apply to two-class segmentation
@@ -78,6 +80,7 @@ class UNet(DeepLabv3p):
         self.ignore_index = ignore_index
         self.labels = None
         self.fixed_input_shape = None
+        self.input_channel = input_channel

     def build_net(self, mode='train'):
         model = paddlex.cv.nets.segmentation.UNet(
@@ -88,7 +91,8 @@ class UNet(DeepLabv3p):
             use_dice_loss=self.use_dice_loss,
             class_weight=self.class_weight,
             ignore_index=self.ignore_index,
-            fixed_input_shape=self.fixed_input_shape)
+            fixed_input_shape=self.fixed_input_shape,
+            input_channel=self.input_channel)
         inputs = model.generate_inputs()
         model_out = model.build_net(inputs)
         outputs = OrderedDict()
...
@@ -20,6 +20,7 @@ import numpy as np
 import time
 import paddlex.utils.logging as logging
 from .detection_eval import fixed_linspace, backup_linspace, loadRes
+from paddlex.cv.datasets.dataset import is_pic

 def visualize_detection(image, result, threshold=0.5, save_dir='./'):
@@ -44,7 +45,11 @@ def visualize_detection(image, result, threshold=0.5, save_dir='./'):
     return image

-def visualize_segmentation(image, result, weight=0.6, save_dir='./'):
+def visualize_segmentation(image,
+                           result,
+                           weight=0.6,
+                           save_dir='./',
+                           color=None):
     """
     Convert segment result to color image, and save added image.

     Args:
@@ -52,10 +57,14 @@ def visualize_segmentation(image, result, weight=0.6, save_dir='./'):
         result: the predict result of image
         weight: the image weight of visual image, and the result weight is (1 - weight)
         save_dir: the directory for saving visual image
+        color: the list of a BGR-mode color for each label.
     """
     label_map = result['label_map']
     color_map = get_color_map_list(256)
+    if color is not None:
+        color_map[0:len(color) // 3][:] = color
     color_map = np.array(color_map).astype("uint8")

     # Use OpenCV LUT for color mapping
     c1 = cv2.LUT(label_map, color_map[:, 0])
     c2 = cv2.LUT(label_map, color_map[:, 1])
@@ -65,11 +74,26 @@ def visualize_segmentation(image, result, weight=0.6, save_dir='./'):
     if isinstance(image, np.ndarray):
         im = image
         image_name = str(int(time.time() * 1000)) + '.jpg'
+        if image.shape[2] != 3:
+            logging.info(
+                "The image is not 3-channel array, so predicted label map is shown as a pseudo color image."
+            )
+            weight = 0.
     else:
         image_name = os.path.split(image)[-1]
-        im = cv2.imread(image)
+        if not is_pic(image):
+            logging.info(
+                "The image cannot be opened by opencv, so predicted label map is shown as a pseudo color image."
+            )
+            image_name = image_name.split('.')[0] + '.jpg'
+            weight = 0.
+        else:
+            im = cv2.imread(image)

-    vis_result = cv2.addWeighted(im, weight, pseudo_img, 1 - weight, 0)
+    if abs(weight) < 1e-5:
+        vis_result = pseudo_img
+    else:
+        vis_result = cv2.addWeighted(im, weight, pseudo_img, 1 - weight, 0)

     if save_dir is not None:
         if not os.path.exists(save_dir):
...
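With weight forced to 0 for inputs OpenCV cannot blend, the pseudo-color map is emitted on its own instead of being mixed with the source image. A hedged usage sketch, assuming the public wrapper paddlex.seg.visualize forwards the new color argument (the model and image paths are hypothetical):

import paddlex as pdx

model = pdx.load_model('output/unet/best_model')
result = model.predict('demo.jpg')
# color is a flat list of BGR triples, one per label id:
# label 0 -> blue, label 1 -> green
pdx.seg.visualize(
    'demo.jpg', result, weight=0.6, save_dir='./vis',
    color=[255, 0, 0, 0, 255, 0])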
@@ -16,6 +16,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import paddle
 from paddle import fluid
 try:
     from collections.abc import Sequence
@@ -67,19 +68,34 @@ class YOLOv3Loss(object):
             scale_x_y = self.scale_x_y if not isinstance(
                 self.scale_x_y, Sequence) else self.scale_x_y[i]
             anchor_mask = anchor_masks[i]
-            loss = fluid.layers.yolov3_loss(
-                x=output,
-                gt_box=gt_box,
-                gt_label=gt_label,
-                gt_score=gt_score,
-                anchors=anchors,
-                anchor_mask=anchor_mask,
-                class_num=num_classes,
-                ignore_thresh=self._ignore_thresh,
-                downsample_ratio=self.downsample[i],
-                use_label_smooth=self._label_smooth,
-                scale_x_y=scale_x_y,
-                name=prefix_name + "yolo_loss" + str(i))
+            if paddle.__version__ < '1.8.4' and paddle.__version__ != '0.0.0':
+                loss = fluid.layers.yolov3_loss(
+                    x=output,
+                    gt_box=gt_box,
+                    gt_label=gt_label,
+                    gt_score=gt_score,
+                    anchors=anchors,
+                    anchor_mask=anchor_mask,
+                    class_num=num_classes,
+                    ignore_thresh=self._ignore_thresh,
+                    downsample_ratio=self.downsample[i],
+                    use_label_smooth=self._label_smooth,
+                    name=prefix_name + "yolo_loss" + str(i))
+            else:
+                loss = fluid.layers.yolov3_loss(
+                    x=output,
+                    gt_box=gt_box,
+                    gt_label=gt_label,
+                    gt_score=gt_score,
+                    anchors=anchors,
+                    anchor_mask=anchor_mask,
+                    class_num=num_classes,
+                    ignore_thresh=self._ignore_thresh,
+                    downsample_ratio=self.downsample[i],
+                    use_label_smooth=self._label_smooth,
+                    scale_x_y=scale_x_y,
+                    name=prefix_name + "yolo_loss" + str(i))
             losses.append(fluid.layers.reduce_mean(loss))
...
@@ -12,6 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import numpy as np
+import paddle
 from paddle import fluid
 from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.regularizer import L2Decay
@@ -311,7 +314,7 @@ class YOLOv3:
     def _upsample(self, input, scale=2, name=None):
         out = fluid.layers.resize_nearest(
-            input=input, scale=float(scale), name=name)
+            input=input, scale=float(scale), name=name, align_corners=False)
         return out

     def _detection_block(self,
@@ -407,16 +410,28 @@ class YOLOv3:
             scale_x_y = self.scale_x_y if not isinstance(
                 self.scale_x_y, Sequence) else self.scale_x_y[i]
-            box, score = fluid.layers.yolo_box(
-                x=input,
-                img_size=im_size,
-                anchors=self.mask_anchors[i],
-                class_num=self.num_classes,
-                conf_thresh=self.nms.score_threshold,
-                downsample_ratio=self.downsample[i],
-                name=self.prefix_name + 'yolo_box' + str(i),
-                clip_bbox=self.clip_bbox,
-                scale_x_y=self.scale_x_y)
+            if paddle.__version__ < '1.8.4' and paddle.__version__ != '0.0.0':
+                box, score = fluid.layers.yolo_box(
+                    x=input,
+                    img_size=im_size,
+                    anchors=self.mask_anchors[i],
+                    class_num=self.num_classes,
+                    conf_thresh=self.nms.score_threshold,
+                    downsample_ratio=self.downsample[i],
+                    name=self.prefix_name + 'yolo_box' + str(i),
+                    clip_bbox=self.clip_bbox)
+            else:
+                box, score = fluid.layers.yolo_box(
+                    x=input,
+                    img_size=im_size,
+                    anchors=self.mask_anchors[i],
+                    class_num=self.num_classes,
+                    conf_thresh=self.nms.score_threshold,
+                    downsample_ratio=self.downsample[i],
+                    name=self.prefix_name + 'yolo_box' + str(i),
+                    clip_bbox=self.clip_bbox,
+                    scale_x_y=self.scale_x_y)
             boxes.append(box)
             scores.append(fluid.layers.transpose(score, perm=[0, 2, 1]))
...
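The two nearly identical calls differ only in whether scale_x_y is passed, since that argument first appeared in Paddle 1.8.4. The same gate can be written once by building the keyword set conditionally (a sketch, not the code in this commit):

import paddle

def scale_kwargs(scale_x_y):
    # Pass scale_x_y only when the installed Paddle supports it
    kwargs = {}
    if not (paddle.__version__ < '1.8.4' and paddle.__version__ != '0.0.0'):
        kwargs['scale_x_y'] = scale_x_y
    return kwargs

# box, score = fluid.layers.yolo_box(x=input, img_size=im_size, ...,
#                                    **scale_kwargs(self.scale_x_y))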
@@ -235,10 +235,13 @@ class HRNet(object):
                     name=name + '_layer_' + str(i + 1) + '_' + str(j + 1))
                 if self.feature_maps == "stage4":
                     y = fluid.layers.resize_bilinear(
-                        input=y, out_shape=[height, width])
+                        input=y,
+                        out_shape=[height, width],
+                        align_corners=False,
+                        align_mode=1)
                 else:
                     y = fluid.layers.resize_nearest(
-                        input=y, scale=2**(j - i))
+                        input=y, scale=2**(j - i), align_corners=False)
                 residual = fluid.layers.elementwise_add(
                     x=residual, y=y, act=None)
             elif j < i:
...
@@ -72,6 +72,7 @@ class DeepLabv3p(object):
     def __init__(self,
                  num_classes,
                  backbone,
+                 input_channel=3,
                  mode='train',
                  output_stride=16,
                  aspp_with_sep_conv=True,
@@ -115,6 +116,7 @@ class DeepLabv3p(object):
                 format(type(class_weight)))

         self.num_classes = num_classes
+        self.input_channel = input_channel
         self.backbone = backbone
         self.mode = mode
         self.use_bce_loss = use_bce_loss
@@ -402,13 +404,16 @@ class DeepLabv3p(object):
         if self.fixed_input_shape is not None:
             input_shape = [
-                None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0]
+                None, self.input_channel, self.fixed_input_shape[1],
+                self.fixed_input_shape[0]
             ]
             inputs['image'] = fluid.data(
                 dtype='float32', shape=input_shape, name='image')
         else:
             inputs['image'] = fluid.data(
-                dtype='float32', shape=[None, 3, None, None], name='image')
+                dtype='float32',
+                shape=[None, self.input_channel, None, None],
+                name='image')
         if self.mode == 'train':
             inputs['label'] = fluid.data(
                 dtype='int32', shape=[None, 1, None, None], name='label')
...
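The network-level change mirrors the API-level one: the image placeholder becomes an NCHW fluid.data tensor whose channel dimension follows input_channel, and the same pattern is repeated in the FastSCNN, HRNet, and UNet nets below. In isolation (a sketch using the fluid API these files already import; the channel count is an assumption):

import paddle.fluid as fluid

input_channel = 4          # e.g. RGB plus NIR
image = fluid.data(
    dtype='float32',
    shape=[None, input_channel, None, None],   # N, C, H, W
    name='image')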
@@ -33,6 +33,7 @@ from .model_utils.loss import bce_loss
 class FastSCNN(object):
     def __init__(self,
                  num_classes,
+                 input_channel=3,
                  mode='train',
                  use_bce_loss=False,
                  use_dice_loss=False,
@@ -62,6 +63,7 @@ class FastSCNN(object):
                 format(type(class_weight)))

         self.num_classes = num_classes
+        self.input_channel = input_channel
         self.mode = mode
         self.use_bce_loss = use_bce_loss
         self.use_dice_loss = use_dice_loss
@@ -137,13 +139,16 @@ class FastSCNN(object):
         inputs = OrderedDict()
         if self.fixed_input_shape is not None:
             input_shape = [
-                None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0]
+                None, self.input_channel, self.fixed_input_shape[1],
+                self.fixed_input_shape[0]
             ]
             inputs['image'] = fluid.data(
                 dtype='float32', shape=input_shape, name='image')
         else:
             inputs['image'] = fluid.data(
-                dtype='float32', shape=[None, 3, None, None], name='image')
+                dtype='float32',
+                shape=[None, self.input_channel, None, None],
+                name='image')
         if self.mode == 'train':
             inputs['label'] = fluid.data(
                 dtype='int32', shape=[None, 1, None, None], name='label')
...
@@ -32,6 +32,7 @@ import paddlex
 class HRNet(object):
     def __init__(self,
                  num_classes,
+                 input_channel=3,
                  mode='train',
                  width=18,
                  use_bce_loss=False,
@@ -61,6 +62,7 @@ class HRNet(object):
                 format(type(class_weight)))

         self.num_classes = num_classes
+        self.input_channel = input_channel
         self.mode = mode
         self.use_bce_loss = use_bce_loss
         self.use_dice_loss = use_dice_loss
@@ -136,13 +138,16 @@ class HRNet(object):
         if self.fixed_input_shape is not None:
             input_shape = [
-                None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0]
+                None, self.input_channel, self.fixed_input_shape[1],
+                self.fixed_input_shape[0]
             ]
             inputs['image'] = fluid.data(
                 dtype='float32', shape=input_shape, name='image')
         else:
             inputs['image'] = fluid.data(
-                dtype='float32', shape=[None, 3, None, None], name='image')
+                dtype='float32',
+                shape=[None, self.input_channel, None, None],
+                name='image')
         if self.mode == 'train':
             inputs['label'] = fluid.data(
                 dtype='int32', shape=[None, 1, None, None], name='label')
...
@@ -64,6 +64,7 @@ class UNet(object):
     def __init__(self,
                  num_classes,
+                 input_channel=3,
                  mode='train',
                  upsample_mode='bilinear',
                  use_bce_loss=False,
@@ -92,6 +93,7 @@ class UNet(object):
                 'Expect class_weight is a list or string but receive {}'.
                 format(type(class_weight)))
         self.num_classes = num_classes
+        self.input_channel = input_channel
         self.mode = mode
         self.upsample_mode = upsample_mode
         self.use_bce_loss = use_bce_loss
@@ -232,13 +234,16 @@ class UNet(object):
         if self.fixed_input_shape is not None:
             input_shape = [
-                None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0]
+                None, self.input_channel, self.fixed_input_shape[1],
+                self.fixed_input_shape[0]
             ]
             inputs['image'] = fluid.data(
                 dtype='float32', shape=input_shape, name='image')
         else:
             inputs['image'] = fluid.data(
-                dtype='float32', shape=[None, 3, None, None], name='image')
+                dtype='float32',
+                shape=[None, self.input_channel, None, None],
+                name='image')
         if self.mode == 'train':
             inputs['label'] = fluid.data(
                 dtype='int32', shape=[None, 1, None, None], name='label')
...
@@ -18,11 +18,15 @@ import numpy as np
 from PIL import Image, ImageEnhance

-def normalize(im, mean, std):
-    im = im / 255.0
+def normalize(im, mean, std, min_value=[0, 0, 0], max_value=[255, 255, 255]):
+    # Rescaling (min-max normalization)
+    range_value = [max_value[i] - min_value[i] for i in range(len(max_value))]
+    im = (im - min_value) / range_value
+    # Standardization (Z-score Normalization)
     im -= mean
     im /= std
-    return im
+    return im.astype('float32')

 def permute(im, to_bgr=False):
...
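With illustrative numbers, the two steps of the extended normalize (a worked example, not code from this commit):

import numpy as np

im = np.array([[[255.0, 0.0, 127.5]]])            # a single BGR pixel
mean = np.array([0.5, 0.5, 0.5])
std = np.array([0.5, 0.5, 0.5])
min_value, max_value = [0, 0, 0], [255, 255, 255]

range_value = [max_value[i] - min_value[i] for i in range(3)]
im = (im - min_value) / range_value               # step 1: [1.0, 0.0, 0.5]
im = (im - mean) / std                            # step 2: [1.0, -1.0, 0.0]
print(im.astype('float32'))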
@@ -20,7 +20,11 @@ import os.path as osp
 import numpy as np
 from PIL import Image
 import cv2
+import imghdr
+import six
+import sys
 from collections import OrderedDict

 import paddlex.utils.logging as logging
@@ -60,6 +64,63 @@ class Compose(SegTransform):
                 "Elements in transforms should be defined in 'paddlex.seg.transforms' or class of imgaug.augmenters.Augmenter, see docs here: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/"
             )

+    @staticmethod
+    def read_img(img_path):
+        img_format = imghdr.what(img_path)
+        name, ext = osp.splitext(img_path)
+        if img_format == 'tiff' or ext == '.img':
+            try:
+                import gdal
+            except:
+                six.reraise(*sys.exc_info())
+                raise Exception(
+                    "Please refer to https://github.com/PaddlePaddle/PaddleX/tree/develop/examples/multi-channel_remote_sensing/README.md to install gdal"
+                )
+            dataset = gdal.Open(img_path)
+            if dataset == None:
+                raise Exception('Can not open', img_path)
+            im_data = dataset.ReadAsArray()
+            return im_data.transpose((1, 2, 0))
+        elif img_format in ['jpeg', 'bmp', 'png']:
+            return cv2.imread(img_path)
+        elif ext == '.npy':
+            return np.load(img_path)
+        else:
+            raise Exception('Image format {} is not supported!'.format(ext))
+
+    @staticmethod
+    def decode_image(im, label):
+        if isinstance(im, np.ndarray):
+            if len(im.shape) != 3:
+                raise Exception(
+                    "im should be 3-dimensions, but now is {}-dimensions".
+                    format(len(im.shape)))
+        else:
+            try:
+                im = Compose.read_img(im)
+            except:
+                raise ValueError('Can\'t read The image file {}!'.format(im))
+        im = im.astype('float32')
+        if label is not None:
+            if isinstance(label, np.ndarray):
+                if len(label.shape) != 2:
+                    raise Exception(
+                        "label should be 2-dimensions, but now is {}-dimensions".
+                        format(len(label.shape)))
+            else:
+                try:
+                    label = np.asarray(Image.open(label))
+                except:
+                    ValueError('Can\'t read The label file {}!'.format(label))
+            im_height, im_width, _ = im.shape
+            label_height, label_width = label.shape
+            if im_height != label_height or im_width != label_width:
+                raise Exception(
+                    "The height or width of the image is not same as the label")
+        return (im, label)
+
     def __call__(self, im, im_info=None, label=None):
         """
         Args:
@@ -73,24 +134,12 @@ class Compose(SegTransform):
             tuple: A tuple assembled from the fields the network needs; the fields are determined
                 by the last preprocessing op in transforms.
         """
-        if isinstance(im, np.ndarray):
-            if len(im.shape) != 3:
-                raise Exception(
-                    "im should be 3-dimensions, but now is {}-dimensions".
-                    format(len(im.shape)))
-        else:
-            try:
-                im = cv2.imread(im)
-            except:
-                raise ValueError('Can\'t read The image file {}!'.format(im))
-        im = im.astype('float32')
-        if im_info is None:
-            im_info = [('origin_shape', im.shape[0:2])]
+        im, label = self.decode_image(im, label)
         if self.to_rgb:
             im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+        if im_info is None:
+            im_info = [('origin_shape', im.shape[0:2])]
         if label is not None:
-            if not isinstance(label, np.ndarray):
-                label = np.asarray(Image.open(label))
             origin_label = label.copy()
         for op in self.transforms:
             if isinstance(op, SegTransform):
@@ -550,22 +599,35 @@ class ResizeStepScaling(SegTransform):

 class Normalize(SegTransform):
     """Normalize an image.
-    1. Rescale pixel values to [0, 1].
-    2. Subtract the mean, then divide by the standard deviation.
+    1. Subtract min_val from the pixel values.
+    2. Divide the pixel values by (max_val - min_val).
+    3. Subtract the mean, then divide by the standard deviation.

     Args:
         mean (list): Mean of the dataset. Defaults to [0.5, 0.5, 0.5].
         std (list): Standard deviation of the dataset. Defaults to [0.5, 0.5, 0.5].
+        min_val (list): Minimum value of the dataset. Defaults to [0, 0, 0].
+        max_val (list): Maximum value of the dataset. Defaults to [255.0, 255.0, 255.0].

     Raises:
         ValueError: mean or std is not a list, or std contains 0.
     """

-    def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
+    def __init__(self,
+                 mean=[0.5, 0.5, 0.5],
+                 std=[0.5, 0.5, 0.5],
+                 min_val=[0, 0, 0],
+                 max_val=[255.0, 255.0, 255.0]):
+        self.min_val = min_val
+        self.max_val = max_val
         self.mean = mean
         self.std = std
         if not (isinstance(self.mean, list) and isinstance(self.std, list)):
             raise ValueError("{}: input type is invalid.".format(self))
+        if not (isinstance(self.min_val, list) and isinstance(self.max_val,
+                                                              list)):
+            raise ValueError("{}: input type is invalid.".format(self))
         from functools import reduce
         if reduce(lambda x, y: x * y, self.std) == 0:
             raise ValueError('{}: std is invalid!'.format(self))
@@ -588,7 +650,8 @@ class Normalize(SegTransform):
         mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
         std = np.array(self.std)[np.newaxis, np.newaxis, :]
-        im = normalize(im, mean, std)
+        im = normalize(im, mean, std, self.min_val, self.max_val)
+        im = im.astype('float32')

         if label is None:
             return (im, im_info)
@@ -660,29 +723,29 @@ class Padding(SegTransform):
         target_width = self.target_size[0]
         pad_height = target_height - im_height
         pad_width = target_width - im_width
-        if pad_height < 0 or pad_width < 0:
-            logging.warning(
-                'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})'
-                .format(im_width, im_height, target_width, target_height))
         pad_height = max(pad_height, 0)
         pad_width = max(pad_width, 0)
-        im = cv2.copyMakeBorder(
-            im,
-            0,
-            pad_height,
-            0,
-            pad_width,
-            cv2.BORDER_CONSTANT,
-            value=self.im_padding_value)
-        if label is not None:
-            label = cv2.copyMakeBorder(
-                label,
-                0,
-                pad_height,
-                0,
-                pad_width,
-                cv2.BORDER_CONSTANT,
-                value=self.label_padding_value)
+        if (pad_height > 0 or pad_width > 0):
+            im_channel = im.shape[2]
+            import copy
+            orig_im = copy.deepcopy(im)
+            im = np.zeros((im_height + pad_height, im_width + pad_width,
+                           im_channel)).astype(orig_im.dtype)
+            for i in range(im_channel):
+                im[:, :, i] = np.pad(
+                    orig_im[:, :, i],
+                    pad_width=((0, pad_height), (0, pad_width)),
+                    mode='constant',
+                    constant_values=(self.im_padding_value[i],
+                                     self.im_padding_value[i]))
+
+            if label is not None:
+                label = np.pad(label,
+                               pad_width=((0, pad_height), (0, pad_width)),
+                               mode='constant',
+                               constant_values=(self.label_padding_value,
+                                                self.label_padding_value))
         if label is None:
             return (im, im_info)
         else:
@@ -753,23 +816,26 @@ class RandomPaddingCrop(SegTransform):
         pad_height = max(crop_height - img_height, 0)
         pad_width = max(crop_width - img_width, 0)
         if (pad_height > 0 or pad_width > 0):
-            im = cv2.copyMakeBorder(
-                im,
-                0,
-                pad_height,
-                0,
-                pad_width,
-                cv2.BORDER_CONSTANT,
-                value=self.im_padding_value)
+            img_channel = im.shape[2]
+            import copy
+            orig_im = copy.deepcopy(im)
+            im = np.zeros((img_height + pad_height, img_width + pad_width,
+                           img_channel)).astype(orig_im.dtype)
+            for i in range(img_channel):
+                im[:, :, i] = np.pad(
+                    orig_im[:, :, i],
+                    pad_width=((0, pad_height), (0, pad_width)),
+                    mode='constant',
+                    constant_values=(self.im_padding_value[i],
+                                     self.im_padding_value[i]))
             if label is not None:
-                label = cv2.copyMakeBorder(
-                    label,
-                    0,
-                    pad_height,
-                    0,
-                    pad_width,
-                    cv2.BORDER_CONSTANT,
-                    value=self.label_padding_value)
+                label = np.pad(label,
+                               pad_width=((0, pad_height), (0, pad_width)),
+                               mode='constant',
+                               constant_values=(self.label_padding_value,
+                                                self.label_padding_value))
             img_height = im.shape[0]
             img_width = im.shape[1]
@@ -1066,6 +1132,33 @@ class RandomDistort(SegTransform):
         return (im, im_info, label)

+class Clip(SegTransform):
+    """Clip pixel values that fall outside a given range.
+
+    Args:
+        min_val (list): Lower bound; values below min_val are set to min_val. Defaults to [0, 0, 0].
+        max_val (list): Upper bound; values above max_val are set to max_val. Defaults to [255.0, 255.0, 255.0].
+    """
+
+    def __init__(self, min_val=[0, 0, 0], max_val=[255.0, 255.0, 255.0]):
+        self.min_val = min_val
+        self.max_val = max_val
+        if not (isinstance(self.min_val, list) and isinstance(self.max_val,
+                                                              list)):
+            raise ValueError("{}: input type is invalid.".format(self))
+
+    def __call__(self, im, im_info=None, label=None):
+        for k in range(im.shape[2]):
+            np.clip(
+                im[:, :, k], self.min_val[k], self.max_val[k], out=im[:, :, k])
+
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
 class ArrangeSegmenter(SegTransform):
     """Collect the fields needed for training/validation/prediction.
...
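Put together, the new pieces make a multi-band training pipeline possible: read_img pulls in TIFF/IMG/NPY inputs, Clip truncates sensor outliers, and Normalize rescales with per-channel statistics. A hedged sketch for 10-band imagery (the channel count and statistics are made up):

from paddlex.seg import transforms

channel = 10
train_transforms = transforms.Compose([
    # truncate outliers to the assumed valid sensor range per band
    transforms.Clip(min_val=[0.] * channel, max_val=[1000.] * channel),
    # min-max rescale with the same range, then standardize
    transforms.Normalize(
        mean=[0.5] * channel,
        std=[0.5] * channel,
        min_val=[0.] * channel,
        max_val=[1000.] * channel),
])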
@@ -16,10 +16,14 @@ import os.path as osp
 import cv2
 import numpy as np
 import yaml
+import multiprocessing as mp
 import paddlex
 import paddle.fluid as fluid
 from paddlex.cv.transforms import build_transforms
-from paddlex.cv.models import BaseClassifier, YOLOv3, FasterRCNN, MaskRCNN, DeepLabv3p
+from paddlex.cv.models import BaseClassifier
+from paddlex.cv.models import PPYOLO, FasterRCNN, MaskRCNN
+from paddlex.cv.models import DeepLabv3p
+import paddlex.utils.logging as logging

 class Predictor:
@@ -27,7 +31,7 @@ class Predictor:
                  model_dir,
                  use_gpu=True,
                  gpu_id=0,
-                 use_mkl=False,
+                 use_mkl=True,
                  mkl_thread_num=4,
                  use_trt=False,
                  use_glog=False,
@@ -77,6 +81,15 @@ class Predictor:
         self.predictor = self.create_predictor(use_gpu, gpu_id, use_mkl,
                                                mkl_thread_num, use_trt,
                                                use_glog, memory_optimize)
+        # Thread pool used at prediction time to preprocess the input data
+        # in parallel, one image per task; mainly serves the batch_predict API
+        thread_num = mp.cpu_count() if mp.cpu_count() < 8 else 8
+        self.thread_pool = mp.pool.ThreadPool(thread_num)
+
+    def reset_thread_pool(self, thread_num):
+        self.thread_pool.close()
+        self.thread_pool.join()
+        self.thread_pool = mp.pool.ThreadPool(thread_num)

     def create_predictor(self,
                          use_gpu=True,
@@ -95,9 +108,14 @@ class Predictor:
             config.enable_use_gpu(100, gpu_id)
         else:
             config.disable_gpu()
-        if use_mkl:
-            config.enable_mkldnn()
-            config.set_cpu_math_library_num_threads(mkl_thread_num)
+        if use_mkl and not use_gpu:
+            if self.model_name not in ["HRNet", "DeepLabv3p", "PPYOLO"]:
+                config.enable_mkldnn()
+                config.set_cpu_math_library_num_threads(mkl_thread_num)
+            else:
+                logging.warning(
+                    "HRNet/DeepLabv3p/PPYOLO are not supported for the use of mkldnn\n"
+                )
         if use_glog:
             config.enable_glog_info()
         else:
@@ -112,7 +130,7 @@ class Predictor:
         predictor = fluid.core.create_paddle_predictor(config)
         return predictor

-    def preprocess(self, image, thread_num=1):
+    def preprocess(self, image, thread_pool=None):
         """Preprocess the images.

         Args:
@@ -126,16 +144,16 @@ class Predictor:
                 self.transforms,
                 self.model_type,
                 self.model_name,
-                thread_num=thread_num)
+                thread_pool=thread_pool)
             res['image'] = im
         elif self.model_type == "detector":
-            if self.model_name == "YOLOv3":
-                im, im_size = YOLOv3._preprocess(
+            if self.model_name in ["PPYOLO", "YOLOv3"]:
+                im, im_size = PPYOLO._preprocess(
                     image,
                     self.transforms,
                     self.model_type,
                     self.model_name,
-                    thread_num=thread_num)
+                    thread_pool=thread_pool)
                 res['image'] = im
                 res['im_size'] = im_size
             if self.model_name.count('RCNN') > 0:
@@ -144,7 +162,7 @@ class Predictor:
                     self.transforms,
                     self.model_type,
                     self.model_name,
-                    thread_num=thread_num)
+                    thread_pool=thread_pool)
                 res['image'] = im
                 res['im_info'] = im_resize_info
                 res['im_shape'] = im_shape
@@ -154,7 +172,7 @@ class Predictor:
                 self.transforms,
                 self.model_type,
                 self.model_name,
-                thread_num=thread_num)
+                thread_pool=thread_pool)
             res['image'] = im
             res['im_info'] = im_info
         return res
@@ -190,8 +208,8 @@ class Predictor:
         res = {'bbox': (results[0][0], offset_to_lengths(results[0][1])), }
         res['im_id'] = (np.array(
             [[i] for i in range(batch_size)]).astype('int32'), [[]])
-        if self.model_name == "YOLOv3":
-            preds = YOLOv3._postprocess(res, batch_size, self.num_classes,
-                                        self.labels)
+        if self.model_name in ["PPYOLO", "YOLOv3"]:
+            preds = PPYOLO._postprocess(res, batch_size, self.num_classes,
+                                        self.labels)
         elif self.model_name == "FasterRCNN":
             preds = FasterRCNN._postprocess(res, batch_size,
@@ -251,17 +269,16 @@ class Predictor:
         return results[0]

-    def batch_predict(self, image_list, topk=1, thread_num=2):
+    def batch_predict(self, image_list, topk=1):
         """Predict a batch of images.

         Args:
             image_list(list|tuple): Predict the images in the list (or tuple) together. Each element
                 can be an image path or a decoded array of shape (H, W, C), float32 type, in BGR format.
-            thread_num (int): Number of threads used to preprocess the images concurrently.
             topk(int): Used in classification; return the top-k predictions.
         """
-        preprocessed_input = self.preprocess(image_list)
+        preprocessed_input = self.preprocess(image_list, self.thread_pool)
         model_pred = self.raw_predict(preprocessed_input)
         im_shape = None if 'im_shape' not in preprocessed_input else preprocessed_input[
             'im_shape']
...
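A hedged usage sketch of the deploy-time Predictor after this change: preprocessing threads come from the internal pool, which can be resized via reset_thread_pool (the model directory and image paths are hypothetical):

import paddlex as pdx

predictor = pdx.deploy.Predictor('./inference_model', use_gpu=True)
predictor.reset_thread_pool(4)          # optional: resize the shared pool
results = predictor.batch_predict(image_list=['1.jpg', '2.jpg'])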
@@ -8,3 +8,4 @@ paddleslim == 1.0.1
 shapely
 x2paddle
 paddlepaddle-gpu
+opencv-python
@@ -19,7 +19,7 @@ long_description = "PaddlePaddle Entire Process Development Toolkit"

 setuptools.setup(
     name="paddlex",
-    version='1.1.1',
+    version='1.2.1',
     author="paddlex",
     author_email="paddlex@baidu.com",
     description=long_description,
@@ -30,8 +30,8 @@ setuptools.setup(
     setup_requires=['cython', 'numpy'],
     install_requires=[
         "pycocotools;platform_system!='Windows'", 'pyyaml', 'colorama', 'tqdm',
-        'paddleslim==1.0.1', 'visualdl>=2.0.0b', 'paddlehub>=1.6.2',
-        'shapely>=1.7.0'
+        'paddleslim==1.1.1', 'visualdl>=2.0.0', 'paddlehub>=1.8.2',
+        'shapely>=1.7.0', "opencv-python"
     ],
     classifiers=[
         "Programming Language :: Python :: 3",
...