Merge pull request #2 from PaddlePaddle/develop

0508

Merge pull request #2 from PaddlePaddle/develop
0508
ca669355 · SunAhong1993 · GitHub · 13501bfb · 526e112a · ca669355
57 changed file
--- a/deploy/README.md
+++ b/deploy/README.md
+# 模型部署
+
+本目录为PaddleX模型部署代码。
--- a/deploy/cpp/.clang-format
+++ b/deploy/cpp/.clang-format
+# This file is used by clang-format to autoformat paddle source code
+#
+# The clang-format is part of llvm toolchain.
+# You need to install llvm and clang to format the source code.
+#
+# The basic usage is,
+#   clang-format -i -style=file PATH/TO/SOURCE/CODE
+#
+# The -style=file option implicitly uses the ".clang-format" file located in
+# one of the parent directories.
+# The -i means inplace change.
+#
+# The document of clang-format is 
+#   http://clang.llvm.org/docs/ClangFormat.html
+#   http://clang.llvm.org/docs/ClangFormatStyleOptions.html
+---
+Language:        Cpp
+BasedOnStyle:  Google
+IndentWidth:     2
+TabWidth:        2
+ContinuationIndentWidth: 4
+AccessModifierOffset: -1  # The private/protected/public has no indent in class
+Standard:  Cpp11 
+AllowAllParametersOfDeclarationOnNextLine: true
+BinPackParameters: false
+BinPackArguments: false
+...
--- a/deploy/cpp/CMakeLists.txt
+++ b/deploy/cpp/CMakeLists.txt
+cmake_minimum_required(VERSION 3.0)
+project(PaddleX CXX C)
+
+# Build switches. The last argument of each option() is its default value.
+option(WITH_MKL        "Compile demo with MKL (ON) or OpenBLAS (OFF), default use MKL."   ON)
+option(WITH_GPU        "Compile demo with GPU (ON) or CPU (OFF), default use GPU."       ON)
+option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static."   ON)
+option(WITH_TENSORRT "Compile demo with TensorRT."   OFF)
+
+SET(PADDLE_DIR "" CACHE PATH "Location of libraries")
+SET(OPENCV_DIR "" CACHE PATH "Location of libraries")
+SET(CUDA_LIB "" CACHE PATH "Location of libraries")
+
+include(cmake/yaml-cpp.cmake)
+
+include_directories("${CMAKE_SOURCE_DIR}/")
+include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include")
+link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib")
+
+# Rewrite every "/MD" occurrence to "/MT" in the C++ flag variables of all
+# build configurations. This stays a macro so the modified flag variables are
+# written in the caller's scope.
+macro(safe_set_static_flag)
+    foreach(cxx_flag_name IN ITEMS
+        CMAKE_CXX_FLAGS
+        CMAKE_CXX_FLAGS_DEBUG
+        CMAKE_CXX_FLAGS_RELEASE
+        CMAKE_CXX_FLAGS_MINSIZEREL
+        CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+      # ${cxx_flag_name} expands to the variable *name*, which if() then
+      # dereferences to the flag string.
+      if(${cxx_flag_name} MATCHES "/MD")
+        string(REPLACE "/MD" "/MT" ${cxx_flag_name} "${${cxx_flag_name}}")
+      endif()
+    endforeach()
+endmacro()
+
+if (WITH_MKL)
+    ADD_DEFINITIONS(-DUSE_MKL)
+endif()
+
+# PADDLE_DIR and OPENCV_DIR are declared as cache variables defaulting to "",
+# so the empty-string test is the one that matters. The expansion is quoted so
+# that an empty value reaches the FATAL_ERROR below instead of turning the
+# condition into a malformed `if(... OR STREQUAL "")`.
+if (NOT DEFINED PADDLE_DIR OR "${PADDLE_DIR}" STREQUAL "")
+    message(FATAL_ERROR "please set PADDLE_DIR with -DPADDLE_DIR=/path/paddle_inference_dir")
+endif()
+
+if (NOT DEFINED OPENCV_DIR OR "${OPENCV_DIR}" STREQUAL "")
+    message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv")
+endif()
+
+include_directories("${CMAKE_SOURCE_DIR}/")
+include_directories("${PADDLE_DIR}/")
+include_directories("${PADDLE_DIR}/third_party/install/protobuf/include")
+include_directories("${PADDLE_DIR}/third_party/install/glog/include")
+include_directories("${PADDLE_DIR}/third_party/install/gflags/include")
+include_directories("${PADDLE_DIR}/third_party/install/xxhash/include")
+if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/include")
+    include_directories("${PADDLE_DIR}/third_party/install/snappy/include")
+endif()
+if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include")
+    include_directories("${PADDLE_DIR}/third_party/install/snappystream/include")
+endif()
+include_directories("${PADDLE_DIR}/third_party/install/zlib/include")
+include_directories("${PADDLE_DIR}/third_party/boost")
+include_directories("${PADDLE_DIR}/third_party/eigen3")
+
+if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
+    link_directories("${PADDLE_DIR}/third_party/install/snappy/lib")
+endif()
+if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
+    link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib")
+endif()
+
+link_directories("${PADDLE_DIR}/third_party/install/zlib/lib")
+link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib")
+link_directories("${PADDLE_DIR}/third_party/install/glog/lib")
+link_directories("${PADDLE_DIR}/third_party/install/gflags/lib")
+link_directories("${PADDLE_DIR}/third_party/install/xxhash/lib")
+link_directories("${PADDLE_DIR}/paddle/lib/")
+link_directories("${CMAKE_CURRENT_BINARY_DIR}")
+
+if (WIN32)
+  include_directories("${PADDLE_DIR}/paddle/fluid/inference")
+  include_directories("${PADDLE_DIR}/paddle/include")
+  link_directories("${PADDLE_DIR}/paddle/fluid/inference")
+  find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/build/ NO_DEFAULT_PATH)
+  unset(OpenCV_DIR CACHE)
+else ()
+  find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/share/OpenCV NO_DEFAULT_PATH)
+  include_directories("${PADDLE_DIR}/paddle/include")
+  link_directories("${PADDLE_DIR}/paddle/lib")
+endif ()
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+# Platform-specific compiler flags.
+if (WIN32)
+    # Empty GOOGLE_GLOG_DLL_DECL, plus /bigobj and the static MSVC runtime
+    # (/MT, /MTd) for both C and C++.
+    add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
+    set(CMAKE_C_FLAGS_DEBUG   "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
+    set(CMAKE_C_FLAGS_RELEASE  "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
+    set(CMAKE_CXX_FLAGS_DEBUG  "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
+    set(CMAKE_CXX_FLAGS_RELEASE   "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
+    if (WITH_STATIC_LIB)
+        safe_set_static_flag()
+        add_definitions(-DSTATIC_LIB)
+    endif()
+else()
+    # -O2 (capital O) selects the optimisation level; the lowercase -o2 that
+    # was here before is the "write output to file 2" option and enabled no
+    # optimisation at all.
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O2 -fopenmp -std=c++11")
+    set(CMAKE_STATIC_LIBRARY_PREFIX "")
+endif()
+
+# GPU builds need the CUDA library directory, and on non-Windows platforms the
+# cuDNN library directory as well. Expansions are quoted so an empty value
+# produces the FATAL_ERROR message rather than an if() syntax error.
+if (WITH_GPU)
+    if (NOT DEFINED CUDA_LIB OR "${CUDA_LIB}" STREQUAL "")
+        message(FATAL_ERROR "please set CUDA_LIB with -DCUDA_LIB=/path/cuda/lib64")
+    endif()
+    if (NOT WIN32)
+        # An empty CUDNN_LIB is as unusable as an undefined one: it is later
+        # used as the directory prefix of libcudnn.
+        if (NOT DEFINED CUDNN_LIB OR "${CUDNN_LIB}" STREQUAL "")
+            message(FATAL_ERROR "please set CUDNN_LIB with -DCUDNN_LIB=/path/cudnn/")
+        endif()
+    endif()
+endif()
+
+
+if (NOT WIN32)
+  if (WITH_TENSORRT AND WITH_GPU)
+      include_directories("${PADDLE_DIR}/third_party/install/tensorrt/include")
+      link_directories("${PADDLE_DIR}/third_party/install/tensorrt/lib")
+  endif()
+endif(NOT WIN32)
+
+if (NOT WIN32)
+    set(NGRAPH_PATH "${PADDLE_DIR}/third_party/install/ngraph")
+    if(EXISTS ${NGRAPH_PATH})
+        include(GNUInstallDirs)
+        include_directories("${NGRAPH_PATH}/include")
+        link_directories("${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}")
+        set(NGRAPH_LIB ${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}/libngraph${CMAKE_SHARED_LIBRARY_SUFFIX})
+    endif()
+endif()
+
+# Select the math backend: MKLML (plus MKL-DNN when its directory exists under
+# PADDLE_DIR) when WITH_MKL is ON, otherwise the static OpenBLAS library.
+# The result is stored in MATH_LIB / MKLDNN_LIB for the link step.
+if(WITH_MKL)
+  include_directories("${PADDLE_DIR}/third_party/install/mklml/include")
+  if (WIN32)
+    set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.lib
+            ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.lib)
+  else ()
+    set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
+            ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
+    # NOTE(review): this runs at configure time and copies into the system
+    # directory /usr/lib; it has a side effect outside the build tree and its
+    # result is not checked. Confirm whether an rpath or LD_LIBRARY_PATH setup
+    # could replace it.
+    execute_process(COMMAND cp -r ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} /usr/lib)
+  endif ()
+  set(MKLDNN_PATH "${PADDLE_DIR}/third_party/install/mkldnn")
+  # MKL-DNN is optional: MKLDNN_LIB stays unset when the directory is absent.
+  if(EXISTS ${MKLDNN_PATH})
+    include_directories("${MKLDNN_PATH}/include")
+    if (WIN32)
+      set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib)
+    else ()
+      # The shared-object version suffix (.so.0) is hard-coded here.
+      set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
+    endif ()
+  endif()
+else()
+  set(MATH_LIB ${PADDLE_DIR}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
+endif()
+
+# Choose the Paddle inference library that starts the DEPS list.
+#
+# Previously the WIN32 probing below was followed by an unconditional
+# WITH_STATIC_LIB if/else that overwrote DEPS, so the probing never had any
+# effect. The branches are now mutually exclusive.
+if (WIN32)
+    # Prefer the library under paddle/fluid/inference when it exists, otherwise
+    # fall back to paddle/lib.
+    if(EXISTS "${PADDLE_DIR}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}")
+        set(DEPS
+            ${PADDLE_DIR}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
+    else()
+        set(DEPS
+            ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
+    endif()
+elseif(WITH_STATIC_LIB)
+    set(DEPS
+        ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
+else()
+    set(DEPS
+        ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
+endif()
+
+if (NOT WIN32)
+    set(DEPS ${DEPS}
+        ${MATH_LIB} ${MKLDNN_LIB} 
+        glog gflags protobuf z xxhash yaml-cpp
+        )
+    if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
+        set(DEPS ${DEPS} snappystream)
+    endif()
+    if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
+        set(DEPS ${DEPS} snappy)
+    endif()
+else()
+    set(DEPS ${DEPS}
+        ${MATH_LIB} ${MKLDNN_LIB}
+        glog gflags_static libprotobuf zlibstatic xxhash libyaml-cppmt)
+    set(DEPS ${DEPS} libcmt shlwapi)
+    if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
+        set(DEPS ${DEPS} snappy)
+    endif()
+    if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
+        set(DEPS ${DEPS} snappystream)
+    endif()
+endif(NOT WIN32)
+
+if(WITH_GPU)
+  if(NOT WIN32)
+    if (WITH_TENSORRT)
+      set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer${CMAKE_STATIC_LIBRARY_SUFFIX})
+      set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX})
+    endif()
+    set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
+    set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
+  else()
+    set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
+    set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
+    set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX})
+  endif()
+endif()
+
+if (NOT WIN32)
+    set(EXTERNAL_LIB "-ldl -lrt -lgomp -lz -lm -lpthread")
+    set(DEPS ${DEPS} ${EXTERNAL_LIB})
+endif()
+
+set(DEPS ${DEPS} ${OpenCV_LIBS}) 
+add_executable(classifier src/classifier.cpp src/transforms.cpp src/paddlex.cpp)
+ADD_DEPENDENCIES(classifier ext-yaml-cpp)
+target_link_libraries(classifier ${DEPS})
+
+add_executable(detector src/detector.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
+ADD_DEPENDENCIES(detector ext-yaml-cpp)
+target_link_libraries(detector ${DEPS})
+
+add_executable(segmenter src/segmenter.cpp src/transforms.cpp src/paddlex.cpp src/visualize.cpp)
+ADD_DEPENDENCIES(segmenter ext-yaml-cpp)
+target_link_libraries(segmenter ${DEPS})
+
+# On Windows with MKL, copy the MKLML / OpenMP / MKL-DNN runtime DLLs into the
+# working directory and into ./release after each demo executable is built.
+# One loop registers the same command list for every target; the detector
+# target previously lacked the ./release/mkldnn.dll copy that the other two had.
+if (WIN32 AND WITH_MKL)
+    foreach(demo_target classifier detector segmenter)
+        add_custom_command(TARGET ${demo_target} POST_BUILD
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./mklml.dll
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./libiomp5md.dll
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll
+        )
+    endforeach()
+endif()
+
--- a/deploy/cpp/CMakeSettings.json
+++ b/deploy/cpp/CMakeSettings.json
+{
+    "configurations": [
+        {
+            "name": "x64-Release",
+            "generator": "Ninja",
+            "configurationType": "RelWithDebInfo",
+            "inheritEnvironments": [ "msvc_x64_x64" ],
+            "buildRoot": "${projectDir}\\out\\build\\${name}",
+            "installRoot": "${projectDir}\\out\\install\\${name}",
+            "cmakeCommandArgs": "",
+            "buildCommandArgs": "-v",
+            "ctestCommandArgs": "",
+            "variables": [
+                {
+                    "name": "OPENCV_DIR",
+                    "value": "C:/projects/opencv",
+                    "type": "PATH"
+                },
+                {
+                    "name": "PADDLE_DIR",
+                    "value": "C:/projects/fluid_install_dir_win_cpu_1.6/fluid_install_dir_win_cpu_1.6",
+                    "type": "PATH"
+                },
+                {
+                    "name": "CMAKE_BUILD_TYPE",
+                    "value": "Release",
+                    "type": "STRING"
+                },
+                {
+                    "name": "WITH_STATIC_LIB",
+                    "value": "True",
+                    "type": "BOOL"
+                },
+                {
+                    "name": "WITH_MKL",
+                    "value": "True",
+                    "type": "BOOL"
+                },
+                {
+                    "name": "WITH_GPU",
+                    "value": "False",
+                    "type": "BOOL"
+                }
+            ]
+        }
+    ]
+}
\ No newline at end of file
--- a/deploy/cpp/cmake/yaml-cpp.cmake
+++ b/deploy/cpp/cmake/yaml-cpp.cmake
+# Builds yaml-cpp as an external project named `ext-yaml-cpp`.
+# NOTE(review): the sources are fetched through URL below, so the Git
+# requirement may be unnecessary - confirm before removing.
+find_package(Git REQUIRED)
+
+include(ExternalProject)
+
+# Prints the current build type (an empty line when CMAKE_BUILD_TYPE is unset).
+message("${CMAKE_BUILD_TYPE}")
+
+ExternalProject_Add(
+        ext-yaml-cpp
+        # Archive to download, verified against the MD5 checksum.
+        URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip
+        URL_MD5 9542d6de397d1fbd649ed468cb5850e6
+        # Tests, tools, contrib and install are switched off and a static
+        # library is requested; the build type and C++ flags of this project
+        # (as they are when this file is included) are forwarded.
+        CMAKE_ARGS
+        -DYAML_CPP_BUILD_TESTS=OFF
+        -DYAML_CPP_BUILD_TOOLS=OFF
+        -DYAML_CPP_INSTALL=OFF
+        -DYAML_CPP_BUILD_CONTRIB=OFF
+        -DMSVC_SHARED_RT=OFF
+        -DBUILD_SHARED_LIBS=OFF
+        -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+        -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+        -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
+        -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
+        # Library output goes to <build dir>/ext/yaml-cpp/lib.
+        -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
+        -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
+        PREFIX "${CMAKE_BINARY_DIR}/ext/yaml-cpp"
+        # Disable install step
+        INSTALL_COMMAND ""
+        LOG_DOWNLOAD ON
+        LOG_BUILD 1
+)
+
+
--- a/deploy/cpp/include/paddlex/config_parser.h
+++ b/deploy/cpp/include/paddlex/config_parser.h
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "yaml-cpp/yaml.h"
+
+#ifdef _WIN32
+#define OS_PATH_SEP "\\"
+#else
+#define OS_PATH_SEP "/"
+#endif
+
+namespace PaddleX {
+
+// Inference model configuration parser
+class ConfigPaser {
+ public:
+  ConfigPaser() {}
+
+  ~ConfigPaser() {}
+
+  // Loads the YAML file `<model_dir><OS_PATH_SEP><cfg>` and stores its
+  // "Transforms" node in the Transforms_ member.
+  //
+  // Returns true on success. Returns false, after writing a message to
+  // stderr, when the file has no "Transforms" field.
+  bool load_config(const std::string& model_dir,
+                   const std::string& cfg = "model.yml") {
+    // Load as a YAML::Node
+    YAML::Node config = YAML::LoadFile(model_dir + OS_PATH_SEP + cfg);
+
+    if (!config["Transforms"].IsDefined()) {
+      std::cerr << "There's no field 'Transforms' in model.yml" << std::endl;
+      return false;
+    }
+
+    // Assign to the member. The previous code declared a new local variable
+    // named `transforms_` here, so Transforms_ was never populated.
+    Transforms_ = config["Transforms"];
+    return true;
+  }
+
+  // "Transforms" section of the most recently loaded configuration.
+  YAML::Node Transforms_;
+};
+
+}  // namespace PaddleX
--- a/deploy/cpp/include/paddlex/paddlex.h
+++ b/deploy/cpp/include/paddlex/paddlex.h
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <functional>
+#include <iostream>
+#include <numeric>
+
+#include "yaml-cpp/yaml.h"
+
+#ifdef _WIN32
+#define OS_PATH_SEP "\\"
+#else
+#define OS_PATH_SEP "/"
+#endif
+
+#include "paddle_inference_api.h"  // NOLINT
+
+#include "include/paddlex/config_parser.h"
+#include "include/paddlex/results.h"
+#include "include/paddlex/transforms.h"
+
+namespace PaddleX {
+
+// Bundles a Paddle predictor with the configuration, preprocessing pipeline
+// and buffers declared below. Only Init() is defined in this header; every
+// other member function is declared here and implemented elsewhere.
+class Model {
+ public:
+  // Convenience entry point: forwards all arguments to create_predictor().
+  void Init(const std::string& model_dir,
+            bool use_gpu = false,
+            int gpu_id = 0) {
+    create_predictor(model_dir, use_gpu, gpu_id);
+  }
+
+  // presumably builds predictor_ from the model in model_dir - TODO confirm
+  // against the implementation.
+  void create_predictor(const std::string& model_dir,
+                        bool use_gpu = false,
+                        int gpu_id = 0);
+
+  // presumably reads the model configuration found in model_dir - TODO confirm.
+  bool load_config(const std::string& model_dir);
+
+  // Takes an input image and an output ImageBlob; returns a success flag.
+  bool preprocess(const cv::Mat& input_im, ImageBlob* blob);
+
+  // predict() is overloaded on the result type (classification, detection,
+  // segmentation); each overload returns a success flag.
+  bool predict(const cv::Mat& im, ClsResult* result);
+
+  bool predict(const cv::Mat& im, DetResult* result);
+
+  bool predict(const cv::Mat& im, SegResult* result);
+
+  // postprocess() is overloaded for segmentation and detection results.
+  bool postprocess(SegResult* result);
+
+  bool postprocess(DetResult* result);
+
+  // NOTE(review): `type` and `name` look like model metadata - confirm where
+  // they are filled in.
+  std::string type;
+  std::string name;
+  // Integer id -> label string.
+  std::map<int, std::string> labels;
+  // Preprocessing pipeline.
+  Transforms transforms_;
+  // Preprocessed input data.
+  ImageBlob inputs_;
+  // Float output buffer.
+  std::vector<float> outputs_;
+  // Owned Paddle predictor instance.
+  std::unique_ptr<paddle::PaddlePredictor> predictor_;
+};
+}  // namespace PaddleX
--- a/deploy/cpp/include/paddlex/results.h
+++ b/deploy/cpp/include/paddlex/results.h
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace PaddleX {
+
+template <class T>
+struct Mask {
+  std::vector<T> data;
+  std::vector<int> shape;
+  void clear() {
+    data.clear();
+    shape.clear();
+  }
+};
+
+struct Box {
+  int category_id;
+  std::string category;
+  float score;
+  std::vector<float> coordinate;
+  Mask<float> mask;
+};
+
+class BaseResult {
+ public:
+  std::string type = "base";
+};
+
+// Classification result: predicted category id, its label and its score.
+class ClsResult : public BaseResult {
+ public:
+  // Initialised so that a result which was never filled in (for example when
+  // prediction fails) holds well-defined values instead of indeterminate ones.
+  int category_id = -1;
+  std::string category;
+  float score = 0.0f;
+  // Hides BaseResult::type for accesses made through a ClsResult.
+  std::string type = "cls";
+};
+
+class DetResult : public BaseResult {
+ public:
+  std::vector<Box> boxes;
+  int mask_resolution;
+  std::string type = "det";
+  void clear() { boxes.clear(); }
+};
+
+// Segmentation result: a label map and a score map.
+class SegResult : public BaseResult {
+ public:
+  Mask<int64_t> label_map;
+  Mask<float> score_map;
+  // Type tag, added for consistency with ClsResult ("cls") and DetResult
+  // ("det"); without it SegResult::type resolved to BaseResult's "base".
+  std::string type = "seg";
+  // Empties both maps (data and shape).
+  void clear() {
+    label_map.clear();
+    score_map.clear();
+  }
+};
+}  // namespace PaddleX
--- a/deploy/cpp/include/paddlex/transforms.h
+++ b/deploy/cpp/include/paddlex/transforms.h
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <yaml-cpp/yaml.h>
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+namespace PaddleX {
+
+// Object for storing all preprocessed data
+class ImageBlob {
+ public:
+  // Original image height and width
+  std::vector<int> ori_im_size_ = std::vector<int>(2);
+  // Newest image height and width after process
+  std::vector<int> new_im_size_ = std::vector<int>(2);
+  // Image height and width before padding
+  std::vector<int> im_size_before_padding_ = std::vector<int>(2);
+  // Image height and width before resize
+  std::vector<int> im_size_before_resize_ = std::vector<int>(2);
+  // Reshape order
+  std::vector<std::string> reshape_order_;
+  // Resize scale
+  float scale = 1.0;
+  // Buffer for image data after preprocessing
+  std::vector<float> im_data_;
+
+  void clear() {
+    ori_im_size_.clear();
+    new_im_size_.clear();
+    im_size_before_padding_.clear();
+    im_size_before_resize_.clear();
+    reshape_order_.clear();
+    im_data_.clear();
+  }
+};
+
+// Abstraction of a preprocessing operation.
+// Init() configures the operation from its YAML node; Run() applies it to the
+// image and the accompanying ImageBlob and returns a success flag.
+class Transform {
+ public:
+  // Virtual destructor: this is a polymorphic base that is held through
+  // std::shared_ptr<Transform>, so destroying a derived object through a
+  // base pointer must be well-defined.
+  virtual ~Transform() = default;
+  virtual void Init(const YAML::Node& item) = 0;
+  virtual bool Run(cv::Mat* im, ImageBlob* data) = 0;
+};
+
+class Normalize : public Transform {
+ public:
+  virtual void Init(const YAML::Node& item) {
+    mean_ = item["mean"].as<std::vector<float>>();
+    std_ = item["std"].as<std::vector<float>>();
+  }
+
+  virtual bool Run(cv::Mat* im, ImageBlob* data);
+
+ private:
+  std::vector<float> mean_;
+  std::vector<float> std_;
+};
+
+class ResizeByShort : public Transform {
+ public:
+  virtual void Init(const YAML::Node& item) {
+    short_size_ = item["short_size"].as<int>();
+    if (item["max_size"].IsDefined()) {
+      max_size_ = item["max_size"].as<int>();
+    } else {
+      max_size_ = -1;
+    }
+  };
+  virtual bool Run(cv::Mat* im, ImageBlob* data);
+
+ private:
+  float GenerateScale(const cv::Mat& im);
+  int short_size_;
+  int max_size_;
+};
+
+class ResizeByLong : public Transform {
+ public:
+  virtual void Init(const YAML::Node& item) {
+    long_size_ = item["long_size"].as<int>();
+  };
+  virtual bool Run(cv::Mat* im, ImageBlob* data);
+
+ private:
+  int long_size_;
+};
+
+// Resize to a fixed target size.
+class Resize : public Transform {
+ public:
+  // Reads "target_size" (a scalar used for both sides, or a sequence read as
+  // [width, height]) and the optional "interp" string.
+  // Terminates the process with exit(-1) when either side is not positive.
+  virtual void Init(const YAML::Node& item) {
+    // "interp" applies to both target_size forms; it was previously read only
+    // in the scalar branch, leaving interp_ empty for sequence configs.
+    // NOTE(review): interp_ stays empty when the key is absent - confirm that
+    // Run() copes with that.
+    if (item["interp"].IsDefined()) {
+      interp_ = item["interp"].as<std::string>();
+    }
+    if (item["target_size"].IsScalar()) {
+      height_ = item["target_size"].as<int>();
+      width_ = item["target_size"].as<int>();
+    } else if (item["target_size"].IsSequence()) {
+      std::vector<int> target_size = item["target_size"].as<std::vector<int>>();
+      width_ = target_size[0];
+      height_ = target_size[1];
+    }
+    if (height_ <= 0 || width_ <= 0) {
+      std::cerr << "[Resize] target_size should greater than 0" << std::endl;
+      exit(-1);
+    }
+  }
+  virtual bool Run(cv::Mat* im, ImageBlob* data);
+
+ private:
+  // Zero-initialised so the validity check in Init() never reads
+  // indeterminate values when neither target_size branch is taken.
+  int height_ = 0;
+  int width_ = 0;
+  std::string interp_;
+};
+
+class CenterCrop : public Transform {
+ public:
+  virtual void Init(const YAML::Node& item) {
+    if (item["crop_size"].IsScalar()) {
+      height_ = item["crop_size"].as<int>();
+      width_ = item["crop_size"].as<int>();
+    } else if (item["crop_size"].IsSequence()) {
+      std::vector<int> crop_size = item["crop_size"].as<std::vector<int>>();
+      width_ = crop_size[0];
+      height_ = crop_size[1];
+    }
+  }
+  virtual bool Run(cv::Mat* im, ImageBlob* data);
+
+ private:
+  int height_;
+  int width_;
+};
+
+// Padding configured either by a stride ("coarsest_stride") or by an explicit
+// "target_size".
+class Padding : public Transform {
+ public:
+  // When "coarsest_stride" is present it takes precedence and must be at
+  // least 1 (otherwise the process exits with -1). Otherwise "target_size" is
+  // read as a scalar (both sides) or as a sequence [width, height].
+  // "im_padding_value" is optional.
+  virtual void Init(const YAML::Node& item) {
+    if (item["coarsest_stride"].IsDefined()) {
+      coarsest_stride_ = item["coarsest_stride"].as<int>();
+      // The check used to be `<= 1`, which rejected a stride of 1 although
+      // the message only requires a value greater than 0.
+      if (coarsest_stride_ < 1) {
+        std::cerr << "[Padding] coarsest_stride should greater than 0"
+                  << std::endl;
+        exit(-1);
+      }
+    } else {
+      if (item["target_size"].IsScalar()) {
+        width_ = item["target_size"].as<int>();
+        height_ = item["target_size"].as<int>();
+      } else if (item["target_size"].IsSequence()) {
+        // Convert once instead of once per component.
+        const std::vector<int> target_size =
+            item["target_size"].as<std::vector<int>>();
+        width_ = target_size[0];
+        height_ = target_size[1];
+      }
+    }
+    if (item["im_padding_value"].IsDefined()) {
+      value_ = item["im_padding_value"].as<std::vector<float>>();
+    }
+  }
+  virtual bool Run(cv::Mat* im, ImageBlob* data);
+
+ private:
+  int coarsest_stride_ = -1;  // -1 until "coarsest_stride" is configured
+  int width_ = 0;
+  int height_ = 0;
+  std::vector<float> value_;
+};
+
+class Transforms {
+ public:
+  void Init(const YAML::Node& node, bool to_rgb = true);
+  std::shared_ptr<Transform> CreateTransform(const std::string& name);
+  bool Run(cv::Mat* im, ImageBlob* data);
+
+ private:
+  std::vector<std::shared_ptr<Transform>> transforms_;
+  bool to_rgb_ = true;
+};
+
+}  // namespace PaddleX
--- a/deploy/cpp/include/paddlex/visualize.h
+++ b/deploy/cpp/include/paddlex/visualize.h
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <iostream>
+#include <map>
+#include <vector>
+#ifdef _WIN32
+#include <direct.h>
+#include <io.h>
+#else  // Linux/Unix
+#include <dirent.h>
+#include <sys/io.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#endif
+#include <string>
+
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+#include "include/paddlex/results.h"
+
+#ifdef _WIN32
+#define OS_PATH_SEP "\\"
+#else
+#define OS_PATH_SEP "/"
+#endif
+
+namespace PaddleX {
+
+// Generate visualization colormap for each class
+std::vector<int> GenerateColorMap(int num_class);
+
+cv::Mat VisualizeDet(const cv::Mat& img,
+                     const DetResult& results,
+                     const std::map<int, std::string>& labels,
+                     const std::vector<int>& colormap,
+                     float threshold = 0.5);
+
+cv::Mat VisualizeSeg(const cv::Mat& img,
+                     const SegResult& result,
+                     const std::map<int, std::string>& labels,
+                     const std::vector<int>& colormap);
+
+std::string generate_save_path(const std::string& save_dir,
+                               const std::string& file_path);
+}  // namespace PaddleX
--- a/deploy/cpp/scripts/bootstrap.sh
+++ b/deploy/cpp/scripts/bootstrap.sh
+# Download the pre-compiled OpenCV library into ./deps unless it is already
+# there. Each step aborts the script on failure so that a failed download is
+# not unpacked and a failed extraction does not delete the archive.
+OPENCV_URL=https://paddleseg.bj.bcebos.com/deploy/docker/opencv3gcc4.8.tar.bz2
+if [ ! -d "./deps/opencv3gcc4.8" ]; then
+    mkdir -p deps
+    cd deps || exit 1
+    wget -c "${OPENCV_URL}" || exit 1
+    tar xvfj opencv3gcc4.8.tar.bz2 || exit 1
+    rm -rf opencv3gcc4.8.tar.bz2
+    cd ..
+fi
--- a/deploy/cpp/scripts/build.sh
+++ b/deploy/cpp/scripts/build.sh
+# Whether to build with GPU (i.e. CUDA) support
+WITH_GPU=ON
+# Whether to integrate TensorRT (only effective when WITH_GPU=ON)
+WITH_TENSORRT=OFF
+# Path to the Paddle inference library
+PADDLE_DIR=/path/to/fluid_inference/
+# Path to the CUDA lib directory
+CUDA_LIB=/path/to/cuda/lib/
+# Path to the CUDNN lib directory
+CUDNN_LIB=/path/to/cudnn/lib/
+
+# OpenCV path; no change needed when using the bundled pre-compiled version
+OPENCV_DIR=$(pwd)/deps/opencv3gcc4.8/
+# Paths are quoted so a working directory containing spaces still works, and
+# the build stops if fetching the dependencies fails.
+sh "$(pwd)/scripts/bootstrap.sh" || exit 1
+
+# Nothing below needs to be modified
+rm -rf build
+mkdir -p build
+cd build || exit 1
+cmake .. \
+    -DWITH_GPU="${WITH_GPU}" \
+    -DWITH_TENSORRT="${WITH_TENSORRT}" \
+    -DPADDLE_DIR="${PADDLE_DIR}" \
+    -DCUDA_LIB="${CUDA_LIB}" \
+    -DCUDNN_LIB="${CUDNN_LIB}" \
+    -DOPENCV_DIR="${OPENCV_DIR}" || exit 1
+make
--- a/deploy/cpp/src/classifier.cpp
+++ b/deploy/cpp/src/classifier.cpp
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <glog/logging.h>
+
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "include/paddlex/paddlex.h"
+
+DEFINE_string(model_dir, "", "Path of inference model");
+DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
+DEFINE_int32(gpu_id, 0, "GPU card id");
+DEFINE_string(image, "", "Path of test image file");
+DEFINE_string(image_list, "", "Path of test image list file");
+
+// Classification demo entry point.
+// Requires --model_dir and one of --image / --image_list. Prints the predicted
+// label, label id and score for each image. Returns 0 on success and -1 on a
+// usage error, an unreadable list file, or (single-image mode) a failed read
+// or prediction.
+int main(int argc, char** argv) {
+  // Parsing command-line
+  google::ParseCommandLineFlags(&argc, &argv, true);
+
+  if (FLAGS_model_dir == "") {
+    std::cerr << "--model_dir need to be defined" << std::endl;
+    return -1;
+  }
+  // Logical AND; the original used the bitwise `&` operator here.
+  if (FLAGS_image == "" && FLAGS_image_list == "") {
+    std::cerr << "--image or --image_list need to be defined" << std::endl;
+    return -1;
+  }
+
+  // Load the model
+  PaddleX::Model model;
+  model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_gpu_id);
+
+  // Run prediction
+  if (FLAGS_image_list != "") {
+    std::ifstream inf(FLAGS_image_list);
+    if (!inf) {
+      std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
+      return -1;
+    }
+    std::string image_path;
+    while (getline(inf, image_path)) {
+      PaddleX::ClsResult result;
+      cv::Mat im = cv::imread(image_path, 1);
+      // cv::imread returns an empty matrix when the file cannot be read;
+      // skip that entry instead of predicting on empty data.
+      if (im.empty()) {
+        std::cerr << "Fail to read image " << image_path << std::endl;
+        continue;
+      }
+      // Do not print a result that predict() did not fill in.
+      if (!model.predict(im, &result)) {
+        std::cerr << "Fail to predict image " << image_path << std::endl;
+        continue;
+      }
+      std::cout << "Predict label: " << result.category
+                << ", label_id:" << result.category_id
+                << ", score: " << result.score << std::endl;
+    }
+  } else {
+    PaddleX::ClsResult result;
+    cv::Mat im = cv::imread(FLAGS_image, 1);
+    if (im.empty()) {
+      std::cerr << "Fail to read image " << FLAGS_image << std::endl;
+      return -1;
+    }
+    if (!model.predict(im, &result)) {
+      std::cerr << "Fail to predict image " << FLAGS_image << std::endl;
+      return -1;
+    }
+    std::cout << "Predict label: " << result.category
+              << ", label_id:" << result.category_id
+              << ", score: " << result.score << std::endl;
+  }
+
+  return 0;
+}
--- a/deploy/cpp/src/detector.cpp
+++ b/deploy/cpp/src/detector.cpp
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <glog/logging.h>
+
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "include/paddlex/paddlex.h"
+#include "include/paddlex/visualize.h"
+
+// Command-line flags of the detection demo (declared through gflags).
+// Either --image or --image_list must be supplied together with --model_dir;
+// visualized results are written below --save_dir.
+DEFINE_string(model_dir, "", "Path of inference model");
+DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
+DEFINE_int32(gpu_id, 0, "GPU card id");
+DEFINE_string(image, "", "Path of test image file");
+DEFINE_string(image_list, "", "Path of test image list file");
+DEFINE_string(save_dir, "output", "Path to save visualized image");
+
+// Entry point of the detection demo.
+//
+// Loads the model stored in --model_dir, then runs detection on either every
+// image listed (one path per line) in --image_list or the single image given
+// by --image. Every detected box is printed to stdout and a visualization of
+// the result is written below --save_dir.
+//
+// Returns 0 on success and -1 when the arguments are invalid, the list file
+// cannot be opened, or the single input image cannot be read.
+int main(int argc, char** argv) {
+  // Parse the command line
+  google::ParseCommandLineFlags(&argc, &argv, true);
+
+  if (FLAGS_model_dir.empty()) {
+    std::cerr << "--model_dir need to be defined" << std::endl;
+    return -1;
+  }
+  // The input is missing only when BOTH flags are empty (logical and).
+  if (FLAGS_image.empty() && FLAGS_image_list.empty()) {
+    std::cerr << "--image or --image_list need to be defined" << std::endl;
+    return -1;
+  }
+
+  // Load the model
+  PaddleX::Model model;
+  model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_gpu_id);
+
+  auto colormap = PaddleX::GenerateColorMap(model.labels.size());
+
+  // Detects objects in one image file, prints every box and saves the
+  // visualization. Returns false when the file cannot be decoded: cv::imread
+  // yields an empty matrix in that case, which must not reach preprocessing.
+  auto detect_image = [&model, &colormap](const std::string& image_path)
+      -> bool {
+    cv::Mat im = cv::imread(image_path, 1);
+    if (im.empty()) {
+      std::cerr << "Fail to read image " << image_path << std::endl;
+      return false;
+    }
+    PaddleX::DetResult result;
+    model.predict(im, &result);
+    for (size_t i = 0; i < result.boxes.size(); ++i) {
+      const auto& box = result.boxes[i];
+      // coordinate holds {xmin, ymin, width, height} as filled by predict().
+      std::cout << "image file: " << image_path
+                << ", predict label: " << box.category
+                << ", label_id:" << box.category_id
+                << ", score: " << box.score << ", box:("
+                << box.coordinate[0] << ", " << box.coordinate[1] << ", "
+                << box.coordinate[2] << ", " << box.coordinate[3] << ")"
+                << std::endl;
+    }
+
+    // Visualization
+    cv::Mat vis_img =
+        PaddleX::VisualizeDet(im, result, model.labels, colormap, 0.5);
+    std::string save_path =
+        PaddleX::generate_save_path(FLAGS_save_dir, image_path);
+    cv::imwrite(save_path, vis_img);
+    std::cout << "Visualized output saved as " << save_path << std::endl;
+    return true;
+  };
+
+  // Run prediction
+  if (!FLAGS_image_list.empty()) {
+    std::ifstream inf(FLAGS_image_list);
+    if (!inf) {
+      std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
+      return -1;
+    }
+    std::string image_path;
+    while (getline(inf, image_path)) {
+      // Blank lines (e.g. a trailing newline) are not image paths.
+      if (image_path.empty()) {
+        continue;
+      }
+      // An unreadable entry is reported and skipped so the rest of the list
+      // is still processed.
+      detect_image(image_path);
+    }
+  } else if (!detect_image(FLAGS_image)) {
+    return -1;
+  }
+
+  return 0;
+}
--- a/deploy/cpp/src/paddlex.cpp
+++ b/deploy/cpp/src/paddlex.cpp
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/paddlex/paddlex.h"
+
+namespace PaddleX {
+
+// Creates the inference predictor for the model stored in `model_dir`.
+//
+// The model description (model.yml) is parsed first; when that fails the
+// process is terminated with exit(-1). The predictor is then configured from
+// the `__model__` / `__params__` files of the same directory and runs on the
+// GPU `gpu_id` when `use_gpu` is true, otherwise on the CPU.
+void Model::create_predictor(const std::string& model_dir,
+                             bool use_gpu,
+                             int gpu_id) {
+  // Nothing can be configured without a successfully parsed model.yml
+  const bool config_loaded = load_config(model_dir);
+  if (!config_loaded) {
+    std::cerr << "Parse file 'model.yml' failed!" << std::endl;
+    exit(-1);
+  }
+
+  paddle::AnalysisConfig analysis_config;
+  const std::string model_path = model_dir + OS_PATH_SEP + "__model__";
+  const std::string params_path = model_dir + OS_PATH_SEP + "__params__";
+  analysis_config.SetModel(model_path, params_path);
+
+  if (!use_gpu) {
+    analysis_config.DisableGpu();
+  } else {
+    // NOTE(review): 100 is presumably the initial GPU memory pool size;
+    // confirm against the Paddle inference API.
+    analysis_config.EnableUseGpu(100, gpu_id);
+  }
+  analysis_config.SwitchUseFeedFetchOps(false);
+  analysis_config.SwitchSpecifyInputNames(true);
+  // Turn on memory optimization
+  analysis_config.EnableMemoryOptim();
+
+  predictor_ = std::move(CreatePaddlePredictor(analysis_config));
+}
+
+// Parses `<model_dir>/model.yml` and initializes the model from it.
+//
+// Fills `type` (from _Attributes.model_type), `name` (from Model), the
+// preprocessing pipeline `transforms_` (from Transforms, honouring the
+// optional TransformsMode of "RGB" or "BGR") and the `labels` table (from
+// _Attributes.labels, keyed by position in the list).
+//
+// Returns true on success. Returns false when TransformsMode has an
+// unsupported value, or when the file is missing, malformed or lacks a
+// required key: yaml-cpp reports those by throwing, and the exception is
+// converted here so the caller's failure branch is actually reachable.
+bool Model::load_config(const std::string& model_dir) {
+  const std::string yaml_file = model_dir + OS_PATH_SEP + "model.yml";
+  try {
+    YAML::Node config = YAML::LoadFile(yaml_file);
+    type = config["_Attributes"]["model_type"].as<std::string>();
+    name = config["Model"].as<std::string>();
+    bool to_rgb = true;
+    if (config["TransformsMode"].IsDefined()) {
+      std::string mode = config["TransformsMode"].as<std::string>();
+      if (mode == "BGR") {
+        to_rgb = false;
+      } else if (mode != "RGB") {
+        std::cerr
+            << "[Init] Only 'RGB' or 'BGR' is supported for TransformsMode"
+            << std::endl;
+        return false;
+      }
+    }
+    // Build the preprocessing pipeline
+    transforms_.Init(config["Transforms"], to_rgb);
+    // Read the label list; the key is the label's position in the list
+    labels.clear();
+    for (const auto& item : config["_Attributes"]["labels"]) {
+      int index = static_cast<int>(labels.size());
+      labels[index] = item.as<std::string>();
+    }
+  } catch (const YAML::Exception& e) {
+    std::cerr << "[Init] Fail to parse '" << yaml_file << "': " << e.what()
+              << std::endl;
+    return false;
+  }
+  return true;
+}
+
+// Runs the preprocessing pipeline on a copy of `input_im` and stores the
+// resulting tensor data and size bookkeeping in `blob`.
+//
+// `input_im` itself is left untouched. Returns false when `blob` is null,
+// when the image is empty (e.g. it could not be read from disk) or when any
+// transform fails.
+bool Model::preprocess(const cv::Mat& input_im, ImageBlob* blob) {
+  if (blob == nullptr) {
+    std::cerr << "[Preprocess] Output blob should not be null" << std::endl;
+    return false;
+  }
+  if (input_im.empty()) {
+    std::cerr << "[Preprocess] Input image is empty" << std::endl;
+    return false;
+  }
+  // Work on a clone: the transforms modify the image in place.
+  cv::Mat im = input_im.clone();
+  // Write into the blob supplied by the caller rather than always `inputs_`.
+  return transforms_.Run(&im, blob);
+}
+
+// Classifies `im` and stores the best-scoring category in `result`.
+//
+// `result` receives the index of the largest value of the first model output
+// (`category_id`), that value (`score`) and the matching entry of `labels`
+// (`category`).
+//
+// Returns true on success; false when `result` is null, the loaded model is
+// not a classifier, preprocessing fails or the model produces no output.
+bool Model::predict(const cv::Mat& im, ClsResult* result) {
+  if (result == nullptr) {
+    std::cerr << "ClsResult should not be null!" << std::endl;
+    return false;
+  }
+  inputs_.clear();
+  if (type == "detector") {
+    std::cerr << "Loading model is a 'detector', DetResult should be passed to "
+                 "function predict()!"
+              << std::endl;
+    return false;
+  } else if (type == "segmenter") {
+    std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
+                 "to function predict()!"
+              << std::endl;
+    return false;
+  }
+  // Preprocess the input image
+  if (!preprocess(im, &inputs_)) {
+    std::cerr << "Preprocess failed!" << std::endl;
+    return false;
+  }
+  // Run the loaded model
+  auto in_tensor = predictor_->GetInputTensor("image");
+  int h = inputs_.new_im_size_[0];
+  int w = inputs_.new_im_size_[1];
+  in_tensor->Reshape({1, 3, h, w});
+  in_tensor->copy_from_cpu(inputs_.im_data_.data());
+  predictor_->ZeroCopyRun();
+  // Fetch the model output
+  auto output_names = predictor_->GetOutputNames();
+  auto output_tensor = predictor_->GetOutputTensor(output_names[0]);
+  std::vector<int> output_shape = output_tensor->shape();
+  int size = 1;
+  for (const auto& i : output_shape) {
+    size *= i;
+  }
+  if (size <= 0) {
+    // max_element on an empty range returns end(), which must not be read.
+    std::cerr << "Model output is empty!" << std::endl;
+    return false;
+  }
+  outputs_.resize(size);
+  output_tensor->copy_to_cpu(outputs_.data());
+  // Post-process: the category is the position of the largest output value
+  auto ptr = std::max_element(std::begin(outputs_), std::end(outputs_));
+  result->category_id = std::distance(std::begin(outputs_), ptr);
+  result->score = *ptr;
+  result->category = labels[result->category_id];
+  // Every path of a non-void function has to return a value.
+  return true;
+}
+
+// Runs object detection (and, for MaskRCNN, instance segmentation) on `im`.
+//
+// The first model output is read as a flat list of 6 floats per box:
+// {category_id, score, xmin, ymin, xmax, ymax}. Each box is appended to
+// `result->boxes` with `coordinate` = {xmin, ymin, width, height}. For a
+// model named "MaskRCNN" the second output is read as the per-class masks and
+// the mask of each box's own category is copied into the box.
+//
+// Returns true on success, including the case where nothing was detected;
+// false when the loaded model is not a detector, preprocessing fails or the
+// mask output does not have the expected layout.
+bool Model::predict(const cv::Mat& im, DetResult* result) {
+  result->clear();
+  inputs_.clear();
+  if (type == "classifier") {
+    std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
+                 "to function predict()!"
+              << std::endl;
+    return false;
+  } else if (type == "segmenter") {
+    std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
+                 "to function predict()!"
+              << std::endl;
+    return false;
+  }
+
+  // Preprocess the input image
+  if (!preprocess(im, &inputs_)) {
+    std::cerr << "Preprocess failed!" << std::endl;
+    return false;
+  }
+
+  int h = inputs_.new_im_size_[0];
+  int w = inputs_.new_im_size_[1];
+  auto im_tensor = predictor_->GetInputTensor("image");
+  im_tensor->Reshape({1, 3, h, w});
+  im_tensor->copy_from_cpu(inputs_.im_data_.data());
+  // Feed the extra size inputs that the individual architectures declare
+  if (name == "YOLOv3") {
+    auto im_size_tensor = predictor_->GetInputTensor("im_size");
+    im_size_tensor->Reshape({1, 2});
+    im_size_tensor->copy_from_cpu(inputs_.ori_im_size_.data());
+  } else if (name == "FasterRCNN" || name == "MaskRCNN") {
+    auto im_info_tensor = predictor_->GetInputTensor("im_info");
+    auto im_shape_tensor = predictor_->GetInputTensor("im_shape");
+    im_info_tensor->Reshape({1, 3});
+    im_shape_tensor->Reshape({1, 3});
+    float ori_h = static_cast<float>(inputs_.ori_im_size_[0]);
+    float ori_w = static_cast<float>(inputs_.ori_im_size_[1]);
+    float new_h = static_cast<float>(inputs_.new_im_size_[0]);
+    float new_w = static_cast<float>(inputs_.new_im_size_[1]);
+    float im_info[] = {new_h, new_w, inputs_.scale};
+    float im_shape[] = {ori_h, ori_w, 1.0};
+    im_info_tensor->copy_from_cpu(im_info);
+    im_shape_tensor->copy_from_cpu(im_shape);
+  }
+  // Run the loaded model
+  predictor_->ZeroCopyRun();
+
+  std::vector<float> output_box;
+  auto output_names = predictor_->GetOutputNames();
+  auto output_box_tensor = predictor_->GetOutputTensor(output_names[0]);
+  std::vector<int> output_box_shape = output_box_tensor->shape();
+  int size = 1;
+  for (const auto& i : output_box_shape) {
+    size *= i;
+  }
+  output_box.resize(size);
+  output_box_tensor->copy_to_cpu(output_box.data());
+  if (size < 6) {
+    std::cerr << "[WARNING] There's no object detected." << std::endl;
+    return true;
+  }
+  int num_boxes = size / 6;
+  // Decode the predicted boxes
+  for (int i = 0; i < num_boxes; ++i) {
+    Box box;
+    box.category_id = static_cast<int>(round(output_box[i * 6]));
+    box.category = labels[box.category_id];
+    box.score = output_box[i * 6 + 1];
+    float xmin = output_box[i * 6 + 2];
+    float ymin = output_box[i * 6 + 3];
+    float xmax = output_box[i * 6 + 4];
+    float ymax = output_box[i * 6 + 5];
+    // Distinct names: `w` / `h` already denote the network input size above.
+    float box_w = xmax - xmin + 1;
+    float box_h = ymax - ymin + 1;
+    box.coordinate = {xmin, ymin, box_w, box_h};
+    result->boxes.push_back(std::move(box));
+  }
+  // Instance segmentation additionally needs the masks decoded
+  if (name == "MaskRCNN") {
+    if (output_names.size() < 2) {
+      std::cerr << "MaskRCNN model should have a mask output!" << std::endl;
+      return false;
+    }
+    std::vector<float> output_mask;
+    auto output_mask_tensor = predictor_->GetOutputTensor(output_names[1]);
+    std::vector<int> output_mask_shape = output_mask_tensor->shape();
+    if (output_mask_shape.size() < 4) {
+      std::cerr << "Unexpected shape of the MaskRCNN mask output!"
+                << std::endl;
+      return false;
+    }
+    int masks_size = 1;
+    for (const auto& i : output_mask_shape) {
+      masks_size *= i;
+    }
+    int mask_pixels = output_mask_shape[2] * output_mask_shape[3];
+    int classes = output_mask_shape[1];
+    output_mask.resize(masks_size);
+    output_mask_tensor->copy_to_cpu(output_mask.data());
+    result->mask_resolution = output_mask_shape[2];
+    for (size_t i = 0; i < result->boxes.size(); ++i) {
+      Box* box = &result->boxes[i];
+      // Masks are indexed by box, then by class; pick the box's own class.
+      int offset =
+          (static_cast<int>(i) * classes + box->category_id) * mask_pixels;
+      if (offset < 0 || offset + mask_pixels > masks_size) {
+        std::cerr << "Mask output is too small for the detected boxes!"
+                  << std::endl;
+        return false;
+      }
+      auto begin_mask = output_mask.begin() + offset;
+      auto end_mask = begin_mask + mask_pixels;
+      box->mask.data.assign(begin_mask, end_mask);
+      box->mask.shape = {static_cast<int>(box->coordinate[2]),
+                         static_cast<int>(box->coordinate[3])};
+    }
+  }
+  // Every path of a non-void function has to return a value.
+  return true;
+}
+
+// Runs semantic segmentation on `im`.
+//
+// The first model output is taken as the label map and the second as the
+// score map. Both are then mapped back towards the original image by undoing
+// the recorded "padding" / "resize" preprocessing steps in reverse order, and
+// are stored in `result` with shape {rows, cols}.
+//
+// Returns true on success; false when the loaded model is not a segmenter,
+// preprocessing fails or the outputs do not have the expected layout.
+bool Model::predict(const cv::Mat& im, SegResult* result) {
+  result->clear();
+  inputs_.clear();
+  if (type == "classifier") {
+    std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
+                 "to function predict()!"
+              << std::endl;
+    return false;
+  } else if (type == "detector") {
+    std::cerr << "Loading model is a 'detector', DetResult should be passed to "
+                 "function predict()!"
+              << std::endl;
+    return false;
+  }
+
+  // Preprocess the input image
+  if (!preprocess(im, &inputs_)) {
+    std::cerr << "Preprocess failed!" << std::endl;
+    return false;
+  }
+
+  int h = inputs_.new_im_size_[0];
+  int w = inputs_.new_im_size_[1];
+  auto im_tensor = predictor_->GetInputTensor("image");
+  im_tensor->Reshape({1, 3, h, w});
+  im_tensor->copy_from_cpu(inputs_.im_data_.data());
+  std::cout << "input image: " << h << " " << w << std::endl;
+
+  // Run the loaded model
+  predictor_->ZeroCopyRun();
+
+  auto output_names = predictor_->GetOutputNames();
+  if (output_names.size() < 2) {
+    std::cerr << "Segmentation model should have label and score outputs!"
+              << std::endl;
+    return false;
+  }
+
+  // Fetch the label map (first output)
+  auto output_label_tensor = predictor_->GetOutputTensor(output_names[0]);
+  std::vector<int> output_label_shape = output_label_tensor->shape();
+  int size = 1;
+  for (const auto& i : output_label_shape) {
+    size *= i;
+    result->label_map.shape.push_back(i);
+  }
+  result->label_map.data.resize(size);
+  output_label_tensor->copy_to_cpu(result->label_map.data.data());
+
+  // Fetch the score map (second output)
+  auto output_score_tensor = predictor_->GetOutputTensor(output_names[1]);
+  std::vector<int> output_score_shape = output_score_tensor->shape();
+  size = 1;
+  for (const auto& i : output_score_shape) {
+    size *= i;
+    result->score_map.shape.push_back(i);
+  }
+  result->score_map.data.resize(size);
+  output_score_tensor->copy_to_cpu(result->score_map.data.data());
+
+  // The indexing below reads label dims 1..2 and score dims 2..3.
+  if (result->label_map.shape.size() < 3 ||
+      result->score_map.shape.size() < 4) {
+    std::cerr << "Unexpected shape of the segmentation outputs!" << std::endl;
+    return false;
+  }
+
+  // Map the outputs back to the size of the original image
+  std::vector<uint8_t> label_map(result->label_map.data.begin(),
+                                 result->label_map.data.end());
+  cv::Mat mask_label(result->label_map.shape[1],
+                     result->label_map.shape[2],
+                     CV_8UC1,
+                     label_map.data());
+
+  cv::Mat mask_score(result->score_map.shape[2],
+                     result->score_map.shape[3],
+                     CV_32FC1,
+                     result->score_map.data.data());
+
+  for (std::vector<std::string>::reverse_iterator iter =
+           inputs_.reshape_order_.rbegin();
+       iter != inputs_.reshape_order_.rend();
+       ++iter) {
+    if (*iter == "padding") {
+      // Padding::Run stores the size as {rows, cols} while cv::Rect expects
+      // (x, y, width, height), i.e. cols before rows.
+      auto before_h = inputs_.im_size_before_padding_[0];
+      auto before_w = inputs_.im_size_before_padding_[1];
+      mask_label = mask_label(cv::Rect(0, 0, before_w, before_h));
+      mask_score = mask_score(cv::Rect(0, 0, before_w, before_h));
+    } else if (*iter == "resize") {
+      // The resize transforms also store {rows, cols}; cv::Size expects
+      // (width, height).
+      auto before_h = inputs_.im_size_before_resize_[0];
+      auto before_w = inputs_.im_size_before_resize_[1];
+      cv::resize(mask_label,
+                 mask_label,
+                 cv::Size(before_w, before_h),
+                 0,
+                 0,
+                 cv::INTER_NEAREST);
+      cv::resize(mask_score,
+                 mask_score,
+                 cv::Size(before_w, before_h),
+                 0,
+                 0,
+                 cv::INTER_NEAREST);
+    }
+  }
+  result->label_map.data.assign(mask_label.begin<uint8_t>(),
+                                mask_label.end<uint8_t>());
+  result->label_map.shape = {mask_label.rows, mask_label.cols};
+  // mask_score may still be a view onto result->score_map.data (no resize
+  // happened); copy it first so assign() never reads the buffer it rewrites.
+  cv::Mat final_score = mask_score.clone();
+  result->score_map.data.assign(final_score.begin<float>(),
+                                final_score.end<float>());
+  result->score_map.shape = {final_score.rows, final_score.cols};
+  // Every path of a non-void function has to return a value.
+  return true;
+}
+
+}  // namespace PaddleX
--- a/deploy/cpp/src/segmenter.cpp
+++ b/deploy/cpp/src/segmenter.cpp
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <glog/logging.h>
+
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "include/paddlex/paddlex.h"
+#include "include/paddlex/visualize.h"
+
+// Command-line flags of the segmentation demo (declared through gflags).
+// Either --image or --image_list must be supplied together with --model_dir;
+// visualized results are written below --save_dir.
+DEFINE_string(model_dir, "", "Path of inference model");
+DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
+DEFINE_int32(gpu_id, 0, "GPU card id");
+DEFINE_string(image, "", "Path of test image file");
+DEFINE_string(image_list, "", "Path of test image list file");
+DEFINE_string(save_dir, "output", "Path to save visualized image");
+
+// Entry point of the segmentation demo.
+//
+// Loads the model stored in --model_dir, then segments either every image
+// listed (one path per line) in --image_list or the single image given by
+// --image, writing a visualization of each result below --save_dir.
+//
+// Returns 0 on success and -1 when the arguments are invalid, the list file
+// cannot be opened, or the single input image cannot be read.
+int main(int argc, char** argv) {
+  // Parse the command line
+  google::ParseCommandLineFlags(&argc, &argv, true);
+
+  if (FLAGS_model_dir.empty()) {
+    std::cerr << "--model_dir need to be defined" << std::endl;
+    return -1;
+  }
+  // The input is missing only when BOTH flags are empty (logical and).
+  if (FLAGS_image.empty() && FLAGS_image_list.empty()) {
+    std::cerr << "--image or --image_list need to be defined" << std::endl;
+    return -1;
+  }
+
+  // Load the model
+  PaddleX::Model model;
+  model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_gpu_id);
+  auto colormap = PaddleX::GenerateColorMap(model.labels.size());
+
+  // Segments one image file and saves the visualization.
+  // Returns false when the file cannot be decoded: cv::imread yields an empty
+  // matrix in that case, which must not be handed to the preprocessing code.
+  auto segment_image = [&model, &colormap](const std::string& image_path)
+      -> bool {
+    cv::Mat im = cv::imread(image_path, 1);
+    if (im.empty()) {
+      std::cerr << "Fail to read image " << image_path << std::endl;
+      return false;
+    }
+    PaddleX::SegResult result;
+    model.predict(im, &result);
+    // Visualization
+    cv::Mat vis_img =
+        PaddleX::VisualizeSeg(im, result, model.labels, colormap);
+    std::string save_path =
+        PaddleX::generate_save_path(FLAGS_save_dir, image_path);
+    cv::imwrite(save_path, vis_img);
+    std::cout << "Visualized output saved as " << save_path << std::endl;
+    return true;
+  };
+
+  // Run prediction
+  if (!FLAGS_image_list.empty()) {
+    std::ifstream inf(FLAGS_image_list);
+    if (!inf) {
+      std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
+      return -1;
+    }
+    std::string image_path;
+    while (getline(inf, image_path)) {
+      // Blank lines (e.g. a trailing newline) are not image paths.
+      if (image_path.empty()) {
+        continue;
+      }
+      // An unreadable entry is reported and skipped so the rest of the list
+      // is still processed.
+      segment_image(image_path);
+    }
+  } else if (!segment_image(FLAGS_image)) {
+    return -1;
+  }
+
+  return 0;
+}
--- a/deploy/cpp/src/transforms.cpp
+++ b/deploy/cpp/src/transforms.cpp
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "include/paddlex/transforms.h"
+
+namespace PaddleX {
+
+// Maps the interpolation names accepted by the Resize transform to the
+// matching OpenCV interpolation flags. Resize::Run rejects any name that is
+// not a key of this table.
+std::map<std::string, int> interpolations = {{"LINEAR", cv::INTER_LINEAR},
+                                             {"NEAREST", cv::INTER_NEAREST},
+                                             {"AREA", cv::INTER_AREA},
+                                             {"CUBIC", cv::INTER_CUBIC},
+                                             {"LANCZOS4", cv::INTER_LANCZOS4}};
+
+// Normalizes a 3-channel float image in place: every channel value v becomes
+// (v / 255 - mean_[c]) / std_[c]. `data` is not used. Always returns true.
+bool Normalize::Run(cv::Mat* im, ImageBlob* data) {
+  const int rows = im->rows;
+  const int cols = im->cols;
+  for (int y = 0; y < rows; ++y) {
+    for (int x = 0; x < cols; ++x) {
+      cv::Vec3f& pixel = im->at<cv::Vec3f>(y, x);
+      for (int ch = 0; ch < 3; ++ch) {
+        pixel[ch] = (pixel[ch] / 255.0 - mean_[ch]) / std_[ch];
+      }
+    }
+  }
+  return true;
+}
+
+// Computes the factor that scales the shorter side of `im` to `short_size_`.
+// When `max_size_` is positive and the longer side would then (after
+// rounding) exceed it, the factor that scales the longer side to `max_size_`
+// is returned instead.
+float ResizeByShort::GenerateScale(const cv::Mat& im) {
+  const int longer_side = std::max(im.cols, im.rows);
+  const int shorter_side = std::min(im.cols, im.rows);
+  float scale =
+      static_cast<float>(short_size_) / static_cast<float>(shorter_side);
+  const bool exceeds_limit =
+      max_size_ > 0 && round(scale * longer_side) > max_size_;
+  if (exceeds_limit) {
+    scale = static_cast<float>(max_size_) / static_cast<float>(longer_side);
+  }
+  return scale;
+}
+
+// Resizes `im` in place (linear interpolation) by the factor returned from
+// GenerateScale(). The size before resizing, the new size, the scale and a
+// "resize" entry in `reshape_order_` are recorded in `data`.
+// Always returns true.
+bool ResizeByShort::Run(cv::Mat* im, ImageBlob* data) {
+  // Remember the incoming size ({rows, cols}) so the step can be undone
+  data->im_size_before_resize_[0] = im->rows;
+  data->im_size_before_resize_[1] = im->cols;
+  data->reshape_order_.push_back("resize");
+
+  const float factor = GenerateScale(*im);
+  const cv::Size target(static_cast<int>(factor * im->cols),
+                        static_cast<int>(factor * im->rows));
+  cv::resize(*im, *im, target, 0, 0, cv::INTER_LINEAR);
+
+  data->new_im_size_[0] = im->rows;
+  data->new_im_size_[1] = im->cols;
+  data->scale = factor;
+  return true;
+}
+
+// Crops the central `width_` x `height_` region out of `im` and records the
+// new size in `data`. Returns false, leaving the image untouched, when the
+// image is smaller than the crop size in either dimension.
+bool CenterCrop::Run(cv::Mat* im, ImageBlob* data) {
+  const int src_h = static_cast<int>(im->rows);
+  const int src_w = static_cast<int>(im->cols);
+  const bool too_small = src_h < height_ || src_w < width_;
+  if (too_small) {
+    std::cerr << "[CenterCrop] Image size less than crop size" << std::endl;
+    return false;
+  }
+  // Top-left corner of the centered crop window
+  const int left = static_cast<int>((src_w - width_) / 2);
+  const int top = static_cast<int>((src_h - height_) / 2);
+  *im = (*im)(cv::Rect(left, top, width_, height_));
+  data->new_im_size_[0] = im->rows;
+  data->new_im_size_[1] = im->cols;
+  return true;
+}
+
+// Pads `im` with zeros on its bottom and right edges.
+//
+// When both `width_` and `height_` are positive the image is padded to that
+// exact size; otherwise, when `coarsest_stride_` is positive, each side is
+// padded up to the next multiple of the stride. The size before padding
+// ({rows, cols}), the new size and a "padding" entry in `reshape_order_` are
+// recorded in `data`.
+//
+// Returns false when the image is already larger than the target size.
+bool Padding::Run(cv::Mat* im, ImageBlob* data) {
+  data->im_size_before_padding_[0] = im->rows;
+  data->im_size_before_padding_[1] = im->cols;
+  data->reshape_order_.push_back("padding");
+
+  int padding_w = 0;
+  int padding_h = 0;
+  // Logical and: a fixed target size needs BOTH dimensions to be set.
+  if (width_ > 0 && height_ > 0) {
+    padding_w = width_ - im->cols;
+    padding_h = height_ - im->rows;
+  } else if (coarsest_stride_ > 0) {
+    // Round each side up to the next multiple of the stride
+    padding_h = static_cast<int>(
+        ceil(im->rows * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows);
+    padding_w = static_cast<int>(
+        ceil(im->cols * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols);
+  }
+  if (padding_h < 0 || padding_w < 0) {
+    std::cerr << "[Padding] Computed padding_h=" << padding_h
+              << ", padding_w=" << padding_w
+              << ", but they should be greater than 0." << std::endl;
+    return false;
+  }
+  cv::copyMakeBorder(
+      *im, *im, 0, padding_h, 0, padding_w, cv::BORDER_CONSTANT, cv::Scalar(0));
+  data->new_im_size_[0] = im->rows;
+  data->new_im_size_[1] = im->cols;
+  return true;
+}
+
+// Resizes `im` in place (nearest-neighbour interpolation) so that its longer
+// side is scaled by long_size_ / longer_side. The size before resizing, the
+// new size, the scale and a "resize" entry in `reshape_order_` are recorded
+// in `data`. Returns false when `long_size_` is not positive.
+bool ResizeByLong::Run(cv::Mat* im, ImageBlob* data) {
+  if (long_size_ <= 0) {
+    std::cerr << "[ResizeByLong] long_size should be greater than 0"
+              << std::endl;
+    return false;
+  }
+  const int src_h = im->rows;
+  const int src_w = im->cols;
+  // Remember the incoming size ({rows, cols}) so the step can be undone
+  data->im_size_before_resize_[0] = src_h;
+  data->im_size_before_resize_[1] = src_w;
+  data->reshape_order_.push_back("resize");
+
+  const int longer_side = std::max(src_w, src_h);
+  const float factor =
+      static_cast<float>(long_size_) / static_cast<float>(longer_side);
+  cv::resize(*im, *im, cv::Size(), factor, factor, cv::INTER_NEAREST);
+
+  data->new_im_size_[0] = im->rows;
+  data->new_im_size_[1] = im->cols;
+  data->scale = factor;
+  return true;
+}
+
+// Resizes `im` in place to exactly `width_` x `height_` using the
+// interpolation method named by `interp_`. The size before resizing, the new
+// size and a "resize" entry in `reshape_order_` are recorded in `data`.
+// Returns false when the target size is not positive or `interp_` is not a
+// key of the `interpolations` table.
+bool Resize::Run(cv::Mat* im, ImageBlob* data) {
+  const bool valid_size = width_ > 0 && height_ > 0;
+  if (!valid_size) {
+    std::cerr << "[Resize] width and height should be greater than 0"
+              << std::endl;
+    return false;
+  }
+  const auto interp_entry = interpolations.find(interp_);
+  if (interp_entry == interpolations.end()) {
+    std::cerr << "[Resize] Invalid interpolation method: '" << interp_ << "'"
+              << std::endl;
+    return false;
+  }
+  // Remember the incoming size ({rows, cols}) so the step can be undone
+  data->im_size_before_resize_[0] = im->rows;
+  data->im_size_before_resize_[1] = im->cols;
+  data->reshape_order_.push_back("resize");
+
+  const cv::Size target(width_, height_);
+  cv::resize(*im, *im, target, 0, 0, interp_entry->second);
+  data->new_im_size_[0] = im->rows;
+  data->new_im_size_[1] = im->cols;
+  return true;
+}
+
+// Rebuilds the preprocessing pipeline from the YAML sequence
+// `transforms_node`. The key of the first entry of every item names the
+// transform to create and the entry's value is handed to that transform's
+// Init(). `to_rgb` is stored and decides whether Run() converts the image
+// from BGR to RGB first.
+void Transforms::Init(const YAML::Node& transforms_node, bool to_rgb) {
+  to_rgb_ = to_rgb;
+  transforms_.clear();
+  for (const auto& node : transforms_node) {
+    const auto entry = node.begin();
+    const std::string op_name = entry->first.as<std::string>();
+    std::cout << "trans name: " << op_name << std::endl;
+    auto op = CreateTransform(op_name);
+    op->Init(entry->second);
+    transforms_.push_back(op);
+  }
+}
+
+// Creates the transform object that corresponds to `transform_name`.
+// Supported names: Normalize, ResizeByShort, CenterCrop, Resize, Padding and
+// ResizeByLong. Any other name is reported on stderr and terminates the
+// process with exit(-1).
+std::shared_ptr<Transform> Transforms::CreateTransform(
+    const std::string& transform_name) {
+  if (transform_name == "Normalize") {
+    return std::make_shared<Normalize>();
+  }
+  if (transform_name == "ResizeByShort") {
+    return std::make_shared<ResizeByShort>();
+  }
+  if (transform_name == "CenterCrop") {
+    return std::make_shared<CenterCrop>();
+  }
+  if (transform_name == "Resize") {
+    return std::make_shared<Resize>();
+  }
+  if (transform_name == "Padding") {
+    return std::make_shared<Padding>();
+  }
+  if (transform_name == "ResizeByLong") {
+    return std::make_shared<ResizeByLong>();
+  }
+  // Unknown name: there is no sensible object to hand back
+  std::cerr << "There's unexpected transform(name='" << transform_name
+            << "')." << std::endl;
+  exit(-1);
+}
+
+// Applies the configured transforms to `im`, in order, and stores the result
+// in `data`.
+//
+// The image is optionally converted from BGR to RGB, converted to 32-bit
+// float, run through every transform and finally copied channel by channel
+// into `data->im_data_`, so each channel occupies one contiguous plane.
+// Returns false as soon as one transform fails.
+bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
+  if (to_rgb_) {
+    cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
+  }
+  im->convertTo(*im, CV_32FC3);
+
+  // Before any transform runs, the original and the current size coincide
+  data->ori_im_size_[0] = im->rows;
+  data->ori_im_size_[1] = im->cols;
+  data->new_im_size_[0] = im->rows;
+  data->new_im_size_[1] = im->cols;
+
+  // Apply the preprocessing operators in the order they were configured
+  for (const auto& op : transforms_) {
+    if (!op->Run(im, data)) {
+      std::cerr << "Apply transforms to image failed!" << std::endl;
+      return false;
+    }
+  }
+
+  // Convert the interleaved (HWC) image into planar (CHW) layout, writing the
+  // planes back to back into the ImageBlob buffer
+  const int rows = im->rows;
+  const int cols = im->cols;
+  const int channels = im->channels();
+  data->im_data_.resize(channels * rows * cols);
+  float* buffer = data->im_data_.data();
+  for (int ch = 0; ch < channels; ++ch) {
+    cv::extractChannel(
+        *im, cv::Mat(rows, cols, CV_32FC1, buffer + ch * rows * cols), ch);
+  }
+  return true;
+}
+}  // namespace PaddleX
--- a/deploy/cpp/src/visualize.cpp
+++ b/deploy/cpp/src/visualize.cpp
+//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/paddlex/visualize.h"
+
+namespace PaddleX {
+// Builds a color table with three consecutive entries per class.
+//
+// The color of class i is derived from the bits of i: bit 0, 1 and 2 of every
+// successive 3-bit group of i are written to the 1st, 2nd and 3rd component
+// respectively, starting at the component's most significant bit (bit 7) and
+// moving one bit down per group. Class 0 therefore maps to {0, 0, 0}.
+std::vector<int> GenerateColorMap(int num_class) {
+  std::vector<int> palette(3 * num_class, 0);
+  for (int cls = 0; cls < num_class; ++cls) {
+    const int base = cls * 3;
+    int shift = 7;
+    for (int bits = cls; bits != 0; bits >>= 3) {
+      palette[base] |= (bits & 1) << shift;
+      palette[base + 1] |= ((bits >> 1) & 1) << shift;
+      palette[base + 2] |= ((bits >> 2) & 1) << shift;
+      --shift;
+    }
+  }
+  return palette;
+}
+
+// Draws the detection result onto a copy of `img` and returns that copy.
+//
+// Boxes whose score is below `threshold` are skipped. For every remaining box
+// a rectangle, a filled label background and the text "<category><score>%"
+// are drawn in the color that `colormap` holds for the box's category. When a
+// box carries mask data, the mask is resized to the box, binarized at 0.5 and
+// blended over the image in the same color. `labels` is not used.
+cv::Mat VisualizeDet(const cv::Mat& img,
+                     const DetResult& result,
+                     const std::map<int, std::string>& labels,
+                     const std::vector<int>& colormap,
+                     float threshold) {
+  cv::Mat vis_img = img.clone();
+  // Mutable copy of the boxes: the mask buffer is wrapped by a cv::Mat below
+  auto boxes = result.boxes;
+  for (int i = 0; i < boxes.size(); ++i) {
+    if (boxes[i].score < threshold) {
+      continue;
+    }
+    // coordinate is passed as (x, y, width, height)
+    cv::Rect roi = cv::Rect(boxes[i].coordinate[0],
+                            boxes[i].coordinate[1],
+                            boxes[i].coordinate[2],
+                            boxes[i].coordinate[3]);
+
+    // Build the box color and the caption
+    std::string text = boxes[i].category;
+    int c1 = colormap[3 * boxes[i].category_id + 0];
+    int c2 = colormap[3 * boxes[i].category_id + 1];
+    int c3 = colormap[3 * boxes[i].category_id + 2];
+    cv::Scalar roi_color = cv::Scalar(c1, c2, c3);
+    text += std::to_string(static_cast<int>(boxes[i].score * 100)) + "%";
+    int font_face = cv::FONT_HERSHEY_SIMPLEX;
+    double font_scale = 0.5f;
+    // NOTE(review): the OpenCV text functions take an integer thickness, so
+    // 0.5 is presumably truncated to 0 when passed below -- confirm intent.
+    float thickness = 0.5;
+    cv::Size text_size =
+        cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
+    cv::Point origin;
+    origin.x = roi.x;
+    origin.y = roi.y;
+
+    // Build the background rectangle of the caption, placed above the box
+    cv::Rect text_back = cv::Rect(boxes[i].coordinate[0],
+                                  boxes[i].coordinate[1] - text_size.height,
+                                  text_size.width,
+                                  text_size.height);
+
+    // Draw the box, the caption background and the caption text
+    cv::rectangle(vis_img, roi, roi_color, 2);
+    cv::rectangle(vis_img, text_back, roi_color, -1);
+    cv::putText(vis_img,
+                text,
+                origin,
+                font_face,
+                font_scale,
+                cv::Scalar(255, 255, 255),
+                thickness);
+
+    // Build the instance segmentation mask (only for boxes that carry one)
+    if (boxes[i].mask.data.size() == 0) {
+      continue;
+    }
+    cv::Mat bin_mask(result.mask_resolution,
+                     result.mask_resolution,
+                     CV_32FC1,
+                     boxes[i].mask.data.data());
+    cv::resize(bin_mask,
+               bin_mask,
+               cv::Size(boxes[i].mask.shape[0], boxes[i].mask.shape[1]));
+    cv::threshold(bin_mask, bin_mask, 0.5, 1, cv::THRESH_BINARY);
+    cv::Mat full_mask = cv::Mat::zeros(vis_img.size(), CV_8UC1);
+    // NOTE(review): roi is not clipped to the image here; a box reaching
+    // outside the image presumably makes full_mask(roi) fail -- confirm.
+    bin_mask.copyTo(full_mask(roi));
+    // Scale the 0/1 mask by each color component and blend it at half weight
+    cv::Mat mask_ch[3];
+    mask_ch[0] = full_mask * c1;
+    mask_ch[1] = full_mask * c2;
+    mask_ch[2] = full_mask * c3;
+    cv::Mat mask;
+    cv::merge(mask_ch, 3, mask);
+    cv::addWeighted(vis_img, 1, mask, 0.5, 0, vis_img);
+  }
+  return vis_img;
+}
+
+// Renders the label map of a segmentation result as a color image.
+//
+// Every pixel of the returned 3-channel image gets the color that `colormap`
+// holds for the pixel's category id; ids without an entry in `colormap` are
+// left black. The returned image has the size of the label map
+// (`result.label_map.shape` = {rows, cols}). `img` and `labels` are not used.
+cv::Mat VisualizeSeg(const cv::Mat& img,
+                     const SegResult& result,
+                     const std::map<int, std::string>& labels,
+                     const std::vector<int>& colormap) {
+  std::vector<uint8_t> label_map(result.label_map.data.begin(),
+                                 result.label_map.data.end());
+  cv::Mat mask(result.label_map.shape[0],
+               result.label_map.shape[1],
+               CV_8UC1,
+               label_map.data());
+  cv::Mat color_mask = cv::Mat::zeros(mask.rows, mask.cols, CV_8UC3);
+  const int num_colors = static_cast<int>(colormap.size() / 3);
+  // Iterate over the label map itself: indexing it with the dimensions of
+  // `img` reads out of bounds whenever the two sizes differ.
+  for (int i = 0; i < mask.rows; i++) {
+    for (int j = 0; j < mask.cols; j++) {
+      int category_id = static_cast<int>(mask.at<uchar>(i, j));
+      if (category_id >= num_colors) {
+        // No color defined for this id; keep the pixel black
+        continue;
+      }
+      cv::Vec3b& pixel = color_mask.at<cv::Vec3b>(i, j);
+      pixel[0] = colormap[3 * category_id + 0];
+      pixel[1] = colormap[3 * category_id + 1];
+      pixel[2] = colormap[3 * category_id + 2];
+    }
+  }
+  return color_mask;
+}
+
+// Returns the path `<save_dir><OS_PATH_SEP><file name of file_path>`.
+//
+// `save_dir` is created first when it is not accessible; a failure to create
+// it is reported on stderr but does not change the returned path. The file
+// name is everything after the last path separator of `file_path`, or all of
+// `file_path` when it contains no separator.
+std::string generate_save_path(const std::string& save_dir,
+                               const std::string& file_path) {
+  if (access(save_dir.c_str(), 0) < 0) {
+#ifdef _WIN32
+    const int ret = mkdir(save_dir.c_str());
+#else
+    const int ret = mkdir(save_dir.c_str(), S_IRWXU);
+#endif
+    if (ret < 0) {
+      std::cerr << "Fail to create " << save_dir << " directory." << std::endl;
+    }
+  }
+  // Handle "no separator" explicitly instead of relying on npos wrapping
+  // around when it is narrowed to int.
+  const std::string::size_type pos = file_path.find_last_of(OS_PATH_SEP);
+  const std::string image_name =
+      (pos == std::string::npos) ? file_path : file_path.substr(pos + 1);
+  return save_dir + OS_PATH_SEP + image_name;
+}
+}  // namespace of PaddleX
--- a/docs/anaconda_install.md
+++ b/docs/anaconda_install.md
+# Anaconda安装使用
+Anaconda是一个开源的Python发行版本，其包含了conda、Python等180多个科学包及其依赖项。使用Anaconda可以通过创建多个独立的Python环境，避免用户的Python环境安装太多不同版本依赖导致冲突。
+
+## Windows安装Anaconda
+### 第一步 下载
+在Anaconda官网[(https://www.anaconda.com/products/individual)](https://www.anaconda.com/products/individual)选择下载Windows Python3.7 64-Bit版本
+
+### 第二步 安装
+运行下载的安装包(以.exe为后缀)，根据引导完成安装, 用户可自行修改安装目录（如下图）
+![](./images/anaconda_windows.png)
+
+### 第三步 使用
+- 点击Windows系统左下角的Windows图标，打开：所有程序->Anaconda3/2（64-bit）->Anaconda Prompt  
+- 在命令行中执行下述命令
+```cmd
+# 创建名为my_paddlex的环境，指定Python版本为3.7
+conda create -n my_paddlex python=3.7
+# 进入my_paddlex环境
+conda activate my_paddlex
+# 安装git
+conda install git
+# 安装pycocotools
+pip install cython
+pip install git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI
+# 安装paddlepaddle（CPU版本；如需GPU版本，请改为安装paddlepaddle-gpu）
+pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
+# 安装paddlex
+pip install paddlex -i https://mirror.baidu.com/pypi/simple
+```  
+按如上方式配置后，即可在环境中使用PaddleX了，命令行输入`python`回车后，`import paddlex`试试吧，之后再次使用都可以通过打开'所有程序->Anaconda3/2（64-bit）->Anaconda Prompt'，再执行`conda activate my_paddlex`进入环境后，即可再次使用paddlex
+
+## Linux/Mac安装
+
+### 第一步 下载
+在Anaconda官网[(https://www.anaconda.com/products/individual)](https://www.anaconda.com/products/individual)选择下载对应系统的Python3.7版本（Mac下载Command Line Installer版本即可)
+
+### 第二步 安装
+打开终端，在终端安装Anaconda
+```
+# ~/Downloads/Anaconda3-2019.07-Linux-x86_64.sh即下载的文件
+bash ~/Downloads/Anaconda3-2019.07-Linux-x86_64.sh
+```
+安装过程中一直回车即可，如提示设置安装路径，可根据需求修改，一般默认即可。
+
+### 第三步 使用
+```
+# 创建名为my_paddlex的环境，指定Python版本为3.7
+conda create -n my_paddlex python=3.7
+# 进入my_paddlex环境
+conda activate my_paddlex
+# 安装pycocotools
+pip install cython
+pip install pycocotools
+# 安装paddlepaddle（CPU版本；如需GPU版本，请改为安装paddlepaddle-gpu）
+pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
+# 安装paddlex
+pip install paddlex -i https://mirror.baidu.com/pypi/simple
+```
+按如上方式配置后，即可在环境中使用PaddleX了，终端输入`python`回车后，`import paddlex`试试吧，之后再次使用只需再打开终端，再执行`conda activate my_paddlex`进入环境后，即可使用paddlex
--- a/docs/apis/images/insect_bbox_pr_curve(iou-0.5).png
+++ b/docs/apis/images/insect_bbox_pr_curve(iou-0.5).png
--- a/docs/apis/images/xiaoduxiong_bbox_pr_curve(iou-0.5).png
+++ b/docs/apis/images/xiaoduxiong_bbox_pr_curve(iou-0.5).png
--- a/docs/apis/images/xiaoduxiong_segm_pr_curve(iou-0.5).png
+++ b/docs/apis/images/xiaoduxiong_segm_pr_curve(iou-0.5).png
--- a/docs/apis/models.md
+++ b/docs/apis/models.md
@@ -17,7 +17,7 @@ paddlex.cls.ResNet50(num_classes=1000)
 #### 分类器训练函数接口

 > ```python
-> train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05)
+> train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5)
 > ```
 >
 > **参数：**
@@ -37,6 +37,8 @@ paddlex.cls.ResNet50(num_classes=1000)
 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认值为False。
 > > - **sensitivities_file** (str): 若指定为路径时，则加载路径下敏感度信息进行裁剪；若为字符串'DEFAULT'，则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪；若为None，则不进行裁剪。默认为None。
 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
+> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
+> > - **early_stop_patience** (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内连续下降或持平，则终止训练。默认值为5。

 #### 分类器评估函数接口

@@ -75,7 +77,7 @@ paddlex.cls.ResNet50(num_classes=1000)

 ### 其它分类器类

-除`ResNet50`外，`paddlex.cls`下还提供了`ResNet18`、`ResNet34`、`ResNet101`、`ResNet50_vd`、`ResNet101_vd`、`DarkNet53`、`MobileNetV1`、`MobileNetV2`、`MobileNetV3_small`、`MobileNetV3_large`、`Xception41`、`Xception65`、`Xception71`、`ShuffleNetV2`,  使用方式（包括函数接口和参数）均与`ResNet50`一致，各模型效果可参考[模型库](../model_zoo.md)中列表。
+除`ResNet50`外，`paddlex.cls`下还提供了`ResNet18`、`ResNet34`、`ResNet101`、`ResNet50_vd`、`ResNet101_vd`、`ResNet50_vd_ssld`、`ResNet101_vd_ssld`、`DarkNet53`、`MobileNetV1`、`MobileNetV2`、`MobileNetV3_small`、`MobileNetV3_large`、`MobileNetV3_small_ssld`、`MobileNetV3_large_ssld`、`Xception41`、`Xception65`、`Xception71`、`ShuffleNetV2`,  使用方式（包括函数接口和参数）均与`ResNet50`一致，各模型效果可参考[模型库](../model_zoo.md)中列表。



@@ -109,7 +111,7 @@ paddlex.det.YOLOv3(num_classes=80, backbone='MobileNetV1', anchors=None, anchor_
 #### YOLOv3训练函数接口

 > ```python
-> train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, save_interval_epochs=20, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=1.0/8000, warmup_steps=1000, warmup_start_lr=0.0, lr_decay_epochs=[213, 240], lr_decay_gamma=0.1, metric=None, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05)
+> train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, save_interval_epochs=20, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=1.0/8000, warmup_steps=1000, warmup_start_lr=0.0, lr_decay_epochs=[213, 240], lr_decay_gamma=0.1, metric=None, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5)
 > ```
 >
 > **参数：**
@@ -132,6 +134,8 @@ paddlex.det.YOLOv3(num_classes=80, backbone='MobileNetV1', anchors=None, anchor_
 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认值为False。
 > > - **sensitivities_file** (str): 若指定为路径时，则加载路径下敏感度信息进行裁剪；若为字符串'DEFAULT'，则自动下载在PascalVOC数据上获得的敏感度信息进行裁剪；若为None，则不进行裁剪。默认为None。
 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
+> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
+> > - **early_stop_patience** (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内连续下降或持平，则终止训练。默认值为5。

 #### YOLOv3评估函数接口

@@ -186,7 +190,7 @@ paddlex.det.FasterRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspec
 #### FasterRCNN训练函数接口

 > ```python
-> train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2,save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.0025, warmup_steps=500, warmup_start_lr=1.0/1200, lr_decay_epochs=[8, 11], lr_decay_gamma=0.1, metric=None, use_vdl=False)
+> train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2,save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.0025, warmup_steps=500, warmup_start_lr=1.0/1200, lr_decay_epochs=[8, 11], lr_decay_gamma=0.1, metric=None, use_vdl=False, early_stop=False, early_stop_patience=5)
 >
 > ```
 >
@@ -208,6 +212,8 @@ paddlex.det.FasterRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspec
 > > - **lr_decay_gamma** (float): 默认优化器的学习率衰减率。默认为0.1。
 > > - **metric** (bool): 训练过程中评估的方式，取值范围为['COCO', 'VOC']。默认值为None。
 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认值为False。
+> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
+> > - **early_stop_patience** (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内连续下降或持平，则终止训练。默认值为5。

 #### FasterRCNN评估函数接口

@@ -264,7 +270,7 @@ paddlex.det.MaskRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspect_
 #### MaskRCNN训练函数接口

 > ```python
-> train(self, num_epochs, train_dataset, train_batch_size=1, eval_dataset=None, save_interval_epochs=1, log_interval_steps=20, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=1.0/800, warmup_steps=500, warmup_start_lr=1.0 / 2400, lr_decay_epochs=[8, 11], lr_decay_gamma=0.1, metric=None, use_vdl=False)
+> train(self, num_epochs, train_dataset, train_batch_size=1, eval_dataset=None, save_interval_epochs=1, log_interval_steps=20, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=1.0/800, warmup_steps=500, warmup_start_lr=1.0 / 2400, lr_decay_epochs=[8, 11], lr_decay_gamma=0.1, metric=None, use_vdl=False, early_stop=False, early_stop_patience=5)
 >
 > ```
 >
@@ -286,6 +292,8 @@ paddlex.det.MaskRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspect_
 > > - **lr_decay_gamma** (float): 默认优化器的学习率衰减率。默认为0.1。
 > > - **metric** (bool): 训练过程中评估的方式，取值范围为['COCO', 'VOC']。默认值为None。
 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认值为False。
+> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
+> > - **early_stop_patience** (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内连续下降或持平，则终止训练。默认值为5。

 #### MaskRCNN评估函数接口

@@ -350,7 +358,7 @@ paddlex.seg.DeepLabv3p(num_classes=2, backbone='MobileNetV2_x1.0', output_stride
 #### DeepLabv3训练函数接口

 > ```python
-> train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05):
+> train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5):
 >
 > ```
 >
@@ -370,6 +378,8 @@ paddlex.seg.DeepLabv3p(num_classes=2, backbone='MobileNetV2_x1.0', output_stride
 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
 > > - **sensitivities_file** (str): 若指定为路径时，则加载路径下敏感度信息进行裁剪；若为字符串'DEFAULT'，则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪；若为None，则不进行裁剪。默认为None。
 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
+> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
+> > - **early_stop_patience** (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内连续下降或持平，则终止训练。默认值为5。

 #### DeepLabv3评估函数接口

@@ -427,7 +437,7 @@ paddlex.seg.UNet(num_classes=2, upsample_mode='bilinear', use_bce_loss=False, us
 #### Unet训练函数接口

 > ```python
-> train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='COCO', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05):
+> train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='COCO', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5):
 > ```
 >
 > **参数：**
@@ -446,6 +456,8 @@ paddlex.seg.UNet(num_classes=2, upsample_mode='bilinear', use_bce_loss=False, us
 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
 > > - **sensitivities_file** (str): 若指定为路径时，则加载路径下敏感度信息进行裁剪；若为字符串'DEFAULT'，则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪；若为None，则不进行裁剪。默认为None。
 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
+> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
+> > - **early_stop_patience** (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内连续下降或持平，则终止训练。默认值为5。

 #### Unet评估函数接口


--- a/docs/apis/transforms/det_transforms.md
+++ b/docs/apis/transforms/det_transforms.md
@@ -122,56 +122,42 @@ paddlex.det.transforms.MixupImage(alpha=1.5, beta=1.5, mixup_epoch=-1)

 ## RandomExpand类
 ```python
-paddlex.det.transforms.RandomExpand(max_ratio=4., prob=0.5, mean=[127.5, 127.5, 127.5])
+paddlex.det.transforms.RandomExpand(ratio=4., prob=0.5, fill_value=[123.675, 116.28, 103.53])
 ```

-随机扩张图像，模型训练时的数据增强操作，模型训练时的数据增强操作。  
-1. 随机选取扩张比例（扩张比例大于1时才进行扩张）。  
-2. 计算扩张后图像大小。  
-3. 初始化像素值为数据集均值的图像，并将原图像随机粘贴于该图像上。  
+随机扩张图像，模型训练时的数据增强操作。
+1. 随机选取扩张比例（扩张比例大于1时才进行扩张）。
+2. 计算扩张后图像大小。
+3. 初始化像素值为输入填充值的图像，并将原图像随机粘贴于该图像上。
 4. 根据原图像粘贴位置换算出扩张后真实标注框的位置坐标。
+5. 根据原图像粘贴位置换算出扩张后真实分割区域的位置坐标。

 ### 参数
-* **max_ratio** (float): 图像扩张的最大比例。默认为4.0。
+* **ratio** (float): 图像扩张的最大比例。默认为4.0。
 * **prob** (float): 随机扩张的概率。默认为0.5。
-* **mean** (list): 图像数据集的均值（0-255）。默认为[127.5, 127.5, 127.5]。
+* **fill_value** (list): 扩张图像的初始填充值（0-255）。默认为[123.675, 116.28, 103.53]。

 ## RandomCrop类
 ```python
-paddlex.det.transforms.RandomCrop(batch_sampler=None, satisfy_all=False, avoid_no_bbox=True)
+paddlex.det.transforms.RandomCrop(aspect_ratio=[.5, 2.], thresholds=[.0, .1, .3, .5, .7, .9], scaling=[.3, 1.], num_attempts=50, allow_no_crop=True, cover_all_box=False)
 ```

 随机裁剪图像，模型训练时的数据增强操作。  
-1. 根据batch_sampler计算获取裁剪候选区域的位置。  
-    (1) 根据min scale、max scale、min aspect ratio、max aspect ratio计算随机剪裁的高、宽。  
-    (2) 根据随机剪裁的高、宽随机选取剪裁的起始点。  
-    (3) 筛选出裁剪候选区域：  
-    * 当satisfy_all为True时，需所有真实标注框与裁剪候选区域的重叠度满足需求时，该裁剪候选区域才可保留。  
-    * 当satisfy_all为False时，当有一个真实标注框与裁剪候选区域的重叠度满足需求时，该裁剪候选区域就可保留。  
-2. 遍历所有裁剪候选区域：  
-    (1) 若真实标注框与候选裁剪区域不重叠，或其中心点不在候选裁剪区域，则将该真实标注框去除。  
-    (2) 计算相对于该候选裁剪区域，真实标注框的位置，并筛选出对应的类别、混合得分。  
-    (3) 若avoid_no_bbox为False，返回当前裁剪后的信息即可；反之，要找到一个裁剪区域中真实标注框个数不为0的区域，才返回裁剪后的信息。
+1. 若allow_no_crop为True，则在thresholds加入’no_crop’。
+2. 随机打乱thresholds。
+3. 遍历thresholds中各元素：
+    (1) 如果当前thresh为’no_crop’，则返回原始图像和标注信息。
+    (2) 随机取出aspect_ratio和scaling中的值并由此计算出候选裁剪区域的高、宽、起始点。
+    (3) 计算真实标注框与候选裁剪区域IoU，若全部真实标注框的IoU都小于thresh，则继续第3步。
+    (4) 如果cover_all_box为True且存在真实标注框的IoU小于thresh，则继续第3步。
+    (5) 筛选出位于候选裁剪区域内的真实标注框，若有效框的个数为0，则继续第3步，否则进行第4步。
+4. 换算有效真值标注框相对候选裁剪区域的位置坐标。
+5. 换算有效分割区域相对候选裁剪区域的位置坐标。

 ### 参数
-* **batch_sampler** (list): 随机裁剪参数的多种组合，每种组合包含8个值，如下：
-    - max sample (int)：满足当前组合的裁剪区域的个数上限。
-    - max trial (int): 查找满足当前组合的次数。
-    - min scale (float): 裁剪面积相对原面积，每条边缩短比例的最小限制。
-    - max scale (float): 裁剪面积相对原面积，每条边缩短比例的最大限制。
-    - min aspect ratio (float): 裁剪后短边缩放比例的最小限制。
-    - max aspect ratio (float): 裁剪后短边缩放比例的最大限制。
-    - min overlap (float): 真实标注框与裁剪图像重叠面积的最小限制。
-    - max overlap (float): 真实标注框与裁剪图像重叠面积的最大限制。
-
-    默认值为None，当为None时采用如下设置：
-
-    [[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],  
-     [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],  
-     [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],  
-     [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],  
-     [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],  
-     [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],  
-     [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
-* **satisfy_all** (bool): 是否需要所有标注框满足条件，裁剪候选区域才保留。默认为False。
-* **avoid_no_bbox** (bool)： 是否对裁剪图像不存在标注框的图像进行保留。默认为True。
+* **aspect_ratio** (list): 裁剪后短边缩放比例的取值范围，以[min, max]形式表示。默认值为[.5, 2.]。
+* **thresholds** (list): 判断裁剪候选区域是否有效所需的IoU阈值取值列表。默认值为[.0, .1, .3, .5, .7, .9]。
+* **scaling** (list): 裁剪面积相对原面积的取值范围，以[min, max]形式表示。默认值为[.3, 1.]。
+* **num_attempts** (int): 在放弃寻找有效裁剪区域前尝试的次数。默认值为50。
+* **allow_no_crop** (bool): 是否允许未进行裁剪。默认值为True。
+* **cover_all_box** (bool): 是否要求所有的真实标注框都必须在裁剪区域内。默认值为False。
--- a/docs/apis/visualize.md
+++ b/docs/apis/visualize.md
@@ -3,7 +3,7 @@ PaddleX提供了一系列模型预测和结果分析的可视化函数。

 ## 目标检测/实例分割预测结果可视化
 ```
-paddlex.det.visualize(image, result, threshold=0.5, save_dir=None)
+paddlex.det.visualize(image, result, threshold=0.5, save_dir='./')
 ```
 将目标检测/实例分割模型预测得到的Box框和Mask在原图上进行可视化

@@ -11,7 +11,7 @@ paddlex.det.visualize(image, result, threshold=0.5, save_dir=None)
 > * **image** (str): 原图文件路径。  
 > * **result** (str): 模型预测结果。
 > * **threshold**(float): score阈值，将Box置信度低于该阈值的框过滤不进行可视化。默认0.5
-> * **save_dir**(str): 可视化结果保存路径。若为None，则表示不保存，该函数将可视化的结果以np.ndarray的形式返回；若设为目录路径，则将可视化结果保存至该目录下
+> * **save_dir**(str): 可视化结果保存路径。若为None，则表示不保存，该函数将可视化的结果以np.ndarray的形式返回；若设为目录路径，则将可视化结果保存至该目录下。默认值为'./'。

 ### 使用示例
 > 点击下载如下示例中的[模型](https://bj.bcebos.com/paddlex/models/xiaoduxiong_epoch_12.tar.gz)和[测试图片](https://bj.bcebos.com/paddlex/datasets/xiaoduxiong.jpeg)
@@ -23,17 +23,81 @@ pdx.det.visualize('xiaoduxiong.jpeg', result, save_dir='./')
 # 预测结果保存在./visualize_xiaoduxiong.jpeg
 ```

+## 目标检测/实例分割准确率-召回率可视化
+```
+paddlex.det.draw_pr_curve(eval_details_file=None, gt=None, pred_bbox=None, pred_mask=None, iou_thresh=0.5, save_dir='./')
+```
+将目标检测/实例分割模型评估结果中各个类别的准确率和召回率的对应关系进行可视化，同时可视化召回率和置信度阈值的对应关系。
+
+### 参数
+> * **eval_details_file** (str): 模型评估结果的保存路径，包含真值信息和预测结果。默认值为None。
+> * **gt** (list): 数据集的真值信息。默认值为None。
+> * **pred_bbox** (list): 模型在数据集上的预测框。默认值为None。
+> * **pred_mask** (list): 模型在数据集上的预测mask。默认值为None。
+> * **iou_thresh** (float): 判断预测框或预测mask为真阳时的IoU阈值。默认值为0.5。
+> * **save_dir** (str): 可视化结果保存路径。默认值为'./'。
+
+**注意：**`eval_details_file`的优先级更高，只要`eval_details_file`不为None，就会从`eval_details_file`提取真值信息和预测结果做分析。当`eval_details_file`为None时，则用`gt`、`pred_bbox`、`pred_mask`做分析。
+
+### 使用示例
+> 示例一：
+点击下载如下示例中的[模型](https://bj.bcebos.com/paddlex/models/xiaoduxiong_epoch_12.tar.gz)和[数据集](https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_det.tar.gz)
+```
+import os
+# 选择使用0号卡
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.det import transforms
+import paddlex as pdx
+
+eval_transforms = transforms.Compose([
+    transforms.Normalize(),
+    transforms.ResizeByShort(short_size=800, max_size=1333),
+    transforms.Padding(coarsest_stride=32)
+])
+
+eval_dataset = pdx.datasets.CocoDetection(
+    data_dir='xiaoduxiong_ins_det/JPEGImages',
+    ann_file='xiaoduxiong_ins_det/val.json',
+    transforms=eval_transforms)
+
+model = pdx.load_model('xiaoduxiong_epoch_12')
+metrics, evaluate_details = model.evaluate(eval_dataset, batch_size=1, return_details=True)
+gt = evaluate_details['gt']
+bbox = evaluate_details['bbox']
+mask = evaluate_details['mask']
+
+# 分别可视化bbox和mask的准召曲线
+pdx.det.draw_pr_curve(gt=gt, pred_bbox=bbox, pred_mask=mask, save_dir='./xiaoduxiong')
+```
+预测框的各个类别的准确率和召回率的对应关系、召回率和置信度阈值的对应关系可视化如下：
+![](./images/xiaoduxiong_bbox_pr_curve(iou-0.5).png)
+
+预测mask的各个类别的准确率和召回率的对应关系、召回率和置信度阈值的对应关系可视化如下：
+![](./images/xiaoduxiong_segm_pr_curve(iou-0.5).png)
+
+> 示例二：
+使用[yolov3_darknet53.py示例代码](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_darknet53.py)训练完成后，加载模型评估结果文件进行分析:
+
+```
+import paddlex as pdx
+eval_details_file = 'output/yolov3_darknet53/best_model/eval_details.json'
+pdx.det.draw_pr_curve(eval_details_file, save_dir='./insect')
+```
+预测框的各个类别的准确率和召回率的对应关系、召回率和置信度阈值的对应关系可视化如下：
+![](./images/insect_bbox_pr_curve(iou-0.5).png)
+
 ## 语义分割预测结果可视化
 ```
-paddlex.seg.visualize(image, result, weight=0.6, save_dir=None)
+paddlex.seg.visualize(image, result, weight=0.6, save_dir='./')
 ```
 将语义分割模型预测得到的Mask在原图上进行可视化

 ### 参数
 > * **image** (str): 原图文件路径。  
 > * **result** (str): 模型预测结果。
-> * **weight**(float): mask可视化结果与原图权重因子，weight表示原图的权重。默认0.6
-> * **save_dir**(str): 可视化结果保存路径。若为None，则表示不保存，该函数将可视化的结果以np.ndarray的形式返回；若设为目录路径，则将可视化结果保存至该目录下
+> * **weight**(float): mask可视化结果与原图权重因子，weight表示原图的权重。默认0.6。
+> * **save_dir**(str): 可视化结果保存路径。若为None，则表示不保存，该函数将可视化的结果以np.ndarray的形式返回；若设为目录路径，则将可视化结果保存至该目录下。默认值为'./'。

 ### 使用示例
 > 点击下载如下示例中的[模型](https://bj.bcebos.com/paddlex/models/cityscape_deeplab.tar.gz)和[测试图片](https://bj.bcebos.com/paddlex/datasets/city.png)

--- a/docs/deploy.md
+++ b/docs/deploy.md
-# 模型部署导出
-
-### 导出inference模型
-
-在服务端部署的模型需要首先将模型导出为inference格式模型，导出的模型将包括`__model__`、`__params__`和`model.yml`三个文名，分别为模型的网络结构，模型权重和模型的配置文件（包括数据预处理参数等等）。在安装完PaddleX后，在命令行终端使用如下命令导出模型到当前目录`inferece_model`下。
-
-> 可直接下载垃圾检测模型测试本文档的流程[garbage_epoch_12.tar.gz](https://bj.bcebos.com/paddlex/models/garbage_epoch_12.tar.gz)
-
-```
-paddlex --export_inference --model_dir=./garbage_epoch_12 --save_dir=./inference_model
-```
-
-## 模型C++和Python部署方案预计一周内推出...
--- a/docs/deploy/deploy.md
+++ b/docs/deploy/deploy.md
+# 模型预测部署
+
+本文档指引用户如何采用更高性能的方式来部署使用PaddleX训练的模型。使用本文档的模型部署方式，会在模型运算过程中对模型计算图进行优化，同时减少内存操作，相比普通的paddlepaddle模型加载和预测方式，预测速度平均可提升1倍，具体各模型性能对比见[预测性能对比](#预测性能对比)。
+
+## 服务端部署
+
+### 导出inference模型
+
+在服务端部署的模型需要首先将模型导出为inference格式模型，导出的模型将包括`__model__`、`__params__`和`model.yml`三个文件，分别为模型的网络结构，模型权重和模型的配置文件（包括数据预处理参数等等）。在安装完PaddleX后，在命令行终端使用如下命令导出模型到当前目录`inference_model`下。
+
+> 可直接下载垃圾检测模型测试本文档的流程[garbage_epoch_12.tar.gz](https://bj.bcebos.com/paddlex/models/garbage_epoch_12.tar.gz)
+
+```
+paddlex --export_inference --model_dir=./garbage_epoch_12 --save_dir=./inference_model
+```
+
+### Python部署
+PaddleX已经集成了基于Python的高性能预测接口，在安装PaddleX后，可参照如下代码示例，进行预测。相关的接口文档可参考[paddlex.deploy](apis/deploy.md)
+> 点击下载测试图片 [garbage.bmp](https://bj.bcebos.com/paddlex/datasets/garbage.bmp)
+```
+import paddlex as pdx
+predictor = pdx.deploy.create_predictor('./inference_model')
+result = predictor.predict(image='garbage.bmp')
+```
+
+### C++部署
+
+C++部署方案位于目录`deploy/cpp/`下，且独立于PaddleX其他模块。该方案支持在 Windows 和 Linux 完成编译、二次开发集成和部署运行。具体使用方法和编译：
+
+- Linux平台：[linux](deploy_cpp_linux.md)
+- Windows平台：[windows](deploy_cpp_win_vs2019.md)
+
+### 预测性能对比
+
+#### 测试环境
+
+- CUDA 9.0
+- CUDNN 7.5
+- PaddlePaddle 1.7.1
+- GPU: Tesla P40
+- AnalysisPredictor 指采用Python的高性能预测方式
+- Executor 指采用paddlepaddle普通的python预测方式
+- Batch Size均为1，耗时单位为ms/image，只计算模型运行时间，不包括数据的预处理和后处理
+
+| 模型 | AnalysisPredictor耗时 | Executor耗时 | 输入图像大小 |
+| :---- | :--------------------- | :------------ | :------------ |
+| resnet50 | 4.84 | 7.57 | 224*224 |
+| mobilenet_v2 | 3.27 | 5.76 | 224*224 |
+| unet | 22.51 | 34.60 |513*513 |
+| deeplab_mobile | 63.44 | 358.31 |1025*2049 |
+| yolo_mobilenetv2 | 15.20 | 19.54 |  608*608 |
+| faster_rcnn_r50_fpn_1x | 50.05 | 69.58 |800*1088 |
+| faster_rcnn_r50_1x | 326.11 | 347.22 | 800*1067 |
+| mask_rcnn_r50_fpn_1x | 67.49 | 91.02 | 800*1088 |
+| mask_rcnn_r50_1x | 326.11 | 350.94 | 800*1067 |
+
+## 移动端部署
+
+> Lite模型导出正在集成中，即将开源...
--- a/docs/deploy/deploy_cpp_linux.md
+++ b/docs/deploy/deploy_cpp_linux.md
+# Linux平台编译指南
+
+## 说明
+本文档在 `Linux`平台使用`GCC 4.8.5` 和 `GCC 4.9.4`测试过，如果需要使用更高G++版本编译使用，则需要重新编译Paddle预测库，请参考: [从源码编译Paddle预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id12)。
+
+## 前置条件
+* G++ 4.8.2 ~ 4.9.4
+* CUDA 9.0 / CUDA 10.0, CUDNN 7+ （仅在使用GPU版本的预测库时需要）
+* CMake 3.0+
+
+请确保系统已经安装好上述基本软件，**下面所有示例以工作目录 `/root/projects/`演示**。
+
+### Step1: 下载代码
+
+ `git clone https://github.com/PaddlePaddle/PaddleX.git`
+
+**说明**：其中`C++`预测代码在`/root/projects/PaddleX/deploy/cpp` 目录，该目录不依赖任何`PaddleX`下其他目录。
+
+
+### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference
+
+PaddlePaddle C++ 预测库针对不同的`CPU`，`CUDA`，以及是否支持TensorRT，提供了不同的预编译版本，请根据实际情况下载:  [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id1)
+
+
+下载并解压后`/root/projects/fluid_inference`目录包含内容为：
+```
+fluid_inference
+├── paddle # paddle核心库和头文件
+|
+├── third_party # 第三方依赖库和头文件
+|
+└── version.txt # 版本和编译信息
+```
+
+**注意:** 预编译版本除`nv-jetson-cuda10-cudnn7.5-trt5` 以外其它包都是基于`GCC 4.8.5`编译，使用高版本`GCC`可能存在 `ABI`兼容性问题，建议降级或[自行编译预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id12)。
+
+
+### Step4: 编译
+
+编译`cmake`的命令在`scripts/build.sh`中，请根据实际情况修改主要参数，其主要内容说明如下：
+```
+# 是否使用GPU(即是否使用 CUDA)
+WITH_GPU=ON
+# 是否集成 TensorRT(仅WITH_GPU=ON 有效)
+WITH_TENSORRT=OFF
+# 上一步下载的 Paddle 预测库路径
+PADDLE_DIR=/root/projects/deps/fluid_inference/
+# CUDA 的 lib 路径
+CUDA_LIB=/usr/local/cuda/lib64/
+# CUDNN 的 lib 路径
+CUDNN_LIB=/usr/local/cudnn/lib64/
+
+# OPENCV 路径, 如果使用自带预编译版本可不设置
+OPENCV_DIR=$(pwd)/deps/opencv3gcc4.8/
+sh $(pwd)/scripts/bootstrap.sh
+
+# 以下无需改动
+rm -rf build
+mkdir -p build
+cd build
+cmake .. \
+    -DWITH_GPU=${WITH_GPU} \
+    -DWITH_TENSORRT=${WITH_TENSORRT} \
+    -DPADDLE_DIR=${PADDLE_DIR} \
+    -DCUDA_LIB=${CUDA_LIB} \
+    -DCUDNN_LIB=${CUDNN_LIB} \
+    -DOPENCV_DIR=${OPENCV_DIR}
+make
+
+```
+
+修改脚本设置好主要参数后，执行`build`脚本：
+ ```shell
+ sh ./scripts/build.sh
+ ```
+
+### Step5: 预测及可视化
+编译成功后，预测demo的可执行程序分别为`build/detector`，`build/classifer`，`build/segmenter`，用户可根据自己的模型类型选择，其主要命令参数说明如下：
+
+|  参数   | 说明  |
+|  ----  | ----  |
+| model_dir  | 导出的预测模型所在路径 |
+| image  | 要预测的图片文件路径 |
+| image_list  | 按行存储图片路径的.txt文件 |
+| use_gpu  | 是否使用 GPU 预测, 支持值为0或1(默认值为0) |
+| gpu_id  | GPU 设备ID, 默认值为0 |
+| save_dir | 保存可视化结果的路径, 默认值为"output"，classfier无该参数 |
+
+## 样例
+
+可使用[垃圾检测模型](deploy.md#导出inference模型)中生成的`inference_model`模型和测试图片进行预测。
+
+`样例一`：
+
+不使用`GPU`测试图片 `/path/to/garbage.bmp`  
+
+```shell
+./build/detector --model_dir=/path/to/inference_model --image=/path/to/garbage.bmp --save_dir=output
+```
+图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
+
+
+`样例二`:
+
+使用`GPU`预测多个图片`/path/to/image_list.txt`，image_list.txt内容的格式如下：
+```
+/path/to/images/garbage1.jpeg
+/path/to/images/garbage2.jpeg
+...
+/path/to/images/garbagen.jpeg
+```
+```shell
+./build/detector --model_dir=/path/to/models/inference_model --image_list=/path/to/image_list.txt --use_gpu=1 --save_dir=output
+```
+图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
+
--- a/docs/deploy/deploy_cpp_win_vs2019.md
+++ b/docs/deploy/deploy_cpp_win_vs2019.md
+# Visual Studio 2019 Community CMake 编译指南
+
+## 说明
+Windows 平台下，我们使用`Visual Studio 2019 Community` 进行了测试。微软从`Visual Studio 2017`开始即支持直接管理`CMake`跨平台编译项目，但是直到`2019`才提供了稳定和完全的支持，所以如果你想使用CMake管理项目编译构建，我们推荐你使用`Visual Studio 2019`环境下构建。
+
+## 前置条件
+* Visual Studio 2019
+* CUDA 9.0 / CUDA 10.0, CUDNN 7+ （仅在使用GPU版本的预测库时需要）
+* CMake 3.0+
+
+请确保系统已经安装好上述基本软件，我们使用的是`VS2019`的社区版。
+
+**下面所有示例以工作目录为 `D:\projects`演示**。
+
+### Step1: 下载代码
+
+下载源代码
+```shell
+d:
+mkdir projects
+cd projects
+git clone https://github.com/PaddlePaddle/PaddleX.git
+```
+
+**说明**：其中`C++`预测代码在`PaddleX/deploy/cpp` 目录，该目录不依赖任何`PaddleX`下其他目录。
+
+
+### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference
+
+PaddlePaddle C++ 预测库针对不同的`CPU`和`CUDA`版本提供了不同的预编译版本，请根据实际情况下载:  [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/windows_cpp_inference.html)
+
+解压后`D:\projects\fluid_inference*\`目录下主要包含的内容为：
+```
+├── \paddle\ # paddle核心库和头文件
+|
+├── \third_party\ # 第三方依赖库和头文件
+|
+└── \version.txt # 版本和编译信息
+```
+
+### Step3: 安装配置OpenCV
+
+1. 在OpenCV官网下载适用于Windows平台的3.4.6版本， [下载地址](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download)  
+2. 运行下载的可执行文件，将OpenCV解压至指定目录，如`D:\projects\opencv`
+3. 配置环境变量，如下流程所示  
+    - 我的电脑->属性->高级系统设置->环境变量
+    - 在系统变量中找到Path（如没有，自行创建），并双击编辑
+    - 新建，将opencv路径填入并保存，如`D:\projects\opencv\build\x64\vc14\bin`
+
+### Step4: 使用Visual Studio 2019直接编译CMake
+
+1. 打开Visual Studio 2019 Community，点击`继续但无需代码`
+
+![step2](images/vs2019_step1.png)
+
+2. 点击： `文件`->`打开`->`CMake`
+
+![step2.1](images/vs2019_step2.png)
+
+选择项目代码所在路径，并打开`CMakeLists.txt`：
+
+![step2.2](images/vs2019_step3.png)
+
+3. 点击：`项目`->`PADDLEX_INFERENCE的CMake设置`
+
+![step3](images/vs2019_step4.png)
+
+4. 点击`浏览`，分别设置编译选项指定`CUDA`、`OpenCV`、`Paddle预测库`的路径
+
+依赖库路径的含义说明如下（带*表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量对齐，**使用9.0、10.0版本，不使用9.2、10.1等版本CUDA库**）：
+
+|  参数名   | 含义  |
+|  ----  | ----  |
+| *CUDA_LIB  | CUDA的库路径, 注：请将CUDNN的cudnn.lib文件拷贝到CUDA_LIB路径下 |
+| OPENCV_DIR  | OpenCV的安装路径 |
+| PADDLE_DIR | Paddle c++预测库的路径 |
+
+**注意：** 1. 使用`CPU`版预测库，请把`WITH_GPU`的`值`去掉勾 2. 如果使用的是`openblas`版本，请把`WITH_MKL`的`值`去掉勾
+
+![step4](images/vs2019_step5.png)
+
+**设置完成后**, 点击上图中`保存并生成CMake缓存以加载变量`。
+
+5. 点击`生成`->`全部生成`
+
+![step6](images/vs2019_step6.png)
+
+
+### Step5: 预测及可视化
+
+上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下，打开`cmd`，并切换到该目录：
+
+```
+d:
+cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release
+```
+
+编译成功后，预测demo的入口程序为`detector`，`classifer`，`segmenter`，用户可根据自己的模型类型选择，其主要命令参数说明如下：
+
+|  参数   | 说明  |
+|  ----  | ----  |
+| model_dir  | 导出的预测模型所在路径 |
+| image  | 要预测的图片文件路径 |
+| image_list  | 按行存储图片路径的.txt文件 |
+| use_gpu  | 是否使用 GPU 预测, 支持值为0或1(默认值为0) |
+| gpu_id  | GPU 设备ID, 默认值为0 |
+| save_dir | 保存可视化结果的路径, 默认值为"output"，classfier无该参数 |
+
+
+## 样例
+
+可使用[垃圾检测模型](deploy.md#导出inference模型)中生成的`inference_model`模型和测试图片进行预测。
+
+`样例一`：
+
+不使用`GPU`测试图片  `\\path\\to\\garbage.bmp`  
+
+```shell
+.\detector --model_dir=\\path\\to\\inference_model --image=\\path\\to\\garbage.bmp --save_dir=output
+
+```
+图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
+
+
+`样例二`:
+
+使用`GPU`预测多个图片`\\path\\to\\image_list.txt`，image_list.txt内容的格式如下：
+```
+\\path\\to\\images\\garbage1.jpeg
+\\path\\to\\images\\garbage2.jpeg
+...
+\\path\\to\\images\\garbagen.jpeg
+```
+```shell
+.\detector --model_dir=\\path\\to\\inference_model --image_list=\\path\\to\\image_list.txt --use_gpu=1 --save_dir=output
+```
+图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
+
--- a/docs/deploy/images/vs2019_step1.png
+++ b/docs/deploy/images/vs2019_step1.png
--- a/docs/deploy/images/vs2019_step2.png
+++ b/docs/deploy/images/vs2019_step2.png
--- a/docs/deploy/images/vs2019_step3.png
+++ b/docs/deploy/images/vs2019_step3.png
--- a/docs/deploy/images/vs2019_step4.png
+++ b/docs/deploy/images/vs2019_step4.png
--- a/docs/deploy/images/vs2019_step5.png
+++ b/docs/deploy/images/vs2019_step5.png
--- a/docs/deploy/images/vs2019_step6.png
+++ b/docs/deploy/images/vs2019_step6.png
--- a/docs/images/anaconda_windows.png
+++ b/docs/images/anaconda_windows.png
--- a/docs/install.md
+++ b/docs/install.md
 # 安装

-> 以下安装过程默认用户已安装好**paddlepaddle-gpu或paddlepaddle(版本大于或等于1.7.1)**，paddlepaddle安装方式参照[飞桨官网](https://www.paddlepaddle.org.cn/install/quick)
+以下安装过程默认用户已安装好**paddlepaddle-gpu或paddlepaddle(版本大于或等于1.7.1)**，paddlepaddle安装方式参照[飞桨官网](https://www.paddlepaddle.org.cn/install/quick)
+
+> 推荐使用Anaconda Python环境，Anaconda下安装PaddleX参考文档[Anaconda安装使用](./anaconda_install.md)

 ## Github代码安装
 github代码会跟随开发进度不断更新

--- a/docs/model_zoo.md
+++ b/docs/model_zoo.md
@@ -4,35 +4,38 @@
 表中相关模型也可下载好作为相应模型的预训练模型，通过`pretrain_weights`指定目录加载使用。

 ## 图像分类模型
-> 表中模型相关指标均为在ImageNet数据集上使用PaddlePaddle Python预测接口测试得到（测试GPU型号为Nvidia Tesla P4），预测速度为每张图片预测用时（不包括预处理和后处理）,表中符号`-`表示相关指标暂未测试。
+> 表中模型相关指标均为在ImageNet数据集上使用PaddlePaddle Python预测接口测试得到（测试GPU型号为Nvidia Tesla P40），预测速度为每张图片预测用时（不包括预处理和后处理）,表中符号`-`表示相关指标暂未测试。


-| 模型  | 模型大小 | 预测速度（毫秒） | Top1准确率 | Top5准确率 |
+| 模型  | 模型大小 | 预测速度（毫秒） | Top1准确率（%） | Top5准确率（%） |
 | :----|  :------- | :----------- | :--------- | :--------- |
-| ResNet18| 46.9MB   | 3.456        | 70.98%     | 89.92%     |
-| ResNet34| 87.5MB   | 5.668        | 74.57%     | 92.14%     |
-| ResNet50| 102.7MB  | 8.787        | 76.50%     | 93.00%     |
-| ResNet101 |179.1MB  | 15.447      | 77.56%     | 93.64%    |
-| ResNet50_vd |102.8MB  | 9.058        | 79.12%     | 94.44%     |
-| ResNet101_vd| 179.2MB  | 15.685       | 80.17%     | 94.97%     |
-| DarkNet53|166.9MB  | 11.969       | 78.04%     | 94.05%     |
-| MobileNetV1 | 16.4MB   | 2.609        | 70.99%     | 89.68%     |
-| MobileNetV2 | 14.4MB   | 4.546        | 72.15%     | 90.65%     |
-| MobileNetV3_large|  22.8MB   | -        | 75.3%     | 75.3%     |
-| MobileNetV3_small |  12.5MB   | 6.809        | 67.46%     | 87.12%     |
-| Xception41 |92.4MB   | 13.757       | 79.30%     | 94.53%     |
-| Xception65 | 144.6MB  | 19.216       | 81.00%     | 95.49%     |
-| Xception71| 151.9MB  | 23.291       | 81.11%     | 95.45%     |
-| DenseNet121 | 32.8MB   | 12.437       | 75.66%     | 92.58%     |
-| DenseNet161|116.3MB  | 27.717       | 78.57%     | 94.14%     |
-| DenseNet201|  84.6MB   | 26.583       | 77.63%     | 93.66%     |
-| ShuffleNetV2 | 10.2MB   | 6.101        | 68.8%     | 88.5%     |
+| ResNet18| 46.9MB   | 1.499        | 71.0     | 89.9     |
+| ResNet34| 87.5MB   | 2.272        | 74.6    | 92.1    |
+| ResNet50| 102.7MB  | 2.939        | 76.5     | 93.0     |
+| ResNet101 |179.1MB  | 5.314      | 77.6     | 93.6  |
+| ResNet50_vd |102.8MB  | 3.165        | 79.1     | 94.4     |
+| ResNet101_vd| 179.2MB  | 5.252       | 80.2   | 95.0     |
+| ResNet50_vd_ssld |102.8MB  | 3.165        | 82.4     | 96.1     |
+| ResNet101_vd_ssld| 179.2MB  | 5.252       | 83.7   | 96.7     |
+| DarkNet53|166.9MB  | 3.139       | 78.0     | 94.1     |
+| MobileNetV1 | 16.0MB   | 32.523        | 71.0     | 89.7    |
+| MobileNetV2 | 14.0MB   | 23.318        | 72.2     | 90.7    |
+| MobileNetV3_large|  21.0MB   | 19.308        | 75.3    | 93.2   |
+| MobileNetV3_small |  12.0MB   | 6.546        | 68.2    | 88.1     |
+| MobileNetV3_large_ssld|  21.0MB   | 19.308        | 79.0     | 94.5     |
+| MobileNetV3_small_ssld |  12.0MB   | 6.546        | 71.3     | 90.1     |
+| Xception41 |92.4MB   | 4.408       | 79.6    | 94.4     |
+| Xception65 | 144.6MB  | 6.464       | 80.3     | 94.5     |
+| DenseNet121 | 32.8MB   | 4.371       | 75.7     | 92.6     |
+| DenseNet161|116.3MB  | 8.863       | 78.6     | 94.1     |
+| DenseNet201|  84.6MB   | 8.173       | 77.6     | 93.7     |
+| ShuffleNetV2 | 9.0MB   | 10.941        | 68.8     | 88.5     |

 ## 目标检测模型

 > 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到（测试GPU型号为Nvidia Tesla V100测试得到,表中符号`-`表示相关指标暂未测试。

-| 模型    | 模型大小    | 预测时间(毫秒) | BoxAP |
+| 模型    | 模型大小    | 预测时间(毫秒) | BoxAP（%） |
 |:-------|:-----------|:-------------|:----------|
 |FasterRCNN-ResNet50|135.6MB| 78.450 | 35.2 |
 |FasterRCNN-ResNet50_vd| 135.7MB | 79.523 | 36.4 |
@@ -50,7 +53,7 @@

 > 表中模型相关指标均为在MSCOCO数据集上测试得到。

-| 模型 |模型大小 | 预测时间(毫秒) | BoxAP | SegAP |
+| 模型 |模型大小 | 预测时间(毫秒) | BoxAP（%） | SegAP（%） |
 |:---------|:---------|:----------|:---------|:--------|
 |MaskRCNN-ResNet50|51.2MB| 86.096 | 36.5 |32.2|
 |MaskRCNN-ResNet50-FPN|184.6MB | 65.859 | 37.9 |34.2|

--- a/docs/quick_start.md
+++ b/docs/quick_start.md
 # 10分钟快速上手使用

-本文档在一个小数据集上展示了如何通过PaddleX进行训练，您可以阅读PaddleX的**使用教程**来了解更多模型任务的训练使用方式。本示例同步在AIStudio上，可直接[在线体验模型训练](https://aistudio.baidu.com/aistudio/projectdetail/423472)
+本文档在一个小数据集上展示了如何通过PaddleX进行训练，您可以阅读PaddleX的**使用教程**来了解更多模型任务的训练使用方式。本示例同步在AIStudio上，可直接[在线体验模型训练](https://aistudio.baidu.com/aistudio/projectdetail/439860)

 ## 1. 准备蔬菜分类数据集
 ```

--- a/paddlex/__init__.py
+++ b/paddlex/__init__.py
@@ -20,6 +20,12 @@ from . import seg
 from . import cls
 from . import slim

+try:
+    import pycocotools
+except:
+    print("[WARNING] pycocotools is not installed, detection model is not available now.")
+    print("[WARNING] pycocotools install: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/install.md")
+
 env_info = get_environ_info()
 load_model = cv.models.load_model
 datasets = cv.datasets

--- a/paddlex/cv/datasets/coco.py
+++ b/paddlex/cv/datasets/coco.py
@@ -19,7 +19,6 @@ import random
 import numpy as np
 import paddlex.utils.logging as logging
 import paddlex as pst
-from pycocotools.coco import COCO
 from .voc import VOCDetection
 from .dataset import is_pic

@@ -47,6 +46,8 @@ class CocoDetection(VOCDetection):
                 buffer_size=100,
                 parallel_method='process',
                 shuffle=False):
+        from pycocotools.coco import COCO
+
        super(VOCDetection, self).__init__(
            transforms=transforms,
            num_workers=num_workers,

--- a/paddlex/cv/datasets/voc.py
+++ b/paddlex/cv/datasets/voc.py
@@ -18,7 +18,6 @@ import os.path as osp
 import random
 import numpy as np
 import xml.etree.ElementTree as ET
-from pycocotools.coco import COCO
 import paddlex.utils.logging as logging
 from .dataset import Dataset
 from .dataset import is_pic
@@ -51,6 +50,7 @@ class VOCDetection(Dataset):
                 buffer_size=100,
                 parallel_method='process',
                 shuffle=False):
+        from pycocotools.coco import COCO
        super(VOCDetection, self).__init__(
            transforms=transforms,
            num_workers=num_workers,

--- a/paddlex/cv/models/base.py
+++ b/paddlex/cv/models/base.py
@@ -24,6 +24,7 @@ import json
 import functools
 import paddlex.utils.logging as logging
 from paddlex.utils import seconds_to_hms
+from paddlex.utils.utils import EarlyStop
 import paddlex
 from collections import OrderedDict
 from os import path as osp
@@ -334,7 +335,9 @@ class BaseAPI:
                   save_interval_epochs=1,
                   log_interval_steps=10,
                   save_dir='output',
-                   use_vdl=False):
+                   use_vdl=False,
+                   early_stop=False,
+                   early_stop_patience=5):
        if not osp.isdir(save_dir):
            if osp.exists(save_dir):
                os.remove(save_dir)
@@ -396,6 +399,9 @@ class BaseAPI:
            train_step_component = OrderedDict()
            eval_component = OrderedDict()

+        thresh = 0.0001
+        if early_stop:
+            earlystop = EarlyStop(early_stop_patience, thresh)
        best_accuracy_key = ""
        best_accuracy = -1.0
        best_model_epoch = 1
@@ -507,3 +513,6 @@ class BaseAPI:
                    'Current evaluated best model in eval_dataset is epoch_{}, {}={}'
                    .format(best_model_epoch, best_accuracy_key,
                            best_accuracy))
+                if eval_dataset is not None and early_stop:
+                    if earlystop(current_accuracy):
+                        break
--- a/paddlex/cv/models/classifier.py
+++ b/paddlex/cv/models/classifier.py
@@ -102,7 +102,9 @@ class BaseClassifier(BaseAPI):
              lr_decay_gamma=0.1,
              use_vdl=False,
              sensitivities_file=None,
-              eval_metric_loss=0.05):
+              eval_metric_loss=0.05,
+              early_stop=False,
+              early_stop_patience=5):
        """训练。

        Args:
@@ -124,6 +126,9 @@ class BaseClassifier(BaseAPI):
            sensitivities_file (str): 若指定为路径时，则加载路径下敏感度信息进行裁剪；若为字符串'DEFAULT'，
                则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪；若为None，则不进行裁剪。默认为None。
            eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
+            early_stop (bool): 是否使用提前终止训练策略。默认值为False。
+            early_stop_patience (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内
+                连续下降或持平，则终止训练。默认值为5。

        Raises:
            ValueError: 模型从inference model进行加载。
@@ -158,7 +163,9 @@ class BaseClassifier(BaseAPI):
            save_interval_epochs=save_interval_epochs,
            log_interval_steps=log_interval_steps,
            save_dir=save_dir,
-            use_vdl=use_vdl)
+            use_vdl=use_vdl,
+            early_stop=early_stop,
+            early_stop_patience=early_stop_patience)

    def evaluate(self,
                 eval_dataset,

--- a/paddlex/cv/models/deeplabv3p.py
+++ b/paddlex/cv/models/deeplabv3p.py
@@ -231,7 +231,9 @@ class DeepLabv3p(BaseAPI):
              lr_decay_power=0.9,
              use_vdl=False,
              sensitivities_file=None,
-              eval_metric_loss=0.05):
+              eval_metric_loss=0.05,
+              early_stop=False,
+              early_stop_patience=5):
        """训练。

        Args:
@@ -252,6 +254,9 @@ class DeepLabv3p(BaseAPI):
            sensitivities_file (str): 若指定为路径时，则加载路径下敏感度信息进行裁剪；若为字符串'DEFAULT'，
                则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪；若为None，则不进行裁剪。默认为None。
            eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
+            early_stop (bool): 是否使用提前终止训练策略。默认值为False。
+            early_stop_patience (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内
+                连续下降或持平，则终止训练。默认值为5。

        Raises:
            ValueError: 模型从inference model进行加载。
@@ -288,7 +293,9 @@ class DeepLabv3p(BaseAPI):
            save_interval_epochs=save_interval_epochs,
            log_interval_steps=log_interval_steps,
            save_dir=save_dir,
-            use_vdl=use_vdl)
+            use_vdl=use_vdl,
+            early_stop=early_stop,
+            early_stop_patience=early_stop_patience)

    def evaluate(self,
                 eval_dataset,

--- a/paddlex/cv/models/faster_rcnn.py
+++ b/paddlex/cv/models/faster_rcnn.py
@@ -163,7 +163,9 @@ class FasterRCNN(BaseAPI):
              lr_decay_epochs=[8, 11],
              lr_decay_gamma=0.1,
              metric=None,
-              use_vdl=False):
+              use_vdl=False,
+              early_stop=False,
+              early_stop_patience=5):
        """训练。

        Args:
@@ -186,6 +188,9 @@ class FasterRCNN(BaseAPI):
            lr_decay_gamma (float): 默认优化器的学习率衰减率。默认为0.1。
            metric (bool): 训练过程中评估的方式，取值范围为['COCO', 'VOC']。默认值为None。
            use_vdl (bool): 是否使用VisualDL进行可视化。默认值为False。
+            early_stop (bool): 是否使用提前终止训练策略。默认值为False。
+            early_stop_patience (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内
+                连续下降或持平，则终止训练。默认值为5。

        Raises:
            ValueError: 评估类型不在指定列表中。
@@ -234,7 +239,9 @@ class FasterRCNN(BaseAPI):
            save_interval_epochs=save_interval_epochs,
            log_interval_steps=log_interval_steps,
            save_dir=save_dir,
-            use_vdl=use_vdl)
+            use_vdl=use_vdl,
+            early_stop=early_stop,
+            early_stop_patience=early_stop_patience)

    def evaluate(self,
                 eval_dataset,

--- a/paddlex/cv/models/mask_rcnn.py
+++ b/paddlex/cv/models/mask_rcnn.py
@@ -128,7 +128,9 @@ class MaskRCNN(FasterRCNN):
              lr_decay_epochs=[8, 11],
              lr_decay_gamma=0.1,
              metric=None,
-              use_vdl=False):
+              use_vdl=False,
+              early_stop=False,
+              early_stop_patience=5):
        """训练。

        Args:
@@ -151,6 +153,9 @@ class MaskRCNN(FasterRCNN):
            lr_decay_gamma (float): 默认优化器的学习率衰减率。默认为0.1。
            metric (bool): 训练过程中评估的方式，取值范围为['COCO', 'VOC']。
            use_vdl (bool): 是否使用VisualDL进行可视化。默认值为False。
+            early_stop (bool): 是否使用提前终止训练策略。默认值为False。
+            early_stop_patience (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内
+                连续下降或持平，则终止训练。默认值为5。

        Raises:
            ValueError: 评估类型不在指定列表中。
@@ -200,7 +205,9 @@ class MaskRCNN(FasterRCNN):
            save_interval_epochs=save_interval_epochs,
            log_interval_steps=log_interval_steps,
            save_dir=save_dir,
-            use_vdl=use_vdl)
+            use_vdl=use_vdl,
+            early_stop=early_stop,
+            early_stop_patience=early_stop_patience)

    def evaluate(self,
                 eval_dataset,

--- a/paddlex/cv/models/slim/visualize.py
+++ b/paddlex/cv/models/slim/visualize.py
@@ -15,9 +15,6 @@
 import os.path as osp
 import tqdm
 import numpy as np
-import matplotlib
-matplotlib.use('Agg')
-import matplotlib.pyplot as plt
 from .prune import cal_model_size
 from paddleslim.prune import load_sensitivities

@@ -30,6 +27,10 @@ def visualize(model, sensitivities_file, save_dir='./'):
        model (paddlex.cv.models): paddlex中的模型。
        sensitivities_file (str): 敏感度文件存储路径。
    """
+    import matplotlib
+    matplotlib.use('Agg')
+    import matplotlib.pyplot as plt
+
    program = model.test_prog
    place = model.places[0]
    fig = plt.figure()

--- a/paddlex/cv/models/unet.py
+++ b/paddlex/cv/models/unet.py
@@ -117,7 +117,9 @@ class UNet(DeepLabv3p):
              lr_decay_power=0.9,
              use_vdl=False,
              sensitivities_file=None,
-              eval_metric_loss=0.05):
+              eval_metric_loss=0.05,
+              early_stop=False,
+              early_stop_patience=5):
        """训练。

        Args:
@@ -138,12 +140,17 @@ class UNet(DeepLabv3p):
            sensitivities_file (str): 若指定为路径时，则加载路径下敏感度信息进行裁剪；若为字符串'DEFAULT'，
                则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪；若为None，则不进行裁剪。默认为None。
            eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
+            early_stop (bool): 是否使用提前终止训练策略。默认值为False。
+            early_stop_patience (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内
+                连续下降或持平，则终止训练。默认值为5。

        Raises:
            ValueError: 模型从inference model进行加载。
        """
-        return super(UNet, self).train(
-            num_epochs, train_dataset, train_batch_size, eval_dataset,
-            save_interval_epochs, log_interval_steps, save_dir,
-            pretrain_weights, optimizer, learning_rate, lr_decay_power,
-            use_vdl, sensitivities_file, eval_metric_loss)
+        return super(
+            UNet,
+            self).train(num_epochs, train_dataset, train_batch_size,
+                        eval_dataset, save_interval_epochs, log_interval_steps,
+                        save_dir, pretrain_weights, optimizer, learning_rate,
+                        lr_decay_power, use_vdl, sensitivities_file,
+                        eval_metric_loss, early_stop, early_stop_patience)
--- a/paddlex/cv/models/utils/visualize.py
+++ b/paddlex/cv/models/utils/visualize.py
@@ -17,8 +17,11 @@ import cv2
 import numpy as np
 from PIL import Image, ImageDraw

+import paddlex.utils.logging as logging
+from .detection_eval import fixed_linspace, backup_linspace, loadRes

-def visualize_detection(image, result, threshold=0.5, save_dir=None):
+
+def visualize_detection(image, result, threshold=0.5, save_dir='./'):
    """
        Visualize bbox and mask results
    """
@@ -31,11 +34,12 @@ def visualize_detection(image, result, threshold=0.5, save_dir=None):
            os.makedirs(save_dir)
        out_path = os.path.join(save_dir, 'visualize_{}'.format(image_name))
        image.save(out_path, quality=95)
+        logging.info('The visualized result is saved as {}'.format(out_path))
    else:
        return image


-def visualize_segmentation(image, result, weight=0.6, save_dir=None):
+def visualize_segmentation(image, result, weight=0.6, save_dir='./'):
    """
    Convert segment result to color image, and save added image.
    Args:
@@ -62,6 +66,7 @@ def visualize_segmentation(image, result, weight=0.6, save_dir=None):
        image_name = os.path.split(image)[-1]
        out_path = os.path.join(save_dir, 'visualize_{}'.format(image_name))
        cv2.imwrite(out_path, vis_result)
+        logging.info('The visualized result is saved as {}'.format(out_path))
    else:
        return vis_result

@@ -160,3 +165,130 @@ def draw_bbox_mask(image, results, threshold=0.5, alpha=0.7):
            img_array[idx[0], idx[1], :] += alpha * color_mask
            image = Image.fromarray(img_array.astype('uint8'))
    return image
+
+
+def draw_pr_curve(eval_details_file=None,
+                  gt=None,
+                  pred_bbox=None,
+                  pred_mask=None,
+                  iou_thresh=0.5,
+                  save_dir='./'):
+    """Plot per-category precision-recall and score-recall curves.
+
+    Ground truth and predictions are read from `eval_details_file` when it is
+    given; otherwise `gt`, `pred_bbox` and `pred_mask` are used as passed.
+    One image named "<style>_pr_curve(iou-<iou_thresh>).png" is written to
+    `save_dir` for boxes and, when mask predictions exist, one for masks.
+
+    Args:
+        eval_details_file (str): Path of a JSON file with the keys 'gt',
+            'bbox' and optionally 'mask'. When set, its contents replace
+            `gt`, `pred_bbox` and (if 'mask' is present) `pred_mask`.
+        gt: Ground-truth dataset; assigned to `COCO().dataset` and indexed
+            with `createIndex()` (presumably a COCO-format dict).
+        pred_bbox (list): Box predictions, handed to `loadRes`.
+        pred_mask (list): Mask predictions, handed to `loadRes`. Optional.
+        iou_thresh (float): The single IoU threshold to evaluate at.
+        save_dir (str): Output directory; created when it does not exist.
+
+    Raises:
+        Exception: `gt` or `pred_bbox` is None, a prediction list is empty,
+            or the number of categories differs from the category dimension
+            of the evaluated precision array.
+    """
+    if eval_details_file is not None:
+        import json
+        with open(eval_details_file, 'r') as f:
+            eval_details = json.load(f)
+        pred_bbox = eval_details['bbox']
+        if 'mask' in eval_details:
+            pred_mask = eval_details['mask']
+        gt = eval_details['gt']
+    # pred_mask stays optional, so only gt and pred_bbox are mandatory.
+    if gt is None or pred_bbox is None:
+        raise Exception(
+            "gt/pred_bbox is None now, please set right eval_details_file or gt/pred_bbox."
+        )
+    if len(pred_bbox) == 0:
+        raise Exception("There is no predicted bbox.")
+    if pred_mask is not None and len(pred_mask) == 0:
+        raise Exception("There is no predicted mask.")
+    from pycocotools.coco import COCO
+    coco = COCO()
+    coco.dataset = gt
+    coco.createIndex()
+
+    def cal_pr(coco_gt, coco_dt, iou_thresh, save_dir, style='bbox'):
+        """Evaluate one result style at `iou_thresh` and save its curves."""
+        import matplotlib.pyplot as plt
+        from pycocotools.cocoeval import COCOeval
+        coco_dt = loadRes(coco_gt, coco_dt)
+        # np.linspace is swapped for the project's replacement only while the
+        # evaluator is set up (presumably because COCOeval calls it while
+        # building its parameters); the finally clause guarantees the global
+        # is restored even when construction fails.
+        np.linspace = fixed_linspace
+        try:
+            coco_eval = COCOeval(coco_gt, coco_dt, style)
+            coco_eval.params.iouThrs = np.linspace(
+                iou_thresh, iou_thresh, 1, endpoint=True)
+        finally:
+            np.linspace = backup_linspace
+        coco_eval.evaluate()
+        coco_eval.accumulate()
+        catIds = coco_gt.getCatIds()
+        # precision is laid out as [T x R x K x A x M]; K is the category axis.
+        if len(catIds) != coco_eval.eval['precision'].shape[2]:
+            raise Exception(
+                "The category number must be same as the third dimension of precisions."
+            )
+        # 101 recall positions; assumes the evaluator's recall axis has the
+        # same length -- TODO confirm against COCOeval's default recThrs.
+        x = np.arange(0.0, 1.01, 0.01)
+        color_map = get_color_map_list(256)[1:256]
+        ticks = np.arange(0, 1.01, 0.1)
+
+        def _rgba(idx):
+            # Wrap around so more categories than colors cannot overflow.
+            rgb = tuple(color_map[idx % len(color_map)])
+            return [float(c) / 255 for c in rgb] + [0.75]
+
+        def _setup_axes(position, title, ylabel):
+            plt.subplot(1, 2, position)
+            plt.title(title)
+            plt.xlabel("recall")
+            plt.ylabel(ylabel)
+            plt.xlim(0, 1.01)
+            plt.ylim(0, 1.01)
+            plt.grid(linestyle='--', linewidth=1)
+            plt.xticks(ticks, fontsize=5)
+            plt.yticks(ticks, fontsize=5)
+
+        # Draw on a dedicated figure so a figure the caller already has open
+        # is neither painted over nor closed.
+        fig = plt.figure()
+        try:
+            _setup_axes(1, style + " precision-recall IoU={}".format(
+                iou_thresh), "precision")
+            plt.plot([0, 1], [0, 1], 'r--', linewidth=1)
+            for idx, catId in enumerate(catIds):
+                # Indices: the only IoU threshold, every recall point, this
+                # category, area-range index 0 and maxDets index 2.
+                pr_array = coco_eval.eval['precision'][0, :, idx, 0, 2]
+                # -1 marks entries without a value; they are left out of AP.
+                precision = pr_array[pr_array > -1]
+                ap = np.mean(precision) if precision.size else float('nan')
+                nm = coco_gt.loadCats(catId)[0]['name'] + ' AP={:0.2f}'.format(
+                    float(ap * 100))
+                plt.plot(x, pr_array, color=_rgba(idx), label=nm, linewidth=1)
+            plt.legend(loc="lower left", fontsize=5)
+
+            _setup_axes(2, style + " score-recall IoU={}".format(iou_thresh),
+                        'score')
+            for idx, catId in enumerate(catIds):
+                nm = coco_gt.loadCats(catId)[0]['name']
+                sr_array = coco_eval.eval['scores'][0, :, idx, 0, 2]
+                plt.plot(x, sr_array, color=_rgba(idx), label=nm, linewidth=1)
+            plt.legend(loc="lower left", fontsize=5)
+            plt.savefig(
+                os.path.join(save_dir, "./{}_pr_curve(iou-{}).png".format(
+                    style, iou_thresh)),
+                dpi=800)
+        finally:
+            plt.close(fig)
+
+    if not os.path.exists(save_dir):
+        os.makedirs(save_dir)
+    cal_pr(coco, pred_bbox, iou_thresh, save_dir, style='bbox')
+    if pred_mask is not None:
+        cal_pr(coco, pred_mask, iou_thresh, save_dir, style='segm')
--- a/paddlex/cv/models/yolo_v3.py
+++ b/paddlex/cv/models/yolo_v3.py
@@ -162,7 +162,9 @@ class YOLOv3(BaseAPI):
              metric=None,
              use_vdl=False,
              sensitivities_file=None,
-              eval_metric_loss=0.05):
+              eval_metric_loss=0.05,
+              early_stop=False,
+              early_stop_patience=5):
        """训练。

        Args:
@@ -188,6 +190,9 @@ class YOLOv3(BaseAPI):
            sensitivities_file (str): 若指定为路径时，则加载路径下敏感度信息进行裁剪；若为字符串'DEFAULT'，
                则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪；若为None，则不进行裁剪。默认为None。
            eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
+            early_stop (bool): 是否使用提前终止训练策略。默认值为False。
+            early_stop_patience (int): 当使用提前终止训练策略时，如果验证集精度在`early_stop_patience`个epoch内
+                连续下降或持平，则终止训练。默认值为5。

        Raises:
            ValueError: 评估类型不在指定列表中。
@@ -239,7 +244,9 @@ class YOLOv3(BaseAPI):
            save_interval_epochs=save_interval_epochs,
            log_interval_steps=log_interval_steps,
            save_dir=save_dir,
-            use_vdl=use_vdl)
+            use_vdl=use_vdl,
+            early_stop=early_stop,
+            early_stop_patience=early_stop_patience)

    def evaluate(self,
                 eval_dataset,

--- a/paddlex/cv/transforms/box_utils.py
+++ b/paddlex/cv/transforms/box_utils.py
@@ -19,25 +19,6 @@ import cv2
 import scipy


-def meet_emit_constraint(src_bbox, sample_bbox):
-    center_x = (src_bbox[2] + src_bbox[0]) / 2
-    center_y = (src_bbox[3] + src_bbox[1]) / 2
-    if center_x >= sample_bbox[0] and \
-            center_x <= sample_bbox[2] and \
-            center_y >= sample_bbox[1] and \
-            center_y <= sample_bbox[3]:
-        return True
-    return False
-
-
-def clip_bbox(src_bbox):
-    src_bbox[0] = max(min(src_bbox[0], 1.0), 0.0)
-    src_bbox[1] = max(min(src_bbox[1], 1.0), 0.0)
-    src_bbox[2] = max(min(src_bbox[2], 1.0), 0.0)
-    src_bbox[3] = max(min(src_bbox[3], 1.0), 0.0)
-    return src_bbox
-
-
 def bbox_area(src_bbox):
    if src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]:
        return 0.
@@ -47,189 +28,6 @@ def bbox_area(src_bbox):
        return width * height


-def is_overlap(object_bbox, sample_bbox):
-    if object_bbox[0] >= sample_bbox[2] or \
-       object_bbox[2] <= sample_bbox[0] or \
-       object_bbox[1] >= sample_bbox[3] or \
-       object_bbox[3] <= sample_bbox[1]:
-        return False
-    else:
-        return True
-
-
-def filter_and_process(sample_bbox, bboxes, labels, scores=None):
-    new_bboxes = []
-    new_labels = []
-    new_scores = []
-    for i in range(len(bboxes)):
-        new_bbox = [0, 0, 0, 0]
-        obj_bbox = [bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]]
-        if not meet_emit_constraint(obj_bbox, sample_bbox):
-            continue
-        if not is_overlap(obj_bbox, sample_bbox):
-            continue
-        sample_width = sample_bbox[2] - sample_bbox[0]
-        sample_height = sample_bbox[3] - sample_bbox[1]
-        new_bbox[0] = (obj_bbox[0] - sample_bbox[0]) / sample_width
-        new_bbox[1] = (obj_bbox[1] - sample_bbox[1]) / sample_height
-        new_bbox[2] = (obj_bbox[2] - sample_bbox[0]) / sample_width
-        new_bbox[3] = (obj_bbox[3] - sample_bbox[1]) / sample_height
-        new_bbox = clip_bbox(new_bbox)
-        if bbox_area(new_bbox) > 0:
-            new_bboxes.append(new_bbox)
-            new_labels.append([labels[i][0]])
-            if scores is not None:
-                new_scores.append([scores[i][0]])
-    bboxes = np.array(new_bboxes)
-    labels = np.array(new_labels)
-    scores = np.array(new_scores)
-    return bboxes, labels, scores
-
-
-def bbox_area_sampling(bboxes, labels, scores, target_size, min_size):
-    new_bboxes = []
-    new_labels = []
-    new_scores = []
-    for i, bbox in enumerate(bboxes):
-        w = float((bbox[2] - bbox[0]) * target_size)
-        h = float((bbox[3] - bbox[1]) * target_size)
-        if w * h < float(min_size * min_size):
-            continue
-        else:
-            new_bboxes.append(bbox)
-            new_labels.append(labels[i])
-            if scores is not None and scores.size != 0:
-                new_scores.append(scores[i])
-    bboxes = np.array(new_bboxes)
-    labels = np.array(new_labels)
-    scores = np.array(new_scores)
-    return bboxes, labels, scores
-
-
-def generate_sample_bbox(sampler):
-    scale = np.random.uniform(sampler[2], sampler[3])
-    aspect_ratio = np.random.uniform(sampler[4], sampler[5])
-    aspect_ratio = max(aspect_ratio, (scale**2.0))
-    aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
-    bbox_width = scale * (aspect_ratio**0.5)
-    bbox_height = scale / (aspect_ratio**0.5)
-    xmin_bound = 1 - bbox_width
-    ymin_bound = 1 - bbox_height
-    xmin = np.random.uniform(0, xmin_bound)
-    ymin = np.random.uniform(0, ymin_bound)
-    xmax = xmin + bbox_width
-    ymax = ymin + bbox_height
-    sampled_bbox = [xmin, ymin, xmax, ymax]
-    return sampled_bbox
-
-
-def generate_sample_bbox_square(sampler, image_width, image_height):
-    scale = np.random.uniform(sampler[2], sampler[3])
-    aspect_ratio = np.random.uniform(sampler[4], sampler[5])
-    aspect_ratio = max(aspect_ratio, (scale**2.0))
-    aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
-    bbox_width = scale * (aspect_ratio**0.5)
-    bbox_height = scale / (aspect_ratio**0.5)
-    if image_height < image_width:
-        bbox_width = bbox_height * image_height / image_width
-    else:
-        bbox_height = bbox_width * image_width / image_height
-    xmin_bound = 1 - bbox_width
-    ymin_bound = 1 - bbox_height
-    xmin = np.random.uniform(0, xmin_bound)
-    ymin = np.random.uniform(0, ymin_bound)
-    xmax = xmin + bbox_width
-    ymax = ymin + bbox_height
-    sampled_bbox = [xmin, ymin, xmax, ymax]
-    return sampled_bbox
-
-
-def data_anchor_sampling(bbox_labels, image_width, image_height, scale_array,
-                         resize_width):
-    num_gt = len(bbox_labels)
-    # np.random.randint range: [low, high)
-    rand_idx = np.random.randint(0, num_gt) if num_gt != 0 else 0
-
-    if num_gt != 0:
-        norm_xmin = bbox_labels[rand_idx][0]
-        norm_ymin = bbox_labels[rand_idx][1]
-        norm_xmax = bbox_labels[rand_idx][2]
-        norm_ymax = bbox_labels[rand_idx][3]
-
-        xmin = norm_xmin * image_width
-        ymin = norm_ymin * image_height
-        wid = image_width * (norm_xmax - norm_xmin)
-        hei = image_height * (norm_ymax - norm_ymin)
-        range_size = 0
-
-        area = wid * hei
-        for scale_ind in range(0, len(scale_array) - 1):
-            if area > scale_array[scale_ind] ** 2 and area < \
-                    scale_array[scale_ind + 1] ** 2:
-                range_size = scale_ind + 1
-                break
-
-        if area > scale_array[len(scale_array) - 2]**2:
-            range_size = len(scale_array) - 2
-
-        scale_choose = 0.0
-        if range_size == 0:
-            rand_idx_size = 0
-        else:
-            # np.random.randint range: [low, high)
-            rng_rand_size = np.random.randint(0, range_size + 1)
-            rand_idx_size = rng_rand_size % (range_size + 1)
-
-        if rand_idx_size == range_size:
-            min_resize_val = scale_array[rand_idx_size] / 2.0
-            max_resize_val = min(2.0 * scale_array[rand_idx_size],
-                                 2 * math.sqrt(wid * hei))
-            scale_choose = random.uniform(min_resize_val, max_resize_val)
-        else:
-            min_resize_val = scale_array[rand_idx_size] / 2.0
-            max_resize_val = 2.0 * scale_array[rand_idx_size]
-            scale_choose = random.uniform(min_resize_val, max_resize_val)
-
-        sample_bbox_size = wid * resize_width / scale_choose
-
-        w_off_orig = 0.0
-        h_off_orig = 0.0
-        if sample_bbox_size < max(image_height, image_width):
-            if wid <= sample_bbox_size:
-                w_off_orig = np.random.uniform(xmin + wid - sample_bbox_size,
-                                               xmin)
-            else:
-                w_off_orig = np.random.uniform(xmin,
-                                               xmin + wid - sample_bbox_size)
-
-            if hei <= sample_bbox_size:
-                h_off_orig = np.random.uniform(ymin + hei - sample_bbox_size,
-                                               ymin)
-            else:
-                h_off_orig = np.random.uniform(ymin,
-                                               ymin + hei - sample_bbox_size)
-
-        else:
-            w_off_orig = np.random.uniform(image_width - sample_bbox_size, 0.0)
-            h_off_orig = np.random.uniform(image_height - sample_bbox_size,
-                                           0.0)
-
-        w_off_orig = math.floor(w_off_orig)
-        h_off_orig = math.floor(h_off_orig)
-
-        # Figure out top left coordinates.
-        w_off = float(w_off_orig / image_width)
-        h_off = float(h_off_orig / image_height)
-
-        sampled_bbox = [
-            w_off, h_off, w_off + float(sample_bbox_size / image_width),
-            h_off + float(sample_bbox_size / image_height)
-        ]
-        return sampled_bbox
-    else:
-        return 0
-
-
 def jaccard_overlap(sample_bbox, object_bbox):
    if sample_bbox[0] >= object_bbox[2] or \
        sample_bbox[2] <= object_bbox[0] or \
@@ -249,143 +47,143 @@ def jaccard_overlap(sample_bbox, object_bbox):
    return overlap


-def intersect_bbox(bbox1, bbox2):
-    if bbox2[0] > bbox1[2] or bbox2[2] < bbox1[0] or \
-        bbox2[1] > bbox1[3] or bbox2[3] < bbox1[1]:
-        intersection_box = [0.0, 0.0, 0.0, 0.0]
-    else:
-        intersection_box = [
-            max(bbox1[0], bbox2[0]),
-            max(bbox1[1], bbox2[1]),
-            min(bbox1[2], bbox2[2]),
-            min(bbox1[3], bbox2[3])
-        ]
-    return intersection_box
-
-
-def bbox_coverage(bbox1, bbox2):
-    inter_box = intersect_bbox(bbox1, bbox2)
-    intersect_size = bbox_area(inter_box)
-
-    if intersect_size > 0:
-        bbox1_size = bbox_area(bbox1)
-        return intersect_size / bbox1_size
-    else:
-        return 0.
+def iou_matrix(a, b):
+    """Return the pairwise intersection-over-union of two box arrays.
+
+    Both inputs are indexed as 2-D arrays whose first two columns are the
+    minimum corner and whose remaining columns are the maximum corner
+    (assumes shape (N, 4) and (M, 4) -- TODO confirm with callers).
+    The result has one row per box in `a` and one column per box in `b`.
+    """
+    inter_min = np.maximum(a[:, np.newaxis, :2], b[:, :2])
+    inter_max = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
+    # Pairs whose clipped corners are not strictly ordered do not overlap.
+    overlaps = (inter_min < inter_max).all(axis=2)

+    inter = np.prod(inter_max - inter_min, axis=2) * overlaps
+    size_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
+    size_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
+    union = size_a[:, np.newaxis] + size_b - inter
+    # The small constant keeps the division defined when the union is zero.
+    return inter / (union + 1e-10)

-def satisfy_sample_constraint(sampler,
-                              sample_bbox,
-                              gt_bboxes,
-                              satisfy_all=False):
-    if sampler[6] == 0 and sampler[7] == 0:
-        return True
-    satisfied = []
-    for i in range(len(gt_bboxes)):
-        object_bbox = [
-            gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
-        ]
-        overlap = jaccard_overlap(sample_bbox, object_bbox)
-        if sampler[6] != 0 and \
-                overlap < sampler[6]:
-            satisfied.append(False)
-            continue
-        if sampler[7] != 0 and \
-                overlap > sampler[7]:
-            satisfied.append(False)
-            continue
-        satisfied.append(True)
-        if not satisfy_all:
-            return True
-
-    if satisfy_all:
-        return np.all(satisfied)
-    else:
-        return False

+def crop_box_with_center_constraint(box, crop):
+    """Clip boxes to a crop window and report which ones survive.
+
+    Args:
+        box: 2-D array of boxes; the first two columns are the minimum
+            corner, the rest the maximum corner. It is not modified.
+        crop: Window as (min corner, max corner) in the same coordinates.
+
+    Returns:
+        tuple: The clipped boxes expressed relative to the window's minimum
+        corner (same shape as `box`), and the indices of boxes whose
+        original center lies inside the window and whose clipped extent is
+        non-empty.
+    """
+    origin, limit = crop[:2], crop[2:]
+
+    clipped = box.copy()
+    clipped[:, :2] = np.maximum(box[:, :2], origin)
+    clipped[:, 2:] = np.minimum(box[:, 2:], limit)
+    # Both corners are shifted by the window origin.
+    clipped[:, :2] -= origin
+    clipped[:, 2:] -= origin
+
+    mid = (box[:, :2] + box[:, 2:]) / 2
+    center_inside = np.logical_and(origin <= mid, mid < limit).all(axis=1)
+    non_empty = (clipped[:, :2] < clipped[:, 2:]).all(axis=1)
+    keep = np.logical_and(center_inside, non_empty)
+
+    return clipped, np.where(keep)[0]
+
+
+def is_poly(segm):
+    """Tell whether a segmentation is a list (True) or a dict (False).
+
+    Raises:
+        Exception: `segm` is neither a list nor a dict.
+    """
+    if isinstance(segm, list):
+        return True
+    if isinstance(segm, dict):
+        return False
+    raise Exception("Invalid segm type: {}".format(type(segm)))

-def satisfy_sample_constraint_coverage(sampler, sample_bbox, gt_bboxes):
-    if sampler[6] == 0 and sampler[7] == 0:
-        has_jaccard_overlap = False
-    else:
-        has_jaccard_overlap = True
-    if sampler[8] == 0 and sampler[9] == 0:
-        has_object_coverage = False
-    else:
-        has_object_coverage = True
-
-    if not has_jaccard_overlap and not has_object_coverage:
-        return True
-    found = False
-    for i in range(len(gt_bboxes)):
-        object_bbox = [
-            gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
-        ]
-        if has_jaccard_overlap:
-            overlap = jaccard_overlap(sample_bbox, object_bbox)
-            if sampler[6] != 0 and \
-                    overlap < sampler[6]:
-                continue
-            if sampler[7] != 0 and \
-                    overlap > sampler[7]:
-                continue
-            found = True
-        if has_object_coverage:
-            object_coverage = bbox_coverage(object_bbox, sample_bbox)
-            if sampler[8] != 0 and \
-                    object_coverage < sampler[8]:
-                continue
-            if sampler[9] != 0 and \
-                    object_coverage > sampler[9]:
-                continue
-            found = True
-        if found:
-            return True
-    return found
-
-
-def crop_image_sampling(img, sample_bbox, image_width, image_height,
-                        target_size):
-    # no clipping here
-    xmin = int(sample_bbox[0] * image_width)
-    xmax = int(sample_bbox[2] * image_width)
-    ymin = int(sample_bbox[1] * image_height)
-    ymax = int(sample_bbox[3] * image_height)
-
-    w_off = xmin
-    h_off = ymin
-    width = xmax - xmin
-    height = ymax - ymin
-    cross_xmin = max(0.0, float(w_off))
-    cross_ymin = max(0.0, float(h_off))
-    cross_xmax = min(float(w_off + width - 1.0), float(image_width))
-    cross_ymax = min(float(h_off + height - 1.0), float(image_height))
-    cross_width = cross_xmax - cross_xmin
-    cross_height = cross_ymax - cross_ymin
-
-    roi_xmin = 0 if w_off >= 0 else abs(w_off)
-    roi_ymin = 0 if h_off >= 0 else abs(h_off)
-    roi_width = cross_width
-    roi_height = cross_height
-
-    roi_y1 = int(roi_ymin)
-    roi_y2 = int(roi_ymin + roi_height)
-    roi_x1 = int(roi_xmin)
-    roi_x2 = int(roi_xmin + roi_width)
-
-    cross_y1 = int(cross_ymin)
-    cross_y2 = int(cross_ymin + cross_height)
-    cross_x1 = int(cross_xmin)
-    cross_x2 = int(cross_xmin + cross_width)
-
-    sample_img = np.zeros((height, width, 3))
-    sample_img[roi_y1: roi_y2, roi_x1: roi_x2] = \
-        img[cross_y1: cross_y2, cross_x1: cross_x2]
-
-    sample_img = cv2.resize(
-        sample_img, (target_size, target_size), interpolation=cv2.INTER_AREA)
-
-    return sample_img
+
+def crop_image(img, crop):
+    """Slice the window `crop` = (x1, y1, x2, y2) out of a 3-D image array.
+
+    Rows are taken from y1:y2 and columns from x1:x2; the last axis is kept
+    whole.
+    """
+    left, top, right, bottom = crop
+    rows = slice(top, bottom)
+    cols = slice(left, right)
+    return img[rows, cols, :]
+
+
+def crop_segms(segms, valid_ids, crop, height, width):
+    """Crop the segmentations selected by `valid_ids` to the window `crop`.
+
+    Args:
+        segms (list): Segmentations; each entry is either a list of flat
+            [x0, y0, x1, y1, ...] polygons or an RLE dict.
+        valid_ids: Indices into `segms` of the entries to process.
+        crop: Window as (xmin, ymin, xmax, ymax).
+        height (int): Mask height, used to convert uncompressed RLE.
+        width (int): Mask width, used to convert uncompressed RLE.
+
+    Returns:
+        list: One cropped segmentation per id in `valid_ids`, in that order.
+        Polygon coordinates are relative to (xmin, ymin); RLE entries are
+        re-encoded from the cropped mask.
+    """
+
+    def _crop_poly(segm, crop):
+        # Imported here so shapely is only needed for polygon input.
+        import logging
+        import shapely.ops
+        from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
+        logging.getLogger("shapely").setLevel(logging.WARNING)
+
+        xmin, ymin, xmax, ymax = crop
+        crop_coord = [xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin]
+        crop_p = Polygon(np.array(crop_coord).reshape(4, 2))
+
+        def _to_local(geom):
+            # Flatten the exterior ring (without its repeated closing point)
+            # and move it into the crop window's coordinate frame.
+            flat = np.squeeze(
+                np.array(geom.exterior.coords[:-1]).reshape(1, -1))
+            flat[0::2] -= xmin
+            flat[1::2] -= ymin
+            return flat.tolist()
+
+        crop_segm = list()
+        for poly in segm:
+            poly = np.array(poly).reshape(len(poly) // 2, 2)
+            polygon = Polygon(poly)
+            if not polygon.is_valid:
+                # Rebuild an invalid outline from its own ring by
+                # polygonizing the ring's self-intersection.
+                exterior = polygon.exterior
+                multi_lines = exterior.intersection(exterior)
+                polygons = shapely.ops.polygonize(multi_lines)
+                polygon = MultiPolygon(list(polygons))
+            # Multi-part geometries are walked through `.geoms`; iterating
+            # the geometry object itself is not supported by Shapely 2.
+            if isinstance(polygon, MultiPolygon):
+                pieces = list(polygon.geoms)
+            else:
+                pieces = [polygon]
+            for piece in pieces:
+                inter = piece.intersection(crop_p)
+                if inter.is_empty:
+                    continue
+                if isinstance(inter, (MultiPolygon, GeometryCollection)):
+                    for part in inter.geoms:
+                        # Lines/points left over by the clip are dropped.
+                        if isinstance(part, Polygon):
+                            crop_segm.append(_to_local(part))
+                elif isinstance(inter, Polygon):
+                    crop_segm.append(_to_local(inter))
+        return crop_segm
+
+    def _crop_rle(rle, crop, height, width):
+        # Imported here so pycocotools is only needed for RLE input.
+        import pycocotools.mask as mask_util
+        if 'counts' in rle and type(rle['counts']) == list:
+            rle = mask_util.frPyObjects(rle, height, width)
+        mask = mask_util.decode(rle)
+        mask = mask[crop[1]:crop[3], crop[0]:crop[2]]
+        rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
+        return rle
+
+    cropped = []
+    for idx in valid_ids:
+        segm = segms[idx]
+        if is_poly(segm):
+            # Polygon format
+            cropped.append(_crop_poly(segm, crop))
+        else:
+            # RLE format
+            cropped.append(_crop_rle(segm, crop, height, width))
+    return cropped
+
+
+def expand_segms(segms, x, y, height, width, ratio):
+    """Move segmentations to offset (x, y) on a canvas enlarged by `ratio`.
+
+    Polygon segmentations get `x` added to every even-indexed coordinate and
+    `y` to every odd-indexed one. RLE segmentations are decoded, pasted at
+    rows y:y+height and columns x:x+width of a zero canvas of size
+    (int(height * ratio), int(width * ratio)), and encoded again.
+
+    Returns:
+        list: The expanded segmentations, in input order.
+    """
+
+    def _expand_poly(poly, x, y):
+        shifted = np.array(poly)
+        shifted[0::2] += x
+        shifted[1::2] += y
+        return shifted.tolist()
+
+    def _expand_rle(rle, x, y, height, width, ratio):
+        # pycocotools is only loaded once an RLE segmentation is met.
+        import pycocotools.mask as mask_util
+        if 'counts' in rle and type(rle['counts']) == list:
+            rle = mask_util.frPyObjects(rle, height, width)
+        mask = mask_util.decode(rle)
+        canvas_shape = (int(height * ratio), int(width * ratio))
+        canvas = np.full(canvas_shape, 0).astype(mask.dtype)
+        canvas[y:y + height, x:x + width] = mask
+        return mask_util.encode(
+            np.array(canvas, order='F', dtype=np.uint8))
+
+    def _expand(segm):
+        if is_poly(segm):
+            # Polygon format
+            return [_expand_poly(poly, x, y) for poly in segm]
+        # RLE format
+        return _expand_rle(segm, x, y, height, width, ratio)
+
+    return [_expand(segm) for segm in segms]


 def box_horizontal_flip(bboxes, width):
@@ -409,15 +207,10 @@ def segms_horizontal_flip(segms, height, width):
        if 'counts' in rle and type(rle['counts']) == list:
            rle = mask_util.frPyObjects([rle], height, width)
        mask = mask_util.decode(rle)
-        mask = mask[:, ::-1, :]
+        mask = mask[:, ::-1]
        rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
        return rle

-    def is_poly(segm):
-        if not isinstance(segm, (list, dict)):
-            raise Exception("Invalid segm type: {}".format(type(segm)))
-        return isinstance(segm, list)
-
    flipped_segms = []
    for segm in segms:
        if is_poly(segm):

--- a/paddlex/cv/transforms/det_transforms.py
+++ b/paddlex/cv/transforms/det_transforms.py
@@ -12,13 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from .ops import *
-from .box_utils import *
+try:
+    from collections.abc import Sequence
+except Exception:
+    from collections import Sequence
+
 import random
 import os.path as osp
 import numpy as np
-from PIL import Image, ImageEnhance
+
 import cv2
+from PIL import Image, ImageEnhance
+
+from .ops import *
+from .box_utils import *


 class Compose:
@@ -81,7 +88,7 @@ class Compose:
                im = cv2.imread(im_file).astype('float32')
            except:
                raise TypeError(
-                   'Can\'t read The image file {}!'.format(im_file))
+                    'Can\'t read The image file {}!'.format(im_file))
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            # make default im_info with [h, w, 1]
            im_info['im_resize_info'] = np.array(
@@ -656,9 +663,17 @@ class MixupImage:
        gt_score2 = im_info['mixup'][2]['gt_score']
        gt_score = np.concatenate(
            (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
+        if 'gt_poly' in label_info:
+            gt_poly1 = label_info['gt_poly']
+            gt_poly2 = im_info['mixup'][2]['gt_poly']
+            label_info['gt_poly'] = gt_poly1 + gt_poly2
+        is_crowd1 = label_info['is_crowd']
+        is_crowd2 = im_info['mixup'][2]['is_crowd']
+        is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
        label_info['gt_bbox'] = gt_bbox
        label_info['gt_score'] = gt_score
        label_info['gt_class'] = gt_class
+        label_info['is_crowd'] = is_crowd
        im_info['augment_shape'] = np.array([im.shape[0],
                                             im.shape[1]]).astype('int32')
        im_info.pop('mixup')
@@ -670,23 +685,30 @@ class MixupImage:

 class RandomExpand:
    """随机扩张图像，模型训练时的数据增强操作。
-
    1. 随机选取扩张比例（扩张比例大于1时才进行扩张）。
    2. 计算扩张后图像大小。
-    3. 初始化像素值为数据集均值的图像，并将原图像随机粘贴于该图像上。
+    3. 初始化像素值为输入填充值的图像，并将原图像随机粘贴于该图像上。
    4. 根据原图像粘贴位置换算出扩张后真实标注框的位置坐标。
-
+    5. 根据原图像粘贴位置换算出扩张后真实分割区域的位置坐标。
    Args:
-        max_ratio (float): 图像扩张的最大比例。默认为4.0。
+        ratio (float): 图像扩张的最大比例。默认为4.0。
        prob (float): 随机扩张的概率。默认为0.5。
-        mean (list): 图像数据集的均值（0-255）。默认为[127.5, 127.5, 127.5]。
-
+        fill_value (list): 扩张图像的初始填充值（0-255）。默认为[123.675, 116.28, 103.53]。
    """

-    def __init__(self, max_ratio=4., prob=0.5, mean=[127.5, 127.5, 127.5]):
-        self.max_ratio = max_ratio
-        self.mean = mean
+    def __init__(self,
+                 ratio=4.,
+                 prob=0.5,
+                 fill_value=[123.675, 116.28, 103.53]):
+        super(RandomExpand, self).__init__()
+        assert ratio > 1.01, "expand ratio must be larger than 1.01"
+        self.ratio = ratio
        self.prob = prob
+        assert isinstance(fill_value, Sequence), \
+            "fill value must be sequence"
+        if not isinstance(fill_value, tuple):
+            fill_value = tuple(fill_value)
+        self.fill_value = fill_value

    def __call__(self, im, im_info=None, label_info=None):
        """
@@ -694,7 +716,6 @@ class RandomExpand:
            im (np.ndarray): 图像np.ndarray数据。
            im_info (dict, 可选): 存储与图像相关的信息。
            label_info (dict, 可选): 存储与标注框相关的信息。
-
        Returns:
            tuple: 当label_info为空时，返回的tuple为(im, im_info)，分别对应图像np.ndarray数据、存储与图像相关信息的字典；
                   当label_info不为空时，返回的tuple为(im, im_info, label_info)，分别对应图像np.ndarray数据、
@@ -706,7 +727,6 @@ class RandomExpand:
                                          其中n代表真实标注框的个数。
                       - gt_class (np.ndarray): 随机扩张后每个真实标注框对应的类别序号，形状为(n, 1)，
                                           其中n代表真实标注框的个数。
-
        Raises:
            TypeError: 形参数据类型不满足需求。
        """
@@ -721,108 +741,68 @@ class RandomExpand:
                'gt_class' not in label_info:
            raise TypeError('Cannot do RandomExpand! ' + \
                            'Becasuse gt_bbox/gt_class is not in label_info!')
-        prob = np.random.uniform(0, 1)
+        if np.random.uniform(0., 1.) < self.prob:
+            return (im, im_info, label_info)
+
        augment_shape = im_info['augment_shape']
-        im_width = augment_shape[1]
-        im_height = augment_shape[0]
-        gt_bbox = label_info['gt_bbox']
-        gt_class = label_info['gt_class']
-
-        if prob < self.prob:
-            if self.max_ratio - 1 >= 0.01:
-                expand_ratio = np.random.uniform(1, self.max_ratio)
-                height = int(im_height * expand_ratio)
-                width = int(im_width * expand_ratio)
-                h_off = math.floor(np.random.uniform(0, height - im_height))
-                w_off = math.floor(np.random.uniform(0, width - im_width))
-                expand_bbox = [
-                    -w_off / im_width, -h_off / im_height,
-                    (width - w_off) / im_width, (height - h_off) / im_height
-                ]
-                expand_im = np.ones((height, width, 3))
-                expand_im = np.uint8(expand_im * np.squeeze(self.mean))
-                expand_im = Image.fromarray(expand_im)
-                im = im.astype('uint8')
-                im = Image.fromarray(im)
-                expand_im.paste(im, (int(w_off), int(h_off)))
-                expand_im = np.asarray(expand_im)
-                for i in range(gt_bbox.shape[0]):
-                    gt_bbox[i][0] = gt_bbox[i][0] / im_width
-                    gt_bbox[i][1] = gt_bbox[i][1] / im_height
-                    gt_bbox[i][2] = gt_bbox[i][2] / im_width
-                    gt_bbox[i][3] = gt_bbox[i][3] / im_height
-                gt_bbox, gt_class, _ = filter_and_process(
-                    expand_bbox, gt_bbox, gt_class)
-                for i in range(gt_bbox.shape[0]):
-                    gt_bbox[i][0] = gt_bbox[i][0] * width
-                    gt_bbox[i][1] = gt_bbox[i][1] * height
-                    gt_bbox[i][2] = gt_bbox[i][2] * width
-                    gt_bbox[i][3] = gt_bbox[i][3] * height
-                im = expand_im.astype('float32')
-                label_info['gt_bbox'] = gt_bbox
-                label_info['gt_class'] = gt_class
-                im_info['augment_shape'] = np.array([height,
-                                                     width]).astype('int32')
-        if label_info is None:
-            return (im, im_info)
-        else:
+        height = int(augment_shape[0])
+        width = int(augment_shape[1])
+
+        expand_ratio = np.random.uniform(1., self.ratio)
+        h = int(height * expand_ratio)
+        w = int(width * expand_ratio)
+        if not h > height or not w > width:
            return (im, im_info, label_info)
+        y = np.random.randint(0, h - height)
+        x = np.random.randint(0, w - width)
+        canvas = np.ones((h, w, 3), dtype=np.float32)
+        canvas *= np.array(self.fill_value, dtype=np.float32)
+        canvas[y:y + height, x:x + width, :] = im
+
+        im_info['augment_shape'] = np.array([h, w]).astype('int32')
+        if 'gt_bbox' in label_info and len(label_info['gt_bbox']) > 0:
+            label_info['gt_bbox'] += np.array([x, y] * 2, dtype=np.float32)
+        if 'gt_poly' in label_info and len(label_info['gt_poly']) > 0:
+            label_info['gt_poly'] = expand_segms(label_info['gt_poly'], x, y,
+                                                 height, width, expand_ratio)
+        return (canvas, im_info, label_info)


 class RandomCrop:
    """随机裁剪图像。
-
-    1. 根据batch_sampler计算获取裁剪候选区域的位置。
-        (1) 根据min scale、max scale、min aspect ratio、max aspect ratio计算随机剪裁的高、宽。
-        (2) 根据随机剪裁的高、宽随机选取剪裁的起始点。
-        (3) 筛选出裁剪候选区域：
-            - 当satisfy_all为True时，需所有真实标注框与裁剪候选区域的重叠度满足需求时，该裁剪候选区域才可保留。
-            - 当satisfy_all为False时，当有一个真实标注框与裁剪候选区域的重叠度满足需求时，该裁剪候选区域就可保留。
-    2. 遍历所有裁剪候选区域：
-        (1) 若真实标注框与候选裁剪区域不重叠，或其中心点不在候选裁剪区域，
-            则将该真实标注框去除。
-        (2) 计算相对于该候选裁剪区域，真实标注框的位置，并筛选出对应的类别、混合得分。
-        (3) 若avoid_no_bbox为False，返回当前裁剪后的信息即可；
-            反之，要找到一个裁剪区域中真实标注框个数不为0的区域，才返回裁剪后的信息。
+    1. 若allow_no_crop为True，则在thresholds加入'no_crop'。
+    2. 随机打乱thresholds。
+    3. 遍历thresholds中各元素：
+        (1) 如果当前thresh为'no_crop'，则返回原始图像和标注信息。
+        (2) 随机取出aspect_ratio和scaling中的值并由此计算出候选裁剪区域的高、宽、起始点。
+        (3) 计算真实标注框与候选裁剪区域IoU，若全部真实标注框的IoU都小于thresh，则继续第3步。
+        (4) 如果cover_all_box为True且存在真实标注框的IoU小于thresh，则继续第3步。
+        (5) 筛选出位于候选裁剪区域内的真实标注框，若有效框的个数为0，则继续第3步，否则进行第4步。
+    4. 换算有效真值标注框相对候选裁剪区域的位置坐标。
+    5. 换算有效分割区域相对候选裁剪区域的位置坐标。

    Args:
-        batch_sampler (list): 随机裁剪参数的多种组合，每种组合包含8个值，如下：
-            - max sample (int)：满足当前组合的裁剪区域的个数上限。
-            - max trial (int): 查找满足当前组合的次数。
-            - min scale (float): 裁剪面积相对原面积，每条边缩短比例的最小限制。
-            - max scale (float): 裁剪面积相对原面积，每条边缩短比例的最大限制。
-            - min aspect ratio (float): 裁剪后短边缩放比例的最小限制。
-            - max aspect ratio (float): 裁剪后短边缩放比例的最大限制。
-            - min overlap (float): 真实标注框与裁剪图像重叠面积的最小限制。
-            - max overlap (float): 真实标注框与裁剪图像重叠面积的最大限制。
-            默认值为None，当为None时采用如下设置：
-                [[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
-                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
-                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
-                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
-                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
-                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
-                 [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
-        satisfy_all (bool): 是否需要所有标注框满足条件，裁剪候选区域才保留。默认为False。
-        avoid_no_bbox (bool)： 是否对裁剪图像不存在标注框的图像进行保留。默认为True。
-
+        aspect_ratio (list): 裁剪后短边缩放比例的取值范围，以[min, max]形式表示。默认值为[.5, 2.]。
+        thresholds (list): 判断裁剪候选区域是否有效所需的IoU阈值取值列表。默认值为[.0, .1, .3, .5, .7, .9]。
+        scaling (list): 裁剪面积相对原面积的取值范围，以[min, max]形式表示。默认值为[.3, 1.]。
+        num_attempts (int): 在放弃寻找有效裁剪区域前尝试的次数。默认值为50。
+        allow_no_crop (bool): 是否允许未进行裁剪。默认值为True。
+        cover_all_box (bool): 是否要求所有的真实标注框都必须在裁剪区域内。默认值为False。
    """

    def __init__(self,
-                 batch_sampler=None,
-                 satisfy_all=False,
-                 avoid_no_bbox=True):
-        if batch_sampler is None:
-            batch_sampler = [[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
-                             [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
-                             [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
-                             [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
-                             [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
-                             [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
-                             [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
-        self.batch_sampler = batch_sampler
-        self.satisfy_all = satisfy_all
-        self.avoid_no_bbox = avoid_no_bbox
+                 aspect_ratio=[.5, 2.],
+                 thresholds=[.0, .1, .3, .5, .7, .9],
+                 scaling=[.3, 1.],
+                 num_attempts=50,
+                 allow_no_crop=True,
+                 cover_all_box=False):
+        self.aspect_ratio = aspect_ratio
+        self.thresholds = thresholds
+        self.scaling = scaling
+        self.num_attempts = num_attempts
+        self.allow_no_crop = allow_no_crop
+        self.cover_all_box = cover_all_box

    def __call__(self, im, im_info=None, label_info=None):
        """
@@ -857,66 +837,84 @@ class RandomCrop:
                'gt_class' not in label_info:
            raise TypeError('Cannot do RandomCrop! ' + \
                            'Becasuse gt_bbox/gt_class is not in label_info!')
+
+        if len(label_info['gt_bbox']) == 0:
+            return (im, im_info, label_info)
+
        augment_shape = im_info['augment_shape']
-        im_width = augment_shape[1]
-        im_height = augment_shape[0]
+        w = augment_shape[1]
+        h = augment_shape[0]
        gt_bbox = label_info['gt_bbox']
-        gt_bbox_tmp = gt_bbox.copy()
-        for i in range(gt_bbox_tmp.shape[0]):
-            gt_bbox_tmp[i][0] = gt_bbox[i][0] / im_width
-            gt_bbox_tmp[i][1] = gt_bbox[i][1] / im_height
-            gt_bbox_tmp[i][2] = gt_bbox[i][2] / im_width
-            gt_bbox_tmp[i][3] = gt_bbox[i][3] / im_height
-        gt_class = label_info['gt_class']
-
-        gt_score = None
-        if 'gt_score' in label_info:
-            gt_score = label_info['gt_score']
-        sampled_bbox = []
-        gt_bbox_tmp = gt_bbox_tmp.tolist()
-        for sampler in self.batch_sampler:
-            found = 0
-            for i in range(sampler[1]):
-                if found >= sampler[0]:
-                    break
-                sample_bbox = generate_sample_bbox(sampler)
-                if satisfy_sample_constraint(sampler, sample_bbox, gt_bbox_tmp,
-                                             self.satisfy_all):
-                    sampled_bbox.append(sample_bbox)
-                    found = found + 1
-        im = np.array(im)
-        while sampled_bbox:
-            idx = int(np.random.uniform(0, len(sampled_bbox)))
-            sample_bbox = sampled_bbox.pop(idx)
-            sample_bbox = clip_bbox(sample_bbox)
-            crop_bbox, crop_class, crop_score = \
-                filter_and_process(sample_bbox, gt_bbox_tmp, gt_class, gt_score)
-            if self.avoid_no_bbox:
-                if len(crop_bbox) < 1:
+        thresholds = list(self.thresholds)
+        if self.allow_no_crop:
+            thresholds.append('no_crop')
+        np.random.shuffle(thresholds)
+
+        for thresh in thresholds:
+            if thresh == 'no_crop':
+                return (im, im_info, label_info)
+
+            found = False
+            for i in range(self.num_attempts):
+                scale = np.random.uniform(*self.scaling)
+                min_ar, max_ar = self.aspect_ratio
+                aspect_ratio = np.random.uniform(
+                    max(min_ar, scale**2), min(max_ar, scale**-2))
+                crop_h = int(h * scale / np.sqrt(aspect_ratio))
+                crop_w = int(w * scale * np.sqrt(aspect_ratio))
+                crop_y = np.random.randint(0, h - crop_h)
+                crop_x = np.random.randint(0, w - crop_w)
+                crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
+                iou = iou_matrix(gt_bbox, np.array([crop_box],
+                                                   dtype=np.float32))
+                if iou.max() < thresh:
                    continue
-            xmin = int(sample_bbox[0] * im_width)
-            xmax = int(sample_bbox[2] * im_width)
-            ymin = int(sample_bbox[1] * im_height)
-            ymax = int(sample_bbox[3] * im_height)
-            im = im[ymin:ymax, xmin:xmax]
-            for i in range(crop_bbox.shape[0]):
-                crop_bbox[i][0] = crop_bbox[i][0] * (xmax - xmin)
-                crop_bbox[i][1] = crop_bbox[i][1] * (ymax - ymin)
-                crop_bbox[i][2] = crop_bbox[i][2] * (xmax - xmin)
-                crop_bbox[i][3] = crop_bbox[i][3] * (ymax - ymin)
-            label_info['gt_bbox'] = crop_bbox
-            label_info['gt_class'] = crop_class
-            label_info['gt_score'] = crop_score
-            im_info['augment_shape'] = np.array([ymax - ymin,
-                                                 xmax - xmin]).astype('int32')
-            if label_info is None:
-                return (im, im_info)
-            else:
+
+                if self.cover_all_box and iou.min() < thresh:
+                    continue
+
+                cropped_box, valid_ids = crop_box_with_center_constraint(
+                    gt_bbox, np.array(crop_box, dtype=np.float32))
+                if valid_ids.size > 0:
+                    found = True
+                    break
+
+            if found:
+                if 'gt_poly' in label_info and len(label_info['gt_poly']) > 0:
+                    crop_polys = crop_segms(label_info['gt_poly'], valid_ids,
+                                            np.array(crop_box, dtype=np.int64),
+                                            h, w)
+                    if [] in crop_polys:
+                        delete_id = list()
+                        valid_polys = list()
+                        for id, crop_poly in enumerate(crop_polys):
+                            if crop_poly == []:
+                                delete_id.append(id)
+                            else:
+                                valid_polys.append(crop_poly)
+                        valid_ids = np.delete(valid_ids, delete_id)
+                        if len(valid_polys) == 0:
+                            return (im, im_info, label_info)
+                        label_info['gt_poly'] = valid_polys
+                    else:
+                        label_info['gt_poly'] = crop_polys
+                im = crop_image(im, crop_box)
+                label_info['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
+                label_info['gt_class'] = np.take(
+                    label_info['gt_class'], valid_ids, axis=0)
+                im_info['augment_shape'] = np.array(
+                    [crop_box[3] - crop_box[1],
+                     crop_box[2] - crop_box[0]]).astype('int32')
+                if 'gt_score' in label_info:
+                    label_info['gt_score'] = np.take(
+                        label_info['gt_score'], valid_ids, axis=0)
+
+                if 'is_crowd' in label_info:
+                    label_info['is_crowd'] = np.take(
+                        label_info['is_crowd'], valid_ids, axis=0)
                return (im, im_info, label_info)
-        if label_info is None:
-            return (im, im_info)
-        else:
-            return (im, im_info, label_info)
+
+        return (im, im_info, label_info)


 class ArrangeFasterRCNN:

--- a/paddlex/det.py
+++ b/paddlex/det.py
@@ -20,3 +20,4 @@ YOLOv3 = cv.models.YOLOv3
 MaskRCNN = cv.models.MaskRCNN
 transforms = cv.transforms.det_transforms
 visualize = cv.models.utils.visualize.visualize_detection
+draw_pr_curve = cv.models.utils.visualize.draw_pr_curve
--- a/paddlex/utils/utils.py
+++ b/paddlex/utils/utils.py
@@ -220,3 +220,39 @@ def load_pretrain_weights(exe, main_prog, weights_dir, fuse_bn=False):
            len(vars_to_load), weights_dir))
    if fuse_bn:
        fuse_bn_weights(exe, main_prog, weights_dir)
+
+
+class EarlyStop:
+    """Signal a stop after `patience` consecutive non-improving scores.
+
+    A score is non-improving when it does not beat the best score so far
+    and is lower than, or within `thresh` of, the previous score.
+    """
+
+    def __init__(self, patience, thresh):
+        if patience < 1:
+            raise Exception("Argument patience should be a positive integer.")
+        self.patience = patience
+        self.thresh = thresh
+        self.counter = 0
+        self.score = None
+        self.max = 0
+
+    def __call__(self, current_score):
+        if self.score is None:
+            # Seed max too so an immediate drop is not taken for a new best.
+            self.score = self.max = current_score
+            return False
+        stalled = current_score <= self.max and (
+            abs(self.score - current_score) < self.thresh
+            or current_score < self.score)
+        self.counter = self.counter + 1 if stalled else 0
+        self.max = max(self.max, current_score)
+        self.score = current_score
+        if stalled:
+            logging.debug(
+                "EarlyStopping: %i / %i" % (self.counter, self.patience))
+        if stalled and self.counter >= self.patience:
+            logging.info("EarlyStopping: Stop training")
+            return True
+        return False
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ cython
 pycocotools
 visualdl=1.3.0
 paddleslim=1.0.1
+shapely
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,8 @@ setuptools.setup(
    packages=setuptools.find_packages(),
    setup_requires=['cython', 'numpy', 'sklearn'],
    install_requires=[
-        'pycocotools', 'pyyaml', 'colorama', 'tqdm', 'visualdl==1.3.0',
+        "pycocotools;platform_system!='Windows'", 
+        'pyyaml', 'colorama', 'tqdm', 'visualdl==1.3.0',
        'paddleslim==1.0.1', 'paddlehub>=1.6.2'
    ],
    classifiers=[