Commit ed938a1a authored by wangjiawei04

Merge branch 'develop' of https://github.com/paddlepaddle/serving into develop

...@@ -18,16 +18,13 @@ set(PADDLE_SERVING_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PADDLE_SERVING_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
SET(PADDLE_SERVING_INSTALL_DIR ${CMAKE_BINARY_DIR}/output)
SET(CMAKE_INSTALL_RPATH "\$ORIGIN" "${CMAKE_INSTALL_RPATH}")
include(system)
project(paddle-serving CXX C)
message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
        "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
        "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
find_package(Git REQUIRED)
find_package(Threads REQUIRED)
find_package(CUDA QUIET)
...@@ -40,25 +37,41 @@ if(NOT CMAKE_BUILD_TYPE)
    "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
    FORCE)
endif()
SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g2 -ggdb")
SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall")
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
    "A path setting third party libraries download & build directories.")
set(THIRD_PARTY_BUILD_TYPE Release)
option(WITH_AVX "Compile Paddle Serving with AVX intrinsics" OFF)
option(WITH_MKL "Compile Paddle Serving with MKL support." OFF)
option(WITH_GPU "Compile Paddle Serving with NVIDIA GPU" OFF)
option(WITH_LITE "Compile Paddle Serving with Paddle Lite Engine" OFF)
option(WITH_XPU "Compile Paddle Serving with Baidu Kunlun" OFF)
option(WITH_PYTHON "Compile Paddle Serving with Python" ON)
option(CLIENT "Compile Paddle Serving Client" OFF)
option(SERVER "Compile Paddle Serving Server" OFF)
option(APP "Compile Paddle Serving App package" OFF)
option(WITH_ELASTIC_CTR "Compile ELASTIC-CTR solution" OFF)
option(PACK "Compile for whl" OFF)
option(WITH_TRT "Compile Paddle Serving with TRT" OFF)
option(PADDLE_ON_INFERENCE "Compile for encryption" ON)
option(WITH_OPENCV "Compile Paddle Serving with OPENCV" OFF)
if (WITH_OPENCV)
SET(OPENCV_DIR "" CACHE PATH "Location of libraries")
if(NOT DEFINED OPENCV_DIR)
message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv")
endif()
if (WIN32)
find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/build/ NO_DEFAULT_PATH)
else ()
find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/share/OpenCV NO_DEFAULT_PATH)
endif ()
include_directories(${OpenCV_INCLUDE_DIRS})
endif()
if (PADDLE_ON_INFERENCE)
  add_definitions(-DPADDLE_ON_INFERENCE)
......
...@@ -42,7 +42,7 @@ We consider deploying deep learning inference service online to be a user-facing
- Any model trained by [PaddlePaddle](https://github.com/paddlepaddle/paddle) can be saved directly, or converted through the [Model Conversion Interface](./doc/SAVE.md), for online deployment with Paddle Serving.
- Support [Multi-model Pipeline Deployment](./doc/PIPELINE_SERVING.md), with both REST and RPC interfaces; see the [Pipeline example](./python/examples/pipeline).
- Support the model zoos from the Paddle ecosystem, such as [PaddleDetection](./python/examples/detection), [PaddleOCR](./python/examples/ocr), [PaddleRec](https://github.com/PaddlePaddle/PaddleRec/tree/master/recserving/movie_recommender).
- Provide a variety of pre-processing and post-processing utilities so that related code can be reused across training, deployment, and other stages, bridging the gap between AI developers and application developers; please refer to
[Serving Examples](./python/examples/).
......
...@@ -44,7 +44,7 @@ Paddle Serving aims to help deep learning developers easily deploy online inference services
- Any model trained by [PaddlePaddle](https://github.com/paddlepaddle/paddle) can be saved directly, or converted through the [Model Conversion Interface](./doc/SAVE_CN.md), for online deployment with Paddle Serving.
- Support [Multi-model Pipeline Deployment](./doc/PIPELINE_SERVING_CN.md), with both REST and RPC interfaces to meet your needs; see the [Pipeline example](./python/examples/pipeline).
- Support the major model zoos of the Paddle ecosystem, such as [PaddleDetection](./python/examples/detection), [PaddleOCR](./python/examples/ocr), and [PaddleRec](https://github.com/PaddlePaddle/PaddleRec/tree/master/recserving/movie_recommender).
- Provide a rich set of pre-processing and post-processing utilities so that users can reuse related code across training, deployment, and other stages, bridging the gap between AI developers and application developers; see the [model examples](./python/examples/).
<p align="center">
......
...@@ -26,7 +26,7 @@ ExternalProject_Add(
    extern_zlib
    ${EXTERNAL_PROJECT_LOG_ARGS}
    GIT_REPOSITORY  "https://github.com/madler/zlib.git"
    GIT_TAG         "v1.2.9"
    PREFIX          ${ZLIB_SOURCES_DIR}
    UPDATE_COMMAND  ""
    CMAKE_ARGS      -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
...@@ -54,7 +54,10 @@ ELSE(WIN32)
  SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE)
ENDIF(WIN32)
IF(NOT WITH_OPENCV)
  ADD_LIBRARY(zlib STATIC IMPORTED GLOBAL)
ENDIF()
SET_PROPERTY(TARGET zlib PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES})
ADD_DEPENDENCIES(zlib extern_zlib)
......
...@@ -59,7 +59,7 @@ message SimpleResponse { required int32 err_code = 1; }
message GetClientConfigRequest {}
message GetClientConfigResponse { repeated string client_config_str_list = 1; }
service MultiLangGeneralModelService {
  rpc Inference(InferenceRequest) returns (InferenceResponse) {}
......
...@@ -55,10 +55,10 @@ message ModelToolkitConf { repeated EngineDesc engines = 1; };
// resource conf
message ResourceConf {
  repeated string model_toolkit_path = 1;
  repeated string model_toolkit_file = 2;
  repeated string general_model_path = 3;
  repeated string general_model_file = 4;
  optional string cube_config_path = 5;
  optional string cube_config_file = 6;
  optional int32 cube_quant_bits = 7;  // set 0 if no quant.
......
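With `ResourceConf` switching from single `required`/`optional` strings to `repeated` fields, one server process can describe several models in a single resource configuration. A minimal sketch of iterating those paths, assuming the standard protobuf C++ codegen for this message; the include path, namespace, and the already-parsed `resource_conf` object are assumptions, not taken from this commit:

```cpp
// Sketch only: walk the repeated model paths of a parsed ResourceConf.
// The generated header path and namespace below are assumed.
#include <iostream>
#include "core/configure/server_configure.pb.h"  // assumed include path

void list_model_paths(
    const baidu::paddle_serving::configure::ResourceConf &resource_conf) {
  // repeated string fields expose *_size() and indexed accessors
  for (int i = 0; i < resource_conf.model_toolkit_path_size(); ++i) {
    std::cout << "model_toolkit_path[" << i << "] = "
              << resource_conf.model_toolkit_path(i) << std::endl;
  }
}
```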
...@@ -207,7 +207,7 @@ class PredictorClient {
  void init_gflags(std::vector<std::string> argv);

  int init(const std::vector<std::string> &client_conf);

  void set_predictor_conf(const std::string& conf_path,
                          const std::string& conf_file);
...@@ -227,6 +227,10 @@ class PredictorClient {
      const std::vector<std::string>& int_feed_name,
      const std::vector<std::vector<int>>& int_shape,
      const std::vector<std::vector<int>>& int_lod_slot_batch,
      const std::vector<std::vector<std::string>>& string_feed_batch,
      const std::vector<std::string>& string_feed_name,
      const std::vector<std::vector<int>>& string_shape,
      const std::vector<std::vector<int>>& string_lod_slot_batch,
      const std::vector<std::string>& fetch_name,
      PredictorRes& predict_res_batch,  // NOLINT
      const int& pid,
......
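Since `init` now accepts a list of client config files, a caller passes the feed-side config first and the fetch-side config last; as the implementation further down shows, the first file populates the feed variables and the last file the fetch variables. A hedged usage sketch, where the file paths are placeholders and the client namespace is an assumption:

```cpp
// Sketch: initializing PredictorClient with more than one client config file.
// The first config supplies feed vars, the last supplies fetch vars
// (see PredictorClient::init below). Paths are illustrative placeholders.
#include <string>
#include <vector>

int init_client(baidu::paddle_serving::general_model::PredictorClient *client) {
  std::vector<std::string> conf_files = {
      "uci_housing_client/serving_client_conf.prototxt",   // feed side
      "uci_housing_client_2/serving_client_conf.prototxt"  // fetch side
  };
  return client->init(conf_files);  // 0 on success, -1 on failure
}
```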
...@@ -28,7 +28,7 @@ using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::general_model::FetchInst;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
std::once_flag gflags_init_flag;
namespace py = pybind11;
...@@ -56,12 +56,12 @@ void PredictorClient::init_gflags(std::vector<std::string> argv) {
  });
}

int PredictorClient::init(const std::vector<std::string> &conf_file) {
  try {
    GeneralModelConfig model_config;
    if (configure::read_proto_conf(conf_file[0].c_str(), &model_config) != 0) {
      LOG(ERROR) << "Failed to load general model config"
                 << ", file path: " << conf_file[0];
      return -1;
    }
...@@ -69,9 +69,7 @@ int PredictorClient::init(const std::string &conf_file) {
    _fetch_name_to_idx.clear();
    _shape.clear();
    int feed_var_num = model_config.feed_var_size();
    VLOG(2) << "feed var num: " << feed_var_num;
    for (int i = 0; i < feed_var_num; ++i) {
      _feed_name_to_idx[model_config.feed_var(i).alias_name()] = i;
      VLOG(2) << "feed alias name: " << model_config.feed_var(i).alias_name()
...@@ -90,6 +88,16 @@ int PredictorClient::init(const std::string &conf_file) {
      _shape.push_back(tmp_feed_shape);
    }

    if (conf_file.size() > 1) {
      model_config.Clear();
      if (configure::read_proto_conf(conf_file[conf_file.size() - 1].c_str(),
                                     &model_config) != 0) {
        LOG(ERROR) << "Failed to load general model config"
                   << ", file path: " << conf_file[conf_file.size() - 1];
        return -1;
      }
    }
    int fetch_var_num = model_config.fetch_var_size();
    VLOG(2) << "fetch_var_num: " << fetch_var_num;
    for (int i = 0; i < fetch_var_num; ++i) {
      _fetch_name_to_idx[model_config.fetch_var(i).alias_name()] = i;
      VLOG(2) << "fetch [" << i << "]"
...@@ -146,11 +154,16 @@ int PredictorClient::numpy_predict(
    const std::vector<std::string> &int_feed_name,
    const std::vector<std::vector<int>> &int_shape,
    const std::vector<std::vector<int>> &int_lod_slot_batch,
    const std::vector<std::vector<std::string>> &string_feed_batch,
    const std::vector<std::string> &string_feed_name,
    const std::vector<std::vector<int>> &string_shape,
    const std::vector<std::vector<int>> &string_lod_slot_batch,
    const std::vector<std::string> &fetch_name,
    PredictorRes &predict_res_batch,
    const int &pid,
    const uint64_t log_id) {
  int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
  batch_size = batch_size > string_feed_batch.size() ? batch_size
                                                     : string_feed_batch.size();
  VLOG(2) << "batch size: " << batch_size;
  predict_res_batch.clear();
  Timer timeline;
...@@ -165,6 +178,7 @@ int PredictorClient::numpy_predict(
  VLOG(2) << "fetch general model predictor done.";
  VLOG(2) << "float feed name size: " << float_feed_name.size();
  VLOG(2) << "int feed name size: " << int_feed_name.size();
  VLOG(2) << "string feed name size: " << string_feed_name.size();
  VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
  Request req;
  req.set_log_id(log_id);
...@@ -172,12 +186,15 @@ int PredictorClient::numpy_predict(
    req.add_fetch_var_names(name);
  }

  int vec_idx = 0;
  for (int bi = 0; bi < batch_size; bi++) {
    VLOG(2) << "prepare batch " << bi;
    std::vector<Tensor *> tensor_vec;
    FeedInst *inst = req.add_insts();
    std::vector<py::array_t<float>> float_feed = float_feed_batch[bi];
    std::vector<py::array_t<int64_t>> int_feed = int_feed_batch[bi];
    std::vector<std::string> string_feed = string_feed_batch[bi];
    for (auto &name : float_feed_name) {
      tensor_vec.push_back(inst->add_tensor_array());
    }
...@@ -186,14 +203,19 @@ int PredictorClient::numpy_predict(
      tensor_vec.push_back(inst->add_tensor_array());
    }

    for (auto &name : string_feed_name) {
      tensor_vec.push_back(inst->add_tensor_array());
    }

    VLOG(2) << "batch [" << bi << "] " << "prepared";

    vec_idx = 0;
    for (auto &name : float_feed_name) {
      int idx = _feed_name_to_idx[name];
      if (idx >= tensor_vec.size()) {
        LOG(ERROR) << "idx > tensor_vec.size()";
        return -1;
      }
      Tensor *tensor = tensor_vec[idx];
      VLOG(2) << "prepare float feed " << name << " shape size "
              << float_shape[vec_idx].size();
...@@ -203,7 +225,7 @@ int PredictorClient::numpy_predict(
      for (uint32_t j = 0; j < float_lod_slot_batch[vec_idx].size(); ++j) {
        tensor->add_lod(float_lod_slot_batch[vec_idx][j]);
      }
      tensor->set_elem_type(P_FLOAT32);
      const int float_shape_size = float_shape[vec_idx].size();
      switch (float_shape_size) {
        case 4: {
...@@ -249,13 +271,17 @@ int PredictorClient::numpy_predict(
      }
      vec_idx++;
    }

    VLOG(2) << "batch [" << bi << "] "
            << "float feed value prepared";

    vec_idx = 0;
    for (auto &name : int_feed_name) {
      int idx = _feed_name_to_idx[name];
      if (idx >= tensor_vec.size()) {
        LOG(ERROR) << "idx > tensor_vec.size()";
        return -1;
      }
      Tensor *tensor = tensor_vec[idx];

      for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
...@@ -266,7 +292,7 @@ int PredictorClient::numpy_predict(
      }
      tensor->set_elem_type(_type[idx]);

      if (_type[idx] == P_INT64) {
        VLOG(2) << "prepare int feed " << name << " shape size "
                << int_shape[vec_idx].size();
      } else {
...@@ -282,7 +308,7 @@ int PredictorClient::numpy_predict(
          for (ssize_t j = 0; j < int_array.shape(1); j++) {
            for (ssize_t k = 0; k < int_array.shape(2); k++) {
              for (ssize_t l = 0; k < int_array.shape(3); l++) {
                if (_type[idx] == P_INT64) {
                  tensor->add_int64_data(int_array(i, j, k, l));
                } else {
                  tensor->add_int_data(int_array(i, j, k, l));
...@@ -298,7 +324,7 @@ int PredictorClient::numpy_predict(
          for (ssize_t i = 0; i < int_array.shape(0); i++) {
            for (ssize_t j = 0; j < int_array.shape(1); j++) {
              for (ssize_t k = 0; k < int_array.shape(2); k++) {
                if (_type[idx] == P_INT64) {
                  tensor->add_int64_data(int_array(i, j, k));
                } else {
                  tensor->add_int_data(int_array(i, j, k));
...@@ -312,7 +338,7 @@ int PredictorClient::numpy_predict(
          auto int_array = int_feed[vec_idx].unchecked<2>();
          for (ssize_t i = 0; i < int_array.shape(0); i++) {
            for (ssize_t j = 0; j < int_array.shape(1); j++) {
              if (_type[idx] == P_INT64) {
                tensor->add_int64_data(int_array(i, j));
              } else {
                tensor->add_int_data(int_array(i, j));
...@@ -324,7 +350,7 @@ int PredictorClient::numpy_predict(
        case 1: {
          auto int_array = int_feed[vec_idx].unchecked<1>();
          for (ssize_t i = 0; i < int_array.shape(0); i++) {
            if (_type[idx] == P_INT64) {
              tensor->add_int64_data(int_array(i));
            } else {
              tensor->add_int_data(int_array(i));
...@@ -338,6 +364,42 @@ int PredictorClient::numpy_predict(
    VLOG(2) << "batch [" << bi << "] "
            << "int feed value prepared";

    vec_idx = 0;
    for (auto &name : string_feed_name) {
      int idx = _feed_name_to_idx[name];
      if (idx >= tensor_vec.size()) {
        LOG(ERROR) << "idx > tensor_vec.size()";
        return -1;
      }
      Tensor *tensor = tensor_vec[idx];

      for (uint32_t j = 0; j < string_shape[vec_idx].size(); ++j) {
        tensor->add_shape(string_shape[vec_idx][j]);
      }
      for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) {
        tensor->add_lod(string_lod_slot_batch[vec_idx][j]);
      }
      tensor->set_elem_type(P_STRING);

      const int string_shape_size = string_shape[vec_idx].size();
      // string_shape[vec_idx] = [1]; cause numpy has no datatype of string.
      // we pass string via vector<vector<string> >.
      if (string_shape_size != 1) {
        LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
                   << string_shape_size;
        return -1;
      }
      switch (string_shape_size) {
        case 1: {
          tensor->add_data(string_feed[vec_idx]);
          break;
        }
      }
      vec_idx++;
    }

    VLOG(2) << "batch [" << bi << "] "
            << "string feed value prepared";
  }

  int64_t preprocess_end = timeline.TimeStampUS();
...@@ -397,19 +459,19 @@ int PredictorClient::numpy_predict(
    for (auto &name : fetch_name) {
      // int idx = _fetch_name_to_idx[name];
      if (_fetch_name_to_type[name] == P_INT64) {
        VLOG(2) << "fetch var " << name << "type int64";
        int size = output.insts(0).tensor_array(idx).int64_data_size();
        model._int64_value_map[name] = std::vector<int64_t>(
            output.insts(0).tensor_array(idx).int64_data().begin(),
            output.insts(0).tensor_array(idx).int64_data().begin() + size);
      } else if (_fetch_name_to_type[name] == P_FLOAT32) {
        VLOG(2) << "fetch var " << name << "type float";
        int size = output.insts(0).tensor_array(idx).float_data_size();
        model._float_value_map[name] = std::vector<float>(
            output.insts(0).tensor_array(idx).float_data().begin(),
            output.insts(0).tensor_array(idx).float_data().begin() + size);
      } else if (_fetch_name_to_type[name] == P_INT32) {
        VLOG(2) << "fetch var " << name << "type int32";
        int size = output.insts(0).tensor_array(idx).int_data_size();
        model._int32_value_map[name] = std::vector<int32_t>(
            output.insts(0).tensor_array(idx).int_data().begin(),
......
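The new string path exists because numpy has no native string dtype: string inputs travel separately as `vector<vector<string>>`, each string feed must declare a 1-D shape, and the value goes into the request tensor through `add_data`. A reduced sketch of that packing step, as a standalone helper; `Tensor` is the generated proto type and `P_STRING` the `ProtoDataType` value introduced above, both assumed to be in scope:

```cpp
// Sketch: packing one string feed into a request Tensor, mirroring the
// string branch of numpy_predict above.
#include <string>
#include <vector>

int pack_string_feed(Tensor *tensor,
                     const std::string &value,
                     const std::vector<int> &shape,
                     const std::vector<int> &lod) {
  if (shape.size() != 1) {
    // strings are only passed as 1-D tensors
    return -1;
  }
  for (int dim : shape) tensor->add_shape(dim);
  for (int level : lod) tensor->add_lod(level);
  tensor->set_elem_type(P_STRING);  // index 3 in ProtoDataType
  tensor->add_data(value);          // raw string payload
  return 0;
}
```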
...@@ -78,7 +78,7 @@ PYBIND11_MODULE(serving_client, m) {
             self.init_gflags(argv);
           })
      .def("init",
           [](PredictorClient &self, const std::vector<std::string> &conf) {
             return self.init(conf);
           })
      .def("set_predictor_conf",
...@@ -107,6 +107,10 @@ PYBIND11_MODULE(serving_client, m) {
              const std::vector<std::string> &int_feed_name,
              const std::vector<std::vector<int>> &int_shape,
              const std::vector<std::vector<int>> &int_lod_slot_batch,
              const std::vector<std::vector<std::string>> &string_feed_batch,
              const std::vector<std::string> &string_feed_name,
              const std::vector<std::vector<int>> &string_shape,
              const std::vector<std::vector<int>> &string_lod_slot_batch,
              const std::vector<std::string> &fetch_name,
              PredictorRes &predict_res_batch,
              const int &pid,
...@@ -119,6 +123,10 @@ PYBIND11_MODULE(serving_client, m) {
                 int_feed_name,
                 int_shape,
                 int_lod_slot_batch,
                 string_feed_batch,
                 string_feed_name,
                 string_shape,
                 string_lod_slot_batch,
                 fetch_name,
                 predict_res_batch,
                 pid,
......
include_directories(SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../../)
include(op/CMakeLists.txt)
include(proto/CMakeLists.txt)
add_executable(serving ${serving_srcs})
add_dependencies(serving pdcodegen paddle_inference_engine pdserving paddle_inference cube-api utils)
...@@ -20,6 +21,9 @@ include_directories(${CUDNN_ROOT}/include/)
target_link_libraries(serving -Wl,--whole-archive paddle_inference_engine
                      -Wl,--no-whole-archive)
if(WITH_OPENCV)
  target_link_libraries(serving ${OpenCV_LIBS})
endif()
target_link_libraries(serving paddle_inference ${paddle_depend_libs})
target_link_libraries(serving brpc)
target_link_libraries(serving protobuf)
...@@ -27,6 +31,7 @@ target_link_libraries(serving pdserving)
target_link_libraries(serving cube-api)
target_link_libraries(serving utils)
if(WITH_GPU)
  target_link_libraries(serving ${CUDA_LIBRARIES})
endif()
......
FILE(GLOB op_srcs ${CMAKE_CURRENT_LIST_DIR}/*.cpp ${CMAKE_CURRENT_LIST_DIR}/../../predictor/tools/quant.cpp)
if(WITH_OPENCV)
FILE(GLOB ocrtools_srcs ${CMAKE_CURRENT_LIST_DIR}/../../predictor/tools/ocrtools/*.cpp)
LIST(APPEND op_srcs ${ocrtools_srcs})
else()
set (EXCLUDE_DIR "general_detection_op.cpp")
foreach (TMP_PATH ${op_srcs})
string (FIND ${TMP_PATH} ${EXCLUDE_DIR} EXCLUDE_DIR_FOUND)
if (NOT ${EXCLUDE_DIR_FOUND} EQUAL -1)
list (REMOVE_ITEM op_srcs ${TMP_PATH})
break()
endif ()
endforeach(TMP_PATH)
endif()
LIST(APPEND serving_srcs ${op_srcs})
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_detection_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h"
#include "core/util/include/timer.h"
/*
#include "opencv2/imgcodecs/legacy/constants_c.h"
#include "opencv2/imgproc/types_c.h"
*/
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralDetectionOp::inference() {
VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names();
if (pre_node_names.size() != 1) {
LOG(ERROR) << "This op(" << op_name()
<< ") can only have one predecessor op, but received "
<< pre_node_names.size();
return -1;
}
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) {
LOG(ERROR) << "input_blob is nullptr,error";
return -1;
}
uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
GeneralBlob *output_blob = mutable_data<GeneralBlob>();
if (!output_blob) {
LOG(ERROR) << "output_blob is nullptr,error";
return -1;
}
output_blob->SetLogId(log_id);
if (!input_blob) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable depended argument, op:" << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
TensorVector* out = &output_blob->tensor_vector;
int batch_size = input_blob->_batch_size;
VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
output_blob->_batch_size = batch_size;
VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
std::vector<int> input_shape;
int in_num =0;
void* databuf_data = NULL;
char* databuf_char = NULL;
size_t databuf_size = 0;
std::string* input_ptr = static_cast<std::string*>(in->at(0).data.data());
std::string base64str = input_ptr[0];
float ratio_h{};
float ratio_w{};
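// The input tensor carries a base64-encoded image: decode it into a cv::Mat,
// then resize, normalize, and permute (HWC -> CHW) it into the detection
// model's input buffer below.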
cv::Mat img = Base2Mat(base64str);
cv::Mat srcimg;
cv::Mat resize_img;
cv::Mat resize_img_rec;
cv::Mat crop_img;
img.copyTo(srcimg);
this->resize_op_.Run(img, resize_img, this->max_side_len_, ratio_h, ratio_w,
this->use_tensorrt_);
this->normalize_op_.Run(&resize_img, this->mean_det, this->scale_det,
this->is_scale_);
std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
this->permute_op_.Run(&resize_img, input.data());
TensorVector* real_in = new TensorVector();
if (!real_in) {
LOG(ERROR) << "real_in is nullptr,error";
return -1;
}
for (int i = 0; i < in->size(); ++i) {
input_shape = {1, 3, resize_img.rows, resize_img.cols};
in_num = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies<int>());
databuf_size = in_num*sizeof(float);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
memcpy(databuf_data,input.data(),databuf_size);
databuf_char = reinterpret_cast<char*>(databuf_data);
paddle::PaddleBuf paddleBuf(databuf_char, databuf_size);
paddle::PaddleTensor tensor_in;
tensor_in.name = in->at(i).name;
tensor_in.dtype = paddle::PaddleDType::FLOAT32;
tensor_in.shape = {1, 3, resize_img.rows, resize_img.cols};
tensor_in.lod = in->at(i).lod;
tensor_in.data = paddleBuf;
real_in->push_back(tensor_in);
}
Timer timeline;
int64_t start = timeline.TimeStampUS();
timeline.Start();
if (InferManager::instance().infer(
engine_name().c_str(), real_in, out, batch_size)) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed do infer in fluid model: " << engine_name().c_str();
return -1;
}
std::vector<int> output_shape;
int out_num =0;
void* databuf_data_out = NULL;
char* databuf_char_out = NULL;
size_t databuf_size_out = 0;
// This is a special addition for PaddleOCR post-processing.
int infer_outnum = out->size();
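// Post-process each detection output map: threshold and dilate the probability
// map, extract and filter text boxes from the bitmap, then crop and rotate each
// box from the original image and pack it as a recognition-model input tensor
// appended to `out`; the raw detection outputs are erased afterwards.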
for (int k = 0;k <infer_outnum; ++k) {
int n2 = out->at(k).shape[2];
int n3 = out->at(k).shape[3];
int n = n2 * n3;
float* out_data = static_cast<float*>(out->at(k).data.data());
std::vector<float> pred(n, 0.0);
std::vector<unsigned char> cbuf(n, ' ');
for (int i = 0; i < n; i++) {
pred[i] = float(out_data[i]);
cbuf[i] = (unsigned char)((out_data[i]) * 255);
}
cv::Mat cbuf_map(n2, n3, CV_8UC1, (unsigned char *)cbuf.data());
cv::Mat pred_map(n2, n3, CV_32F, (float *)pred.data());
const double threshold = this->det_db_thresh_ * 255;
const double maxvalue = 255;
cv::Mat bit_map;
cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY);
cv::Mat dilation_map;
cv::Mat dila_ele = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
cv::dilate(bit_map, dilation_map, dila_ele);
boxes = post_processor_.BoxesFromBitmap(pred_map, dilation_map,
this->det_db_box_thresh_,
this->det_db_unclip_ratio_);
boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);
for (int i = boxes.size() - 1; i >= 0; i--) {
crop_img = GetRotateCropImage(img, boxes[i]);
float wh_ratio = float(crop_img.cols) / float(crop_img.rows);
this->resize_op_rec.Run(crop_img, resize_img_rec, wh_ratio, this->use_tensorrt_);
this->normalize_op_.Run(&resize_img_rec, this->mean_rec, this->scale_rec,
this->is_scale_);
std::vector<float> output_rec(1 * 3 * resize_img_rec.rows * resize_img_rec.cols, 0.0f);
this->permute_op_.Run(&resize_img_rec, output_rec.data());
// Inference.
output_shape = {1, 3, resize_img_rec.rows, resize_img_rec.cols};
out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
databuf_size_out = out_num*sizeof(float);
databuf_data_out = MempoolWrapper::instance().malloc(databuf_size_out);
if (!databuf_data_out) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size_out;
return -1;
}
memcpy(databuf_data_out,output_rec.data(),databuf_size_out);
databuf_char_out = reinterpret_cast<char*>(databuf_data_out);
paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out);
paddle::PaddleTensor tensor_out;
tensor_out.name = "image";
tensor_out.dtype = paddle::PaddleDType::FLOAT32;
tensor_out.shape = {1, 3, resize_img_rec.rows, resize_img_rec.cols};
tensor_out.data = paddleBuf;
out->push_back(tensor_out);
}
}
out->erase(out->begin(),out->begin()+infer_outnum);
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end);
return 0;
}
cv::Mat GeneralDetectionOp::Base2Mat(std::string &base64_data)
{
cv::Mat img;
std::string s_mat;
s_mat = base64Decode(base64_data.data(), base64_data.size());
std::vector<char> base64_img(s_mat.begin(), s_mat.end());
img = cv::imdecode(base64_img, cv::IMREAD_COLOR);//CV_LOAD_IMAGE_COLOR
return img;
}
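// Table-driven base64 decoder: DecodeTable maps the ASCII code of each base64
// character ('+', '/', '0'-'9', 'A'-'Z', 'a'-'z') to its 6-bit value; every
// group of four input characters is reassembled into up to three output bytes,
// stopping early at '=' padding, while CR/LF characters are skipped.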
std::string GeneralDetectionOp::base64Decode(const char* Data, int DataByte)
{
const char DecodeTable[] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
62, // '+'
0, 0, 0,
63, // '/'
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // '0'-'9'
0, 0, 0, 0, 0, 0, 0,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 'A'-'Z'
0, 0, 0, 0, 0, 0,
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // 'a'-'z'
};
std::string strDecode;
int nValue;
int i = 0;
while (i < DataByte)
{
if (*Data != '\r' && *Data != '\n')
{
nValue = DecodeTable[*Data++] << 18;
nValue += DecodeTable[*Data++] << 12;
strDecode += (nValue & 0x00FF0000) >> 16;
if (*Data != '=')
{
nValue += DecodeTable[*Data++] << 6;
strDecode += (nValue & 0x0000FF00) >> 8;
if (*Data != '=')
{
nValue += DecodeTable[*Data++];
strDecode += nValue & 0x000000FF;
}
}
i += 4;
}
else  // carriage return / line feed: skip
{
Data++;
i++;
}
}
return strDecode;
}
cv::Mat GeneralDetectionOp::GetRotateCropImage(const cv::Mat &srcimage,
std::vector<std::vector<int>> box) {
cv::Mat image;
srcimage.copyTo(image);
std::vector<std::vector<int>> points = box;
int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]};
int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]};
int left = int(*std::min_element(x_collect, x_collect + 4));
int right = int(*std::max_element(x_collect, x_collect + 4));
int top = int(*std::min_element(y_collect, y_collect + 4));
int bottom = int(*std::max_element(y_collect, y_collect + 4));
cv::Mat img_crop;
image(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop);
for (int i = 0; i < points.size(); i++) {
points[i][0] -= left;
points[i][1] -= top;
}
int img_crop_width = int(sqrt(pow(points[0][0] - points[1][0], 2) +
pow(points[0][1] - points[1][1], 2)));
int img_crop_height = int(sqrt(pow(points[0][0] - points[3][0], 2) +
pow(points[0][1] - points[3][1], 2)));
cv::Point2f pts_std[4];
pts_std[0] = cv::Point2f(0., 0.);
pts_std[1] = cv::Point2f(img_crop_width, 0.);
pts_std[2] = cv::Point2f(img_crop_width, img_crop_height);
pts_std[3] = cv::Point2f(0.f, img_crop_height);
cv::Point2f pointsf[4];
pointsf[0] = cv::Point2f(points[0][0], points[0][1]);
pointsf[1] = cv::Point2f(points[1][0], points[1][1]);
pointsf[2] = cv::Point2f(points[2][0], points[2][1]);
pointsf[3] = cv::Point2f(points[3][0], points[3][1]);
cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std);
cv::Mat dst_img;
cv::warpPerspective(img_crop, dst_img, M,
cv::Size(img_crop_width, img_crop_height),
cv::BORDER_REPLICATE);
if (float(dst_img.rows) >= float(dst_img.cols) * 1.5) {
cv::Mat srcCopy = cv::Mat(dst_img.rows, dst_img.cols, dst_img.depth());
cv::transpose(dst_img, srcCopy);
cv::flip(srcCopy, srcCopy, 0);
return srcCopy;
} else {
return dst_img;
}
}
DEFINE_OP(GeneralDetectionOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
\ No newline at end of file
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include <numeric>
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/tools/ocrtools/postprocess_op.h"
#include "core/predictor/tools/ocrtools/preprocess_op.h"
#include "paddle_inference_api.h" // NOLINT
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralDetectionOp
: public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralDetectionOp);
int inference();
private:
//config info
bool use_gpu_ = false;
int gpu_id_ = 0;
int gpu_mem_ = 4000;
int cpu_math_library_num_threads_ = 4;
bool use_mkldnn_ = false;
// pre-process
PaddleOCR::ResizeImgType0 resize_op_;
PaddleOCR::Normalize normalize_op_;
PaddleOCR::Permute permute_op_;
PaddleOCR::CrnnResizeImg resize_op_rec;
bool use_tensorrt_ = false;
bool use_fp16_ = false;
// post-process
PaddleOCR::PostProcessor post_processor_;
//det config info
int max_side_len_ = 960;
double det_db_thresh_ = 0.3;
double det_db_box_thresh_ = 0.5;
double det_db_unclip_ratio_ = 2.0;
std::vector<float> mean_det = {0.485f, 0.456f, 0.406f};
std::vector<float> scale_det = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
bool is_scale_ = true;
//rec config info
std::vector<std::string> label_list_;
std::vector<float> mean_rec = {0.5f, 0.5f, 0.5f};
std::vector<float> scale_rec = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
cv::Mat GetRotateCropImage(const cv::Mat &srcimage,
std::vector<std::vector<int>> box);
cv::Mat Base2Mat(std::string &base64_data);
std::string base64Decode(const char* Data, int DataByte);
std::vector<std::vector<std::vector<int>>> boxes;
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
...@@ -117,8 +117,9 @@ int GeneralDistKVQuantInferOp::inference() {
  std::unordered_map<int, int> in_out_map;
  baidu::paddle_serving::predictor::Resource &resource =
      baidu::paddle_serving::predictor::Resource::instance();
  // TODO: Temporary addition, specific details to be studied by HexToString
  std::shared_ptr<PaddleGeneralModelConfig> model_config =
      resource.get_general_model_config()[0];
  int cube_quant_bits = resource.get_cube_quant_bits();
  size_t EMBEDDING_SIZE = 0;
  if (cube_quant_bits == 0) {
......
...@@ -44,9 +44,50 @@ int GeneralInferOp::inference() {
               << pre_node_names.size();
    return -1;
  }

  const std::string pre_name = pre_node_names[0];
  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
  if (!input_blob) {
    LOG(ERROR) << "input_blob is nullptr,error";
    return -1;
  }
  uint64_t log_id = input_blob->GetLogId();
  VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;

  GeneralBlob *output_blob = mutable_data<GeneralBlob>();
  if (!output_blob) {
    LOG(ERROR) << "output_blob is nullptr,error";
    return -1;
  }
  output_blob->SetLogId(log_id);

  if (!input_blob) {
    LOG(ERROR) << "(logid=" << log_id
               << ") Failed mutable depended argument, op:" << pre_name;
    return -1;
  }

  const TensorVector *in = &input_blob->tensor_vector;
  TensorVector *out = &output_blob->tensor_vector;

  int batch_size = input_blob->_batch_size;
  output_blob->_batch_size = batch_size;
  VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;

  Timer timeline;
  int64_t start = timeline.TimeStampUS();
  timeline.Start();

  if (InferManager::instance().infer(
          engine_name().c_str(), in, out, batch_size)) {
    LOG(ERROR) << "(logid=" << log_id
               << ") Failed do infer in fluid model: " << engine_name().c_str();
    return -1;
  }

  int64_t end = timeline.TimeStampUS();
  CopyBlobInfo(input_blob, output_blob);
  AddBlobInfo(output_blob, start);
  AddBlobInfo(output_blob, end);
  return 0;
}
DEFINE_OP(GeneralInferOp);
......
...@@ -20,7 +20,6 @@
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"

namespace baidu {
...@@ -33,8 +32,7 @@ using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };

int conf_check(const Request *req,
               const std::shared_ptr<PaddleGeneralModelConfig> &model_config) {
  int var_num = req->insts(0).tensor_array_size();
...@@ -48,17 +46,18 @@ int conf_check(const Request *req,
  VLOG(2) << "fetch var num in reader op: " << req->fetch_var_names_size();

  for (int i = 0; i < var_num; ++i) {
    const Tensor &tensor = req->insts(0).tensor_array(i);
    if (model_config->_feed_type[i] != tensor.elem_type()) {
      LOG(ERROR) << "feed type not match.";
      return -1;
    }
    if (model_config->_feed_shape[i].size() == tensor.shape_size()) {
      for (int j = 0; j < model_config->_feed_shape[i].size(); ++j) {
        tensor.shape(j);
        if (model_config->_feed_shape[i][j] != tensor.shape(j)) {
          LOG(ERROR) << "feed shape not match.";
          return -1;
        }
...@@ -72,88 +71,178 @@ int conf_check(const Request *req, ...@@ -72,88 +71,178 @@ int conf_check(const Request *req,
} }
int GeneralReaderOp::inference() { int GeneralReaderOp::inference() {
// reade request from client // read request from client
// TODO: only support one engine here
std::string engine_name = "general_infer_0";
const Request *req = dynamic_cast<const Request *>(get_request_message()); const Request *req = dynamic_cast<const Request *>(get_request_message());
uint64_t log_id = req->log_id(); uint64_t log_id = req->log_id();
int input_var_num = 0; int input_var_num = 0;
std::vector<int64_t> elem_type; std::vector<int64_t> elem_type;
std::vector<int64_t> elem_size; std::vector<int64_t> elem_size;
std::vector<int64_t> capacity; std::vector<int64_t> databuf_size;
GeneralBlob *res = mutable_data<GeneralBlob>();
TensorVector *out = &(res->tensor_vector);
res->SetLogId(log_id);
if (!res) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed get op tls reader object output";
}
Timer timeline;
int64_t start = timeline.TimeStampUS();
int var_num = req->insts(0).tensor_array_size(); int var_num = req->insts(0).tensor_array_size();
VLOG(2) << "(logid=" << log_id << ") var num: " << var_num
<< ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource = baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance(); baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
//get the first InferOP's model_config as ReaderOp's model_config by default.
std::shared_ptr<PaddleGeneralModelConfig> model_config = std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config(); resource.get_general_model_config().front();
// TODO(guru4elephant): how to do conditional check?
/*
int ret = conf_check(req, model_config);
if (ret != 0) {
LOG(ERROR) << "model conf of server:";
resource.print_general_model_config(model_config);
return 0;
}
*/
// package tensor
elem_type.resize(var_num); elem_type.resize(var_num);
elem_size.resize(var_num); elem_size.resize(var_num);
capacity.resize(var_num); databuf_size.resize(var_num);
// prepare basic information for input
// specify the memory needed for output tensor_vector
// fill the data into output general_blob
int data_len = 0;
for (int i = 0; i < var_num; ++i) { for (int i = 0; i < var_num; ++i) {
std::string tensor_name = model_config->_feed_name[i]; paddle::PaddleTensor lod_tensor;
VLOG(2) << "(logid=" << log_id << ") get tensor name: " << tensor_name; const Tensor &tensor = req->insts(0).tensor_array(i);
auto lod_tensor = InferManager::instance().GetInputHandle( data_len = 0;
engine_name.c_str(), tensor_name.c_str()); elem_type[i] = tensor.elem_type();
std::vector<std::vector<size_t>> lod; VLOG(2) << "var[" << i << "] has elem type: " << elem_type[i];
std::vector<int> shape; if (elem_type[i] == P_INT64) { // int64
// get lod info here elem_size[i] = sizeof(int64_t);
if (req->insts(0).tensor_array(i).lod_size() > 0) { lod_tensor.dtype = paddle::PaddleDType::INT64;
lod.resize(1); data_len = tensor.int64_data_size();
for (int k = 0; k < req->insts(0).tensor_array(i).lod_size(); ++k) { } else if (elem_type[i] == P_FLOAT32) {
lod[0].push_back(req->insts(0).tensor_array(i).lod(k)); elem_size[i] = sizeof(float);
} lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
capacity[i] = 1; data_len = tensor.float_data_size();
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) { } else if (elem_type[i] == P_INT32) {
int dim = req->insts(0).tensor_array(i).shape(k); elem_size[i] = sizeof(int32_t);
VLOG(2) << "(logid=" << log_id << ") shape for var[" << i lod_tensor.dtype = paddle::PaddleDType::INT32;
<< "]: " << dim; data_len = tensor.int_data_size();
capacity[i] *= dim; } else if (elem_type[i] == P_STRING) {
shape.push_back(dim); //use paddle::PaddleDType::UINT8 as for String.
elem_size[i] = sizeof(uint8_t);
lod_tensor.dtype = paddle::PaddleDType::UINT8;
//this is for vector<String>, cause the databuf_size != vector<String>.size()*sizeof(char);
for (int idx = 0; idx < tensor.data_size(); idx++) {
data_len += tensor.data()[idx].length();
} }
VLOG(2) << "(logid=" << log_id << ") var[" << i }
<< "] is tensor, capacity: " << capacity[i]; // implement lod tensor here
} else { // only support 1-D lod
capacity[i] = 1; // TODO:support 2-D lod
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) { if (tensor.lod_size() > 0) {
int dim = req->insts(0).tensor_array(i).shape(k); VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
VLOG(2) << "(logid=" << log_id << ") shape for var[" << i lod_tensor.lod.resize(1);
<< "]: " << dim; for (int k = 0; k < tensor.lod_size(); ++k) {
capacity[i] *= dim; lod_tensor.lod[0].push_back(tensor.lod(k));
shape.push_back(dim);
} }
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor, capacity: " << capacity[i];
} }
lod_tensor->SetLoD(lod);
lod_tensor->Reshape(shape); for (int k = 0; k < tensor.shape_size(); ++k) {
// insert data here int dim = tensor.shape(k);
if (req->insts(0).tensor_array(i).elem_type() == 0) { VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
// TODO: Copy twice here, can optimize << "]: " << dim;
int elem_num = req->insts(0).tensor_array(i).int64_data_size(); lod_tensor.shape.push_back(dim);
std::vector<int64_t> data(elem_num); }
int64_t *dst_ptr = data.data(); lod_tensor.name = model_config->_feed_name[i];
out->push_back(lod_tensor);
VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i
<< "]: " << data_len;
databuf_size[i] = data_len * elem_size[i];
out->at(i).data.Resize(data_len * elem_size[i]);
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
if (elem_type[i] == P_INT64) {
int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.int64_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
memcpy(dst_ptr, tensor.int64_data().data(),databuf_size[i]);
/*
int elem_num = tensor.int64_data_size();
for (int k = 0; k < elem_num; ++k) { for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = req->insts(0).tensor_array(i).int64_data(k); dst_ptr[k] = tensor.int64_data(k);
}
*/
} else if (elem_type[i] == P_FLOAT32) {
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.float_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
} }
lod_tensor->CopyFromCpu(dst_ptr); memcpy(dst_ptr, tensor.float_data().data(),databuf_size[i]);
} else if (req->insts(0).tensor_array(i).elem_type() == 1) { /*int elem_num = tensor.float_data_size();
int elem_num = req->insts(0).tensor_array(i).float_data_size();
std::vector<float> data(elem_num);
float *dst_ptr = data.data();
for (int k = 0; k < elem_num; ++k) { for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = req->insts(0).tensor_array(i).float_data(k); dst_ptr[k] = tensor.float_data(k);
}*/
} else if (elem_type[i] == P_INT32) {
int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.int_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
} }
lod_tensor->CopyFromCpu(dst_ptr); memcpy(dst_ptr, tensor.int_data().data(),databuf_size[i]);
} else if (req->insts(0).tensor_array(i).elem_type() == 2) { /*
int elem_num = req->insts(0).tensor_array(i).int_data_size(); int elem_num = tensor.int_data_size();
std::vector<int32_t> data(elem_num);
int32_t *dst_ptr = data.data();
for (int k = 0; k < elem_num; ++k) { for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = req->insts(0).tensor_array(i).int_data(k); dst_ptr[k] = tensor.int_data(k);
}
*/
} else if (elem_type[i] == P_STRING) {
std::string *dst_ptr = static_cast<std::string *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
int elem_num = tensor.data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = tensor.data(k);
      }
    }
  }
VLOG(2) << "(logid=" << log_id << ") output size: " << out->size();
timeline.Pause();
int64_t end = timeline.TimeStampUS();
res->p_size = 0;
res->_batch_size = 1;
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "(logid=" << log_id << ") read data from client success";
  return 0;
}
DEFINE_OP(GeneralReaderOp);
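// Illustrative sketch (not part of the patch): the feed path above fills one
// paddle::PaddleTensor per feed var -- name, shape and lod from the request
// proto, a raw buffer sized data_len * elem_size, then a memcpy of the typed
// payload. A minimal standalone version of that construction; the helper name
// and the float payload are assumptions, not serving APIs.
#include <cstring>
#include <string>
#include <vector>
#include "paddle_inference_api.h"  // NOLINT

paddle::PaddleTensor MakeFloatFeed(const std::string &name,
                                   const std::vector<int> &shape,
                                   const std::vector<float> &values) {
  paddle::PaddleTensor t;
  t.name = name;                            // must match a feed var name
  t.shape = shape;                          // e.g. {1, 3, 224, 224}
  t.dtype = paddle::PaddleDType::FLOAT32;
  t.data.Resize(values.size() * sizeof(float));  // raw buffer, like data.Resize above
  std::memcpy(t.data.data(), values.data(), values.size() * sizeof(float));
  t.lod = {{0, values.size()}};             // optional single-level LoD
  return t;
}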
...
...@@ -40,59 +40,163 @@ using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralResponseOp::inference() {
const std::vector<std::string> pre_node_names = pre_names();
VLOG(2) << "pre node names size: " << pre_node_names.size();
const GeneralBlob *input_blob = nullptr;
int var_idx = 0;
int cap = 1;
uint64_t log_id =
get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
  const Request *req = dynamic_cast<const Request *>(get_request_message());
  // response inst with only fetch_var_names
  Response *res = mutable_data<Response>();

  Timer timeline;
  // double response_time = 0.0;
  // timeline.Start();
  int64_t start = timeline.TimeStampUS();

  VLOG(2) << "(logid=" << log_id
          << ") start to call load general model_conf op";

  baidu::paddle_serving::predictor::Resource &resource =
      baidu::paddle_serving::predictor::Resource::instance();

  VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";

  // get the last InferOP's model_config as ResponseOp's model_config by default.
  std::shared_ptr<PaddleGeneralModelConfig> model_config =
      resource.get_general_model_config().back();

  VLOG(2) << "(logid=" << log_id
          << ") max body size : " << brpc::fLU64::FLAGS_max_body_size;

  std::vector<int> fetch_index;
  fetch_index.resize(req->fetch_var_names_size());
  for (int i = 0; i < req->fetch_var_names_size(); ++i) {
    fetch_index[i] =
        model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
  }

  for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
    const std::string &pre_name = pre_node_names[pi];
    VLOG(2) << "(logid=" << log_id << ") pre names[" << pi << "]: " << pre_name
            << " (" << pre_node_names.size() << ")";
input_blob = get_depend_argument<GeneralBlob>(pre_name);
// fprintf(stderr, "input(%s) blob address %x\n", pre_names.c_str(),
// input_blob);
if (!input_blob) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable depended argument, op: " << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
ModelOutput *output = res->add_outputs();
// To get the order of model return values
output->set_engine_name(pre_name);
FetchInst *fetch_inst = output->add_insts();
for (auto &idx : fetch_index) {
Tensor *tensor = fetch_inst->add_tensor_array();
//tensor->set_elem_type(1);
if (model_config->_is_lod_fetch[idx]) {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
<< model_config->_fetch_name[idx] << " is lod_tensor";
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
} else {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
<< model_config->_fetch_name[idx] << " is tensor";
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
}
    }

    var_idx = 0;
    for (auto &idx : fetch_index) {
      cap = 1;
      for (int j = 0; j < in->at(idx).shape.size(); ++j) {
        cap *= in->at(idx).shape[j];
      }

      FetchInst *fetch_p = output->mutable_insts(0);
      auto dtype = in->at(idx).dtype;

      if (dtype == paddle::PaddleDType::INT64) {
        VLOG(2) << "(logid=" << log_id << ") Prepare int64 var ["
                << model_config->_fetch_name[idx] << "].";
        int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data());
        // from
        // https://stackoverflow.com/questions/15499641/copy-a-stdvector-to-a-repeated-field-from-protobuf-with-memcpy
        // `Swap` method is faster than `{}` method.
        google::protobuf::RepeatedField<int64_t> tmp_data(data_ptr,
                                                          data_ptr + cap);
        fetch_p->mutable_tensor_array(var_idx)->mutable_int64_data()->Swap(
            &tmp_data);
      } else if (dtype == paddle::PaddleDType::FLOAT32) {
VLOG(2) << "(logid=" << log_id << ") Prepare float var ["
<< model_config->_fetch_name[idx] << "].";
float *data_ptr = static_cast<float *>(in->at(idx).data.data());
google::protobuf::RepeatedField<float> tmp_data(data_ptr,
data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_float_data()->Swap(
&tmp_data);
} else if (dtype == paddle::PaddleDType::INT32) {
VLOG(2) << "(logid=" << log_id << ")Prepare int32 var ["
<< model_config->_fetch_name[idx] << "].";
int32_t *data_ptr = static_cast<int32_t *>(in->at(idx).data.data());
google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_int_data()->Swap(
&tmp_data);
}
if (model_config->_is_lod_fetch[idx]) {
if (in->at(idx).lod.size() > 0) {
for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
fetch_p->mutable_tensor_array(var_idx)->add_lod(
in->at(idx).lod[0][j]);
}
}
}
VLOG(2) << "(logid=" << log_id << ") fetch var ["
<< model_config->_fetch_name[idx] << "] ready";
var_idx++;
    }
  }

  if (req->profile_server()) {
    int64_t end = timeline.TimeStampUS();
// TODO(barriery): multi-model profile_time.
// At present, only the response_op is multi-input, so here we get
// the profile_time by hard coding. It needs to be replaced with
// a more elegant way.
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
VLOG(2) << "(logid=" << log_id
<< ") p size for input blob: " << input_blob->p_size;
int profile_time_idx = -1;
if (pi == 0) {
profile_time_idx = 0;
} else {
profile_time_idx = input_blob->p_size - 2;
}
for (; profile_time_idx < input_blob->p_size; ++profile_time_idx) {
res->add_profile_time(input_blob->time_stamp[profile_time_idx]);
} }
} }
// TODO(guru4elephant): find more elegant way to do this
res->add_profile_time(start);
res->add_profile_time(end);
  }
  return 0;
}
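// Illustrative sketch (not part of the patch) of the Swap idiom used above:
// build the RepeatedField straight from a [ptr, ptr + cap) range and swap
// buffers instead of Add()-ing element by element. Here 'dst' stands in for a
// field such as fetch_p->mutable_tensor_array(var_idx)->mutable_float_data().
#include <google/protobuf/repeated_field.h>

void FillFloats(const float *data_ptr, int cap,
                google::protobuf::RepeatedField<float> *dst) {
  google::protobuf::RepeatedField<float> tmp_data(data_ptr, data_ptr + cap);
  dst->Swap(&tmp_data);  // swaps the underlying buffers, no per-element copy
}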
...@@ -101,4 +205,4 @@ DEFINE_OP(GeneralResponseOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
\ No newline at end of file
...@@ -73,7 +73,7 @@ int GeneralTextReaderOp::inference() {
  VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";

  std::shared_ptr<PaddleGeneralModelConfig> model_config =
      resource.get_general_model_config()[0];

  VLOG(2) << "(logid=" << log_id << ") print general model config done.";
...
...@@ -58,7 +58,7 @@ int GeneralTextResponseOp::inference() {
  VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";

  std::shared_ptr<PaddleGeneralModelConfig> model_config =
      resource.get_general_model_config().back();

  std::vector<int> fetch_index;
  fetch_index.resize(req->fetch_var_names_size());
...
...@@ -13,8 +13,10 @@
// limitations under the License.

#pragma once
#include <algorithm>
#include <cctype>
#include <fstream>
#include <string>
#include "core/predictor/common/inner_common.h"
#include "core/predictor/common/macros.h"
...@@ -26,6 +28,38 @@ namespace predictor {
namespace butil = base;
#endif
enum class Precision {
kUnk = -1, // unknown type
kFloat32 = 0, // fp32
kInt8, // int8
kHalf, // fp16
kBfloat16, // bf16
};
static std::string PrecisionTypeString(const Precision data_type) {
switch (data_type) {
case Precision::kFloat32:
return "kFloat32";
case Precision::kInt8:
return "kInt8";
case Precision::kHalf:
return "kHalf";
case Precision::kBfloat16:
return "kBloat16";
default:
return "unUnk";
}
}
static std::string ToLower(const std::string& data) {
std::string result = data;
std::transform(
result.begin(), result.end(), result.begin(), [](unsigned char c) {
return tolower(c);
});
return result;
}
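// Illustrative use of the helpers above (not part of the patch): mapping a
// user-supplied precision string onto the Precision enum. The accepted
// spellings are an assumption, not part of the framework.
static Precision ParsePrecision(const std::string& flag) {
  const std::string p = ToLower(flag);
  if (p == "fp32" || p == "float32") return Precision::kFloat32;
  if (p == "int8") return Precision::kInt8;
  if (p == "fp16" || p == "half") return Precision::kHalf;
  if (p == "bf16" || p == "bfloat16") return Precision::kBfloat16;
  return Precision::kUnk;
}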
class TimerFlow {
 public:
  static const int MAX_SIZE = 1024;
...
...@@ -20,7 +20,9 @@
#include <string>
#include <utility>
#include <vector>
#include <numeric>
#include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/bsf.h"
#include "core/predictor/framework/factory.h"
#include "core/predictor/framework/infer_data.h"
#include "paddle_inference_api.h"  // NOLINT
...@@ -66,7 +68,7 @@ class InferEngine {
  virtual int thrd_initialize() { return thrd_initialize_impl(); }
  virtual int thrd_clear() { return thrd_clear_impl(); }
  virtual int thrd_finalize() { return thrd_finalize_impl(); }
  virtual int infer(const void* in, void* out, uint32_t batch_size = -1) {
    return infer_impl(in, out, batch_size);
  }
  virtual int reload() = 0;
...@@ -79,13 +81,12 @@ class InferEngine {
  virtual int thrd_finalize_impl() = 0;
  virtual int thrd_clear_impl() = 0;
  virtual int proc_finalize_impl() = 0;
  virtual int infer_impl(const void* in,
                         void* out,
                         uint32_t batch_size = -1) = 0;
  virtual int task_infer_impl(const BatchTensor& in,
                              BatchTensor& out) = 0;  // NOLINT
  // end: framework inner call
};
...@@ -100,6 +101,7 @@ class ReloadableInferEngine : public InferEngine {
  };

  virtual int load(const configure::EngineDesc& conf) = 0;
  typedef im::bsf::Task<Tensor, Tensor> TaskT;

  int proc_initialize_impl(const configure::EngineDesc& conf, bool version) {
    _reload_tag_file = conf.reloadable_meta();
...@@ -130,10 +132,45 @@ class ReloadableInferEngine : public InferEngine {
      LOG(ERROR) << "Failed proc initialize impl";
      return -1;
    }
// init bsf framework
if (_infer_thread_num <= 0) {
return 0;
}
// init bsf framework
im::bsf::TaskExecutor<TaskT>::instance()->set_thread_init_fn(
boost::bind(&InferEngine::thrd_initialize_impl, this));
im::bsf::TaskExecutor<TaskT>::instance()->set_thread_reset_fn(
boost::bind(&InferEngine::thrd_clear_impl, this));
im::bsf::TaskExecutor<TaskT>::instance()->set_thread_callback_fn(
boost::bind(&InferEngine::task_infer_impl, this, _1, _2));
im::bsf::TaskExecutor<TaskT>::instance()->set_batch_size(_infer_batch_size);
im::bsf::TaskExecutor<TaskT>::instance()->set_batch_align(
_infer_batch_align);
if (im::bsf::TaskExecutor<TaskT>::instance()->start(_infer_thread_num) !=
0) {
LOG(ERROR) << "Failed start bsf executor, threads:" << _infer_thread_num;
return -1;
}
LOG(WARNING) << "Enable batch schedule framework, thread_num:"
<< _infer_thread_num << ", batch_size:" << _infer_batch_size
<< ", enable_batch_align:" << _infer_batch_align;
    return 0;
  }
  int infer(const void* in, void* out, uint32_t batch_size = -1) {
if (_infer_thread_num <= 0) {
return infer_impl(in, out, batch_size);
}
im::bsf::TaskManager<Tensor, Tensor> task_manager;
task_manager.schedule(*(reinterpret_cast<const BatchTensor*>(in)),
*(reinterpret_cast<BatchTensor*>(out)));
task_manager.wait();
return 0;
}
  int thrd_initialize() {
    if (_infer_thread_num > 0) {
...@@ -156,6 +193,9 @@ class ReloadableInferEngine : public InferEngine {
      return -1;
    }

    if (_infer_thread_num > 0) {
      im::bsf::TaskExecutor<TaskT>::instance()->stop();
    }
    return 0;
  }
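// Caller-side sketch (not part of the patch): with the type-erased signature,
// TensorVector pointers travel through void* and the engine casts them back in
// infer_impl(). The engine name "general_infer_0" and the header path below
// are assumptions for illustration.
#include <vector>
#include "core/predictor/framework/infer.h"  // assumed header path
#include "paddle_inference_api.h"            // NOLINT

int RunOnce(const paddle::PaddleTensor &feed) {
  std::vector<paddle::PaddleTensor> in{feed};
  std::vector<paddle::PaddleTensor> out;
  return baidu::paddle_serving::predictor::InferManager::instance().infer(
      "general_infer_0", &in, &out, 1 /* batch_size */);
}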
...@@ -306,6 +346,10 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
  virtual int thrd_initialize_impl() {
    // memory pool to be inited in non-serving-threads
    if (MempoolWrapper::instance().thread_initialize() != 0) {
      LOG(ERROR) << "Failed thread initialize mempool";
      return -1;
    }

    ModelData<EngineCore>* md = new (std::nothrow) ModelData<EngineCore>;
    if (!md || load_data(md, _conf) != 0) {
...@@ -315,12 +359,17 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
    }

    THREAD_SETSPECIFIC(_skey, md);
    im::bsf::AutoMutex lock(_mutex);
    _reload_vec.push_back(md);
    return 0;
  }
  int thrd_clear_impl() {
    // for non-serving-threads
    if (MempoolWrapper::instance().thread_clear() != 0) {
      LOG(ERROR) << "Failed thread clear mempool";
      return -1;
    }
    return 0;
  }
...@@ -418,6 +467,12 @@ class CloneDBReloadableInferEngine
  }

  virtual int thrd_initialize_impl() {
    // memory pool to be inited in non-serving-threads
    if (MempoolWrapper::instance().thread_initialize() != 0) {
      LOG(ERROR) << "Failed thread initialize mempool";
      return -1;
    }

    ModelData<EngineCore>* md = new (std::nothrow) ModelData<EngineCore>;
    if (!md || load_data(md, _pd->cores[_pd->current_idx]) != 0) {
      LOG(ERROR) << "Failed clone thread data, origin_core["
...@@ -426,6 +481,7 @@ class CloneDBReloadableInferEngine
    }

    THREAD_SETSPECIFIC(DBReloadableInferEngine<EngineCore>::_skey, md);
    im::bsf::AutoMutex lock(DBReloadableInferEngine<EngineCore>::_mutex);
    DBReloadableInferEngine<EngineCore>::_reload_vec.push_back(md);
    return 0;
  }
...@@ -444,58 +500,119 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
 public:  // NOLINT
  FluidInferEngine() {}
  ~FluidInferEngine() {}
  typedef std::vector<paddle::PaddleTensor> TensorVector;

  std::vector<std::string> GetInputNames() {
    PaddleInferenceCore* core =
        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
    if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
}
return core->GetInputNames();
}
std::vector<std::string> GetOutputNames() {
PaddleInferenceCore* core =
DBReloadableInferEngine<PaddleInferenceCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
}
return core->GetOutputNames();
}
std::unique_ptr<paddle_infer::Tensor> GetInputHandle(
const std::string& name) {
PaddleInferenceCore* core =
DBReloadableInferEngine<PaddleInferenceCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
}
return core->GetInputHandle(name);
}
std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(
const std::string& name) {
PaddleInferenceCore* core =
DBReloadableInferEngine<PaddleInferenceCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in GetOutputHandle()";
}
return core->GetOutputHandle(name);
}
  int infer_impl(const void* in, void* out, uint32_t batch_size = -1) {
    // First of all, get the real core according to the template parameter
    // 'PaddleInferenceCore'.
    PaddleInferenceCore* core =
        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
    if (!core || !core->get()) {
      LOG(ERROR) << "Failed get fluid core in infer_impl()";
      return -1;
    }
    // We use a for loop to process the input data.
    // In each iteration, use in[i].name as the input name and call
    // 'core->GetInputHandle(inputName)' to get the input tensor handle.
    // Set the lod and shape information of the input first, then copy the data
    // from CPU into the core.
const TensorVector* tensorVector_in_pointer = reinterpret_cast<const TensorVector*>(in);
for (int i=0; i < tensorVector_in_pointer->size(); ++i) {
auto lod_tensor_in = core->GetInputHandle((*tensorVector_in_pointer)[i].name);
lod_tensor_in->SetLoD((*tensorVector_in_pointer)[i].lod);
lod_tensor_in->Reshape((*tensorVector_in_pointer)[i].shape);
void* origin_data = (*tensorVector_in_pointer)[i].data.data();
      // Because the core needs to determine the size of the memory block from
      // the data type passed in, the pointer must be typed
      // (float*, int64_t*, or int32_t*) instead of void*.
if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::FLOAT32) {
float* data = static_cast<float*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
      } else if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::INT64) {
int64_t* data = static_cast<int64_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
      } else if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::INT32) {
int32_t* data = static_cast<int32_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
}
}
    // After the input data is passed in, call 'core->Run()' to perform the prediction.
    if (!core->Run()) {
      LOG(ERROR) << "Failed run fluid family core";
return -1;
}
    // To get the results, first call 'core->GetOutputNames()' to get the
    // output names, then loop over them and fetch each output with
    // 'core->GetOutputHandle'.
std::vector<std::string> outnames = core->GetOutputNames();
std::vector<int> output_shape;
int out_num =0;
int dataType =0;
void* databuf_data = NULL;
char* databuf_char = NULL;
size_t databuf_size = 0;
TensorVector* tensorVector_out_pointer = reinterpret_cast<TensorVector*>(out);
if (!tensorVector_out_pointer) {
LOG(ERROR) << "tensorVector_out_pointer is nullptr,error";
      return -1;
    }
    // Get the type and shape of each output first, then copy its data from the
    // core back to CPU. The pointer type of data_out must be
    // float*, int64_t*, or int32_t* instead of void*.
for (int i=0; i < outnames.size(); ++i) {
auto lod_tensor_out = core->GetOutputHandle(outnames[i]);
output_shape = lod_tensor_out->shape();
out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
dataType = lod_tensor_out->type();
if (dataType == paddle::PaddleDType::FLOAT32) {
databuf_size = out_num*sizeof(float);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
float* data_out = reinterpret_cast<float*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
      } else if (dataType == paddle::PaddleDType::INT64) {
databuf_size = out_num*sizeof(int64_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
int64_t* data_out = reinterpret_cast<int64_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
      } else if (dataType == paddle::PaddleDType::INT32) {
databuf_size = out_num*sizeof(int32_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
int32_t* data_out = reinterpret_cast<int32_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}
      // Task scheduling requires OPs to exchange data through a 'Channel' (a
      // data structure), so the processed data must be copied into the Channel
      // for the next OP. In this function that means copying 'databuf_char'
      // into 'void* out' (i.e. 'tensorVector_out_pointer').
paddle::PaddleTensor tensor_out;
tensor_out.name = outnames[i];
tensor_out.dtype = paddle::PaddleDType(dataType);
tensor_out.shape.assign(output_shape.begin(), output_shape.end());
std::vector<std::vector<size_t>> out_lod = lod_tensor_out->lod();
for (int li=0; li < out_lod.size(); ++li) {
std::vector<size_t> lod_element;
lod_element.assign(out_lod[li].begin(), out_lod[li].end());
tensor_out.lod.push_back(lod_element);
}
paddle::PaddleBuf paddleBuf(databuf_char, databuf_size);
tensor_out.data = paddleBuf;
tensorVector_out_pointer->push_back(tensor_out);
}
    return 0;
  }
int task_infer_impl(const BatchTensor& in, BatchTensor& out) { // NOLINT
return infer_impl(&in, &out);
}
};

typedef FactoryPool<InferEngine> StaticInferFactory;
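// Standalone sketch (not part of the patch) of the paddle_infer sequence that
// FluidInferEngine::infer_impl() wraps: GetInputHandle -> Reshape/CopyFromCpu
// -> Run -> GetOutputHandle -> CopyToCpu. The "./inference_model" directory
// and the single float input/output are assumptions.
#include <functional>
#include <numeric>
#include <vector>
#include "paddle_inference_api.h"  // NOLINT

std::vector<float> RunModel(const std::vector<float> &input,
                            const std::vector<int> &shape) {
  paddle_infer::Config config("./inference_model");
  auto predictor = paddle_infer::CreatePredictor(config);

  auto in = predictor->GetInputHandle(predictor->GetInputNames()[0]);
  in->Reshape(shape);
  in->CopyFromCpu(input.data());

  predictor->Run();

  auto out = predictor->GetOutputHandle(predictor->GetOutputNames()[0]);
  auto out_shape = out->shape();
  int num = std::accumulate(
      out_shape.begin(), out_shape.end(), 1, std::multiplies<int>());
  std::vector<float> result(num);
  out->CopyToCpu(result.data());
  return result;
}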
...@@ -621,45 +738,13 @@ class VersionedInferEngine : public InferEngine {
    return _versions.begin()->second;
  }

  int infer(const void* in, void* out, uint32_t batch_size) {
    InferEngine* engine = default_engine();
    if (!engine) {
      LOG(WARNING) << "fail to get default engine";
      return -1;
    }
    return engine->infer(in, out, batch_size);
  }
std::vector<std::string> GetInputNames() {
InferEngine* engine = default_engine();
if (!engine) {
LOG(WARNING) << "fail to get default engine";
}
return engine->GetInputNames();
}
std::vector<std::string> GetOutputNames() {
InferEngine* engine = default_engine();
if (!engine) {
LOG(WARNING) << "fail to get default engine";
}
return engine->GetOutputNames();
}
std::unique_ptr<paddle_infer::Tensor> GetInputHandle(
const std::string& name) {
InferEngine* engine = default_engine();
if (!engine) {
LOG(WARNING) << "fail to get default engine";
}
return engine->GetInputHandle(name);
}
std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(
const std::string& name) {
InferEngine* engine = default_engine();
if (!engine) {
LOG(WARNING) << "fail to get default engine";
}
return engine->GetOutputHandle(name);
} }
  template <typename T>
...@@ -678,47 +763,14 @@ class VersionedInferEngine : public InferEngine {
  }

  // versioned inference interface
  int infer(const void* in, void* out, uint32_t batch_size, uint64_t version) {
    auto iter = _versions.find(version);
    if (iter == _versions.end()) {
      LOG(ERROR) << "Not found version engine: " << version;
      return -1;
    }
    return iter->second->infer(in, out, batch_size);
  }
std::vector<std::string> GetInputNames(uint64_t version) {
auto iter = _versions.find(version);
if (iter == _versions.end()) {
LOG(ERROR) << "Not found version engine: " << version;
}
return iter->second->GetInputNames();
}
std::vector<std::string> GetOutputNames(uint64_t version) {
auto iter = _versions.find(version);
if (iter == _versions.end()) {
LOG(ERROR) << "Not found version engine: " << version;
}
return iter->second->GetOutputNames();
}
std::unique_ptr<paddle_infer::Tensor> GetInputHandle(
uint64_t version, const std::string& name) {
auto iter = _versions.find(version);
if (iter == _versions.end()) {
LOG(ERROR) << "Not found version engine: " << version;
}
return iter->second->GetInputHandle(name);
}
std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(
uint64_t version, const std::string& name) {
auto iter = _versions.find(version);
if (iter == _versions.end()) {
LOG(ERROR) << "Not found version engine: " << version;
}
return iter->second->GetOutputHandle(name);
} }
  template <typename T>
...@@ -745,7 +797,10 @@ class VersionedInferEngine : public InferEngine {
  int thrd_finalize_impl() { return -1; }
  int thrd_clear_impl() { return -1; }
  int proc_finalize_impl() { return -1; }
  int infer_impl(const void* in, void* out, uint32_t batch_size = -1) {
    return -1;
  }
  int task_infer_impl(const BatchTensor& in, BatchTensor& out) {  // NOLINT
    return -1;
  }

 private:
  boost::unordered_map<uint64_t, InferEngine*> _versions;
...@@ -843,44 +898,16 @@ class InferManager {
  }

  // Inference interface
  int infer(const char* model_name,
            const void* in,
            void* out,
            uint32_t batch_size = -1) {
    auto it = _map.find(model_name);
    if (it == _map.end()) {
      LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
      return -1;
    }
    return it->second->infer(in, out, batch_size);
  }
std::vector<std::string> GetInputNames(const char* model_name) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
}
return it->second->GetInputNames();
}
std::vector<std::string> GetOutputNames(const char* model_name) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
}
return it->second->GetOutputNames();
}
std::unique_ptr<paddle_infer::Tensor> GetInputHandle(
const char* model_name, const std::string& name) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
}
return it->second->GetInputHandle(name);
}
std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(
const char* model_name, const std::string& name) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
}
return it->second->GetOutputHandle(name);
} }
  template <typename T>
...@@ -900,48 +927,19 @@ class InferManager {
  }

  // Versioned inference interface
  int infer(const char* model_name,
            const void* in,
            void* out,
            uint32_t batch_size,
            uint64_t version) {
    auto it = _map.find(model_name);
    if (it == _map.end()) {
      LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
      return -1;
    }
    return it->second->infer(in, out, batch_size, version);
  }
std::vector<std::string> GetInputNames(const char* model_name,
uint64_t version) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
}
return it->second->GetInputNames(version);
} }
std::vector<std::string> GetOutputNames(const char* model_name,
uint64_t version) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
}
return it->second->GetOutputNames(version);
}
std::unique_ptr<paddle_infer::Tensor> GetInputHandle(
const char* model_name, uint64_t version, const std::string& name) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
}
return it->second->GetInputHandle(version, name);
}
std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(
const char* model_name, uint64_t version, const std::string& name) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
}
return it->second->GetOutputHandle(version, name);
}
  template <typename T>
  T* get_core(const char* model_name, uint64_t version) {
    auto it = _map.find(model_name);
...
...@@ -21,7 +21,7 @@ namespace baidu {
namespace paddle_serving {
namespace predictor {

enum DataType { FLOAT32, INT64, INT32 };

class DataBuf {
 public:
...@@ -80,8 +80,10 @@ struct Tensor {
  size_t ele_byte() const {
    if (type == INT64) {
      return sizeof(int64_t);
    } else if (type == FLOAT32) {
      return sizeof(float);
    } else {
      return sizeof(int32_t);
    }
  }
...
...@@ -42,8 +42,8 @@ DynamicResource::~DynamicResource() {}
int DynamicResource::initialize() { return 0; }

std::vector<std::shared_ptr<PaddleGeneralModelConfig> >
Resource::get_general_model_config() {
  return _configs;
}

void Resource::print_general_model_config(
...@@ -149,30 +149,23 @@ int Resource::initialize(const std::string& path, const std::string& file) {
#endif

  if (FLAGS_enable_model_toolkit) {
    size_t model_toolkit_num = resource_conf.model_toolkit_path_size();
    for (size_t mi = 0; mi < model_toolkit_num; ++mi) {
      std::string model_toolkit_path = resource_conf.model_toolkit_path(mi);
      std::string model_toolkit_file = resource_conf.model_toolkit_file(mi);

      if (InferManager::instance().proc_initialize(
              model_toolkit_path.c_str(), model_toolkit_file.c_str()) != 0) {
        LOG(ERROR) << "failed proc initialize modeltoolkit, config: "
                   << model_toolkit_path << "/" << model_toolkit_file;
        return -1;
      }

      if (KVManager::instance().proc_initialize(
              model_toolkit_path.c_str(), model_toolkit_file.c_str()) != 0) {
        LOG(ERROR) << "Failed proc initialize kvmanager, config: "
                   << model_toolkit_path << "/" << model_toolkit_file;
      }
    }
  }
...@@ -231,80 +224,79 @@ int Resource::general_model_initialize(const std::string& path,
    LOG(ERROR) << "Failed initialize resource from: " << path << "/" << file;
    return -1;
  }

  size_t general_model_num = resource_conf.general_model_path_size();
  for (size_t gi = 0; gi < general_model_num; ++gi) {
    std::string general_model_path = resource_conf.general_model_path(gi);
    std::string general_model_file = resource_conf.general_model_file(gi);

    GeneralModelConfig model_config;
    if (configure::read_proto_conf(general_model_path.c_str(),
                                   general_model_file.c_str(),
                                   &model_config) != 0) {
      LOG(ERROR) << "Failed initialize model config from: "
                 << general_model_path << "/" << general_model_file;
      return -1;
    }

    auto _config = std::make_shared<PaddleGeneralModelConfig>();
    int feed_var_num = model_config.feed_var_size();
    VLOG(2) << "load general model config";
    VLOG(2) << "feed var num: " << feed_var_num;
    _config->_feed_name.resize(feed_var_num);
    _config->_feed_alias_name.resize(feed_var_num);
    _config->_feed_type.resize(feed_var_num);
    _config->_is_lod_feed.resize(feed_var_num);
    _config->_capacity.resize(feed_var_num);
    _config->_feed_shape.resize(feed_var_num);
    for (int i = 0; i < feed_var_num; ++i) {
      _config->_feed_name[i] = model_config.feed_var(i).name();
      _config->_feed_alias_name[i] = model_config.feed_var(i).alias_name();
      VLOG(2) << "feed var[" << i << "]: " << _config->_feed_name[i];
      VLOG(2) << "feed var[" << i << "]: " << _config->_feed_alias_name[i];
      _config->_feed_type[i] = model_config.feed_var(i).feed_type();
      VLOG(2) << "feed type[" << i << "]: " << _config->_feed_type[i];

      if (model_config.feed_var(i).is_lod_tensor()) {
        VLOG(2) << "var[" << i << "] is lod tensor";
        _config->_feed_shape[i] = {-1};
        _config->_is_lod_feed[i] = true;
      } else {
        VLOG(2) << "var[" << i << "] is tensor";
        _config->_capacity[i] = 1;
        _config->_is_lod_feed[i] = false;
        for (int j = 0; j < model_config.feed_var(i).shape_size(); ++j) {
          int32_t dim = model_config.feed_var(i).shape(j);
          VLOG(2) << "var[" << i << "].shape[" << i << "]: " << dim;
          _config->_feed_shape[i].push_back(dim);
          _config->_capacity[i] *= dim;
        }
      }
    }

    int fetch_var_num = model_config.fetch_var_size();
    _config->_is_lod_fetch.resize(fetch_var_num);
    _config->_fetch_name.resize(fetch_var_num);
    _config->_fetch_alias_name.resize(fetch_var_num);
    _config->_fetch_shape.resize(fetch_var_num);
    for (int i = 0; i < fetch_var_num; ++i) {
      _config->_fetch_name[i] = model_config.fetch_var(i).name();
      _config->_fetch_alias_name[i] = model_config.fetch_var(i).alias_name();
      _config->_fetch_name_to_index[_config->_fetch_name[i]] = i;
      _config->_fetch_alias_name_to_index[_config->_fetch_alias_name[i]] = i;
      if (model_config.fetch_var(i).is_lod_tensor()) {
        VLOG(2) << "fetch var[" << i << "] is lod tensor";
        _config->_fetch_shape[i] = {-1};
        _config->_is_lod_fetch[i] = true;
      } else {
        _config->_is_lod_fetch[i] = false;
        for (int j = 0; j < model_config.fetch_var(i).shape_size(); ++j) {
          int dim = model_config.fetch_var(i).shape(j);
          _config->_fetch_shape[i].push_back(dim);
        }
      }
    }
    _configs.push_back(std::move(_config));
  }
  return 0;
}
...
...@@ -94,7 +94,7 @@ class Resource {
  int finalize();

  std::vector<std::shared_ptr<PaddleGeneralModelConfig> > get_general_model_config();

  void print_general_model_config(
      const std::shared_ptr<PaddleGeneralModelConfig>& config);
...@@ -107,7 +107,7 @@ class Resource {
 private:
  int thread_finalize() { return 0; }
  std::vector<std::shared_ptr<PaddleGeneralModelConfig> > _configs;
  std::string cube_config_fullpath;
  int cube_quant_bits;  // 0 if no empty
...
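// Caller-side sketch (not part of the patch): the resource now keeps one
// PaddleGeneralModelConfig per model, so callers index the returned vector --
// reader-side ops take entry [0], response ops take .back(), matching the
// hunks above. The header path is an assumption.
#include <memory>
#include "core/predictor/framework/resource.h"  // assumed header path

std::shared_ptr<baidu::paddle_serving::predictor::PaddleGeneralModelConfig>
PickResponseConfig() {
  return baidu::paddle_serving::predictor::Resource::instance()
      .get_general_model_config()
      .back();
}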
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
...@@ -126,7 +126,7 @@ int main(int argc, char** argv) {
    return 0;
  }

  // google::ParseCommandLineFlags(&argc, &argv, true);

  g_change_server_port();
...@@ -202,6 +202,7 @@ int main(int argc, char** argv) {
  }
  VLOG(2) << "Succ call pthread worker start function";

  // This is not used by any code segment, which can be cancelled.
  if (Resource::instance().general_model_initialize(FLAGS_resource_path,
                                                    FLAGS_resource_file) != 0) {
    LOG(ERROR) << "Failed to initialize general model conf: "
...
/*******************************************************************************
* *
* Author : Angus Johnson *
* Version : 6.4.2 *
* Date : 27 February 2017 *
* Website : http://www.angusj.com *
* Copyright : Angus Johnson 2010-2017 *
* *
* License: *
* Use, modification & distribution is subject to Boost Software License Ver 1. *
* http://www.boost.org/LICENSE_1_0.txt *
* *
* Attributions: *
* The code in this library is an extension of Bala Vatti's clipping algorithm: *
* "A generic solution to polygon clipping" *
* Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63. *
* http://portal.acm.org/citation.cfm?id=129906 *
* *
* Computer graphics and geometric modeling: implementation and algorithms *
* By Max K. Agoston *
* Springer; 1 edition (January 4, 2005) *
* http://books.google.com/books?q=vatti+clipping+agoston *
* *
* See also: *
* "Polygon Offsetting by Computing Winding Numbers" *
* Paper no. DETC2005-85513 pp. 565-575 *
* ASME 2005 International Design Engineering Technical Conferences *
* and Computers and Information in Engineering Conference (IDETC/CIE2005) *
* September 24-28, 2005 , Long Beach, California, USA *
* http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf *
* *
*******************************************************************************/
/*******************************************************************************
* *
* This is a translation of the Delphi Clipper library and the naming style *
* used has retained a Delphi flavour. *
* *
*******************************************************************************/
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <ostream>
#include <stdexcept>
#include <vector>
#include "clipper.h"
namespace ClipperLib {
static double const pi = 3.141592653589793238;
static double const two_pi = pi * 2;
static double const def_arc_tolerance = 0.25;
enum Direction { dRightToLeft, dLeftToRight };
static int const Unassigned = -1; // edge not currently 'owning' a solution
static int const Skip = -2; // edge that would otherwise close a path
#define HORIZONTAL (-1.0E+40)
#define TOLERANCE (1.0e-20)
#define NEAR_ZERO(val) (((val) > -TOLERANCE) && ((val) < TOLERANCE))
struct TEdge {
IntPoint Bot;
IntPoint Curr; // current (updated for every new scanbeam)
IntPoint Top;
double Dx;
PolyType PolyTyp;
EdgeSide Side; // side only refers to current side of solution poly
int WindDelta; // 1 or -1 depending on winding direction
int WindCnt;
int WindCnt2; // winding count of the opposite polytype
int OutIdx;
TEdge *Next;
TEdge *Prev;
TEdge *NextInLML;
TEdge *NextInAEL;
TEdge *PrevInAEL;
TEdge *NextInSEL;
TEdge *PrevInSEL;
};
struct IntersectNode {
TEdge *Edge1;
TEdge *Edge2;
IntPoint Pt;
};
struct LocalMinimum {
cInt Y;
TEdge *LeftBound;
TEdge *RightBound;
};
struct OutPt;
// OutRec: contains a path in the clipping solution. Edges in the AEL will
// carry a pointer to an OutRec when they are part of the clipping solution.
struct OutRec {
int Idx;
bool IsHole;
bool IsOpen;
OutRec *FirstLeft; // see comments in clipper.pas
PolyNode *PolyNd;
OutPt *Pts;
OutPt *BottomPt;
};
struct OutPt {
int Idx;
IntPoint Pt;
OutPt *Next;
OutPt *Prev;
};
struct Join {
OutPt *OutPt1;
OutPt *OutPt2;
IntPoint OffPt;
};
struct LocMinSorter {
inline bool operator()(const LocalMinimum &locMin1,
const LocalMinimum &locMin2) {
return locMin2.Y < locMin1.Y;
}
};
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
inline cInt Round(double val) {
if ((val < 0))
return static_cast<cInt>(val - 0.5);
else
return static_cast<cInt>(val + 0.5);
}
//------------------------------------------------------------------------------
inline cInt Abs(cInt val) { return val < 0 ? -val : val; }
//------------------------------------------------------------------------------
// PolyTree methods ...
//------------------------------------------------------------------------------
void PolyTree::Clear() {
for (PolyNodes::size_type i = 0; i < AllNodes.size(); ++i)
delete AllNodes[i];
AllNodes.resize(0);
Childs.resize(0);
}
//------------------------------------------------------------------------------
PolyNode *PolyTree::GetFirst() const {
if (!Childs.empty())
return Childs[0];
else
return 0;
}
//------------------------------------------------------------------------------
int PolyTree::Total() const {
int result = (int)AllNodes.size();
// with negative offsets, ignore the hidden outer polygon ...
if (result > 0 && Childs[0] != AllNodes[0])
result--;
return result;
}
//------------------------------------------------------------------------------
// PolyNode methods ...
//------------------------------------------------------------------------------
PolyNode::PolyNode() : Parent(0), Index(0), m_IsOpen(false) {}
//------------------------------------------------------------------------------
int PolyNode::ChildCount() const { return (int)Childs.size(); }
//------------------------------------------------------------------------------
void PolyNode::AddChild(PolyNode &child) {
unsigned cnt = (unsigned)Childs.size();
Childs.push_back(&child);
child.Parent = this;
child.Index = cnt;
}
//------------------------------------------------------------------------------
PolyNode *PolyNode::GetNext() const {
if (!Childs.empty())
return Childs[0];
else
return GetNextSiblingUp();
}
//------------------------------------------------------------------------------
PolyNode *PolyNode::GetNextSiblingUp() const {
if (!Parent) // protects against PolyTree.GetNextSiblingUp()
return 0;
else if (Index == Parent->Childs.size() - 1)
return Parent->GetNextSiblingUp();
else
return Parent->Childs[Index + 1];
}
//------------------------------------------------------------------------------
bool PolyNode::IsHole() const {
bool result = true;
PolyNode *node = Parent;
while (node) {
result = !result;
node = node->Parent;
}
return result;
}
//------------------------------------------------------------------------------
bool PolyNode::IsOpen() const { return m_IsOpen; }
//------------------------------------------------------------------------------
#ifndef use_int32
//------------------------------------------------------------------------------
// Int128 class (enables safe math on signed 64bit integers)
// eg Int128 val1((long64)9223372036854775807); //ie 2^63 -1
// Int128 val2((long64)9223372036854775807);
// Int128 val3 = val1 * val2;
// val3.AsString => "85070591730234615847396907784232501249" (8.5e+37)
//------------------------------------------------------------------------------
class Int128 {
public:
ulong64 lo;
long64 hi;
Int128(long64 _lo = 0) {
lo = (ulong64)_lo;
if (_lo < 0)
hi = -1;
else
hi = 0;
}
Int128(const Int128 &val) : lo(val.lo), hi(val.hi) {}
Int128(const long64 &_hi, const ulong64 &_lo) : lo(_lo), hi(_hi) {}
Int128 &operator=(const long64 &val) {
lo = (ulong64)val;
if (val < 0)
hi = -1;
else
hi = 0;
return *this;
}
bool operator==(const Int128 &val) const {
return (hi == val.hi && lo == val.lo);
}
bool operator!=(const Int128 &val) const { return !(*this == val); }
bool operator>(const Int128 &val) const {
if (hi != val.hi)
return hi > val.hi;
else
return lo > val.lo;
}
bool operator<(const Int128 &val) const {
if (hi != val.hi)
return hi < val.hi;
else
return lo < val.lo;
}
bool operator>=(const Int128 &val) const { return !(*this < val); }
bool operator<=(const Int128 &val) const { return !(*this > val); }
Int128 &operator+=(const Int128 &rhs) {
hi += rhs.hi;
lo += rhs.lo;
if (lo < rhs.lo)
hi++;
return *this;
}
Int128 operator+(const Int128 &rhs) const {
Int128 result(*this);
result += rhs;
return result;
}
Int128 &operator-=(const Int128 &rhs) {
*this += -rhs;
return *this;
}
Int128 operator-(const Int128 &rhs) const {
Int128 result(*this);
result -= rhs;
return result;
}
Int128 operator-() const // unary negation
{
if (lo == 0)
return Int128(-hi, 0);
else
return Int128(~hi, ~lo + 1);
}
operator double() const {
const double shift64 = 18446744073709551616.0; // 2^64
if (hi < 0) {
if (lo == 0)
return (double)hi * shift64;
else
return -(double)(~lo + ~hi * shift64);
} else
return (double)(lo + hi * shift64);
}
};
//------------------------------------------------------------------------------
Int128 Int128Mul(long64 lhs, long64 rhs) {
bool negate = (lhs < 0) != (rhs < 0);
if (lhs < 0)
lhs = -lhs;
ulong64 int1Hi = ulong64(lhs) >> 32;
ulong64 int1Lo = ulong64(lhs & 0xFFFFFFFF);
if (rhs < 0)
rhs = -rhs;
ulong64 int2Hi = ulong64(rhs) >> 32;
ulong64 int2Lo = ulong64(rhs & 0xFFFFFFFF);
// nb: see comments in clipper.pas
ulong64 a = int1Hi * int2Hi;
ulong64 b = int1Lo * int2Lo;
ulong64 c = int1Hi * int2Lo + int1Lo * int2Hi;
Int128 tmp;
tmp.hi = long64(a + (c >> 32));
tmp.lo = long64(c << 32);
tmp.lo += long64(b);
if (tmp.lo < b)
tmp.hi++;
if (negate)
tmp = -tmp;
return tmp;
};
#endif
//------------------------------------------------------------------------------
// Miscellaneous global functions
//------------------------------------------------------------------------------
bool Orientation(const Path &poly) { return Area(poly) >= 0; }
//------------------------------------------------------------------------------
double Area(const Path &poly) {
int size = (int)poly.size();
if (size < 3)
return 0;
double a = 0;
for (int i = 0, j = size - 1; i < size; ++i) {
a += ((double)poly[j].X + poly[i].X) * ((double)poly[j].Y - poly[i].Y);
j = i;
}
return -a * 0.5;
}
//------------------------------------------------------------------------------
double Area(const OutPt *op) {
const OutPt *startOp = op;
if (!op)
return 0;
double a = 0;
do {
a += (double)(op->Prev->Pt.X + op->Pt.X) *
(double)(op->Prev->Pt.Y - op->Pt.Y);
op = op->Next;
} while (op != startOp);
return a * 0.5;
}
//------------------------------------------------------------------------------
double Area(const OutRec &outRec) { return Area(outRec.Pts); }
//------------------------------------------------------------------------------
bool PointIsVertex(const IntPoint &Pt, OutPt *pp) {
OutPt *pp2 = pp;
do {
if (pp2->Pt == Pt)
return true;
pp2 = pp2->Next;
} while (pp2 != pp);
return false;
}
//------------------------------------------------------------------------------
// See "The Point in Polygon Problem for Arbitrary Polygons" by Hormann &
// Agathos
// http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.88.5498&rep=rep1&type=pdf
int PointInPolygon(const IntPoint &pt, const Path &path) {
// returns 0 if false, +1 if true, -1 if pt ON polygon boundary
int result = 0;
size_t cnt = path.size();
if (cnt < 3)
return 0;
IntPoint ip = path[0];
for (size_t i = 1; i <= cnt; ++i) {
IntPoint ipNext = (i == cnt ? path[0] : path[i]);
if (ipNext.Y == pt.Y) {
if ((ipNext.X == pt.X) ||
(ip.Y == pt.Y && ((ipNext.X > pt.X) == (ip.X < pt.X))))
return -1;
}
if ((ip.Y < pt.Y) != (ipNext.Y < pt.Y)) {
if (ip.X >= pt.X) {
if (ipNext.X > pt.X)
result = 1 - result;
else {
double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) -
(double)(ipNext.X - pt.X) * (ip.Y - pt.Y);
if (!d)
return -1;
if ((d > 0) == (ipNext.Y > ip.Y))
result = 1 - result;
}
} else {
if (ipNext.X > pt.X) {
double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) -
(double)(ipNext.X - pt.X) * (ip.Y - pt.Y);
if (!d)
return -1;
if ((d > 0) == (ipNext.Y > ip.Y))
result = 1 - result;
}
}
}
ip = ipNext;
}
return result;
}
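// Illustrative sketch (not part of the library) of the return convention
// documented above: 0 = outside, +1 = inside, -1 = on the boundary. The square
// coordinates are made up.
bool StrictlyInsideSquare(const IntPoint &pt) {
  Path square;
  square.push_back(IntPoint(0, 0));
  square.push_back(IntPoint(10, 0));
  square.push_back(IntPoint(10, 10));
  square.push_back(IntPoint(0, 10));
  return PointInPolygon(pt, square) == 1;
}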
//------------------------------------------------------------------------------
int PointInPolygon(const IntPoint &pt, OutPt *op) {
// returns 0 if false, +1 if true, -1 if pt ON polygon boundary
int result = 0;
OutPt *startOp = op;
for (;;) {
if (op->Next->Pt.Y == pt.Y) {
if ((op->Next->Pt.X == pt.X) ||
(op->Pt.Y == pt.Y && ((op->Next->Pt.X > pt.X) == (op->Pt.X < pt.X))))
return -1;
}
if ((op->Pt.Y < pt.Y) != (op->Next->Pt.Y < pt.Y)) {
if (op->Pt.X >= pt.X) {
if (op->Next->Pt.X > pt.X)
result = 1 - result;
else {
double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) -
(double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y);
if (!d)
return -1;
if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y))
result = 1 - result;
}
} else {
if (op->Next->Pt.X > pt.X) {
double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) -
(double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y);
if (!d)
return -1;
if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y))
result = 1 - result;
}
}
}
op = op->Next;
if (startOp == op)
break;
}
return result;
}
//------------------------------------------------------------------------------
bool Poly2ContainsPoly1(OutPt *OutPt1, OutPt *OutPt2) {
OutPt *op = OutPt1;
do {
// nb: PointInPolygon returns 0 if false, +1 if true, -1 if pt on polygon
int res = PointInPolygon(op->Pt, OutPt2);
if (res >= 0)
return res > 0;
op = op->Next;
} while (op != OutPt1);
return true;
}
//----------------------------------------------------------------------
bool SlopesEqual(const TEdge &e1, const TEdge &e2, bool UseFullInt64Range) {
#ifndef use_int32
if (UseFullInt64Range)
return Int128Mul(e1.Top.Y - e1.Bot.Y, e2.Top.X - e2.Bot.X) ==
Int128Mul(e1.Top.X - e1.Bot.X, e2.Top.Y - e2.Bot.Y);
else
#endif
return (e1.Top.Y - e1.Bot.Y) * (e2.Top.X - e2.Bot.X) ==
(e1.Top.X - e1.Bot.X) * (e2.Top.Y - e2.Bot.Y);
}
//------------------------------------------------------------------------------
bool SlopesEqual(const IntPoint pt1, const IntPoint pt2, const IntPoint pt3,
bool UseFullInt64Range) {
#ifndef use_int32
if (UseFullInt64Range)
return Int128Mul(pt1.Y - pt2.Y, pt2.X - pt3.X) ==
Int128Mul(pt1.X - pt2.X, pt2.Y - pt3.Y);
else
#endif
return (pt1.Y - pt2.Y) * (pt2.X - pt3.X) ==
(pt1.X - pt2.X) * (pt2.Y - pt3.Y);
}
//------------------------------------------------------------------------------
bool SlopesEqual(const IntPoint pt1, const IntPoint pt2, const IntPoint pt3,
const IntPoint pt4, bool UseFullInt64Range) {
#ifndef use_int32
if (UseFullInt64Range)
return Int128Mul(pt1.Y - pt2.Y, pt3.X - pt4.X) ==
Int128Mul(pt1.X - pt2.X, pt3.Y - pt4.Y);
else
#endif
return (pt1.Y - pt2.Y) * (pt3.X - pt4.X) ==
(pt1.X - pt2.X) * (pt3.Y - pt4.Y);
}
//------------------------------------------------------------------------------
inline bool IsHorizontal(TEdge &e) { return e.Dx == HORIZONTAL; }
//------------------------------------------------------------------------------
inline double GetDx(const IntPoint pt1, const IntPoint pt2) {
return (pt1.Y == pt2.Y) ? HORIZONTAL
: (double)(pt2.X - pt1.X) / (pt2.Y - pt1.Y);
}
//---------------------------------------------------------------------------
inline void SetDx(TEdge &e) {
cInt dy = (e.Top.Y - e.Bot.Y);
if (dy == 0)
e.Dx = HORIZONTAL;
else
e.Dx = (double)(e.Top.X - e.Bot.X) / dy;
}
//---------------------------------------------------------------------------
inline void SwapSides(TEdge &Edge1, TEdge &Edge2) {
EdgeSide Side = Edge1.Side;
Edge1.Side = Edge2.Side;
Edge2.Side = Side;
}
//------------------------------------------------------------------------------
inline void SwapPolyIndexes(TEdge &Edge1, TEdge &Edge2) {
int OutIdx = Edge1.OutIdx;
Edge1.OutIdx = Edge2.OutIdx;
Edge2.OutIdx = OutIdx;
}
//------------------------------------------------------------------------------
inline cInt TopX(TEdge &edge, const cInt currentY) {
return (currentY == edge.Top.Y)
? edge.Top.X
: edge.Bot.X + Round(edge.Dx * (currentY - edge.Bot.Y));
}
//------------------------------------------------------------------------------
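// Computes the intersection of Edge1 and Edge2 from their Dx slopes and
// intercepts, then clamps the result so it stays within the current scanbeam
// (not beyond either edge's Top and not below Edge1.Curr.Y).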
void IntersectPoint(TEdge &Edge1, TEdge &Edge2, IntPoint &ip) {
#ifdef use_xyz
ip.Z = 0;
#endif
double b1, b2;
if (Edge1.Dx == Edge2.Dx) {
ip.Y = Edge1.Curr.Y;
ip.X = TopX(Edge1, ip.Y);
return;
} else if (Edge1.Dx == 0) {
ip.X = Edge1.Bot.X;
if (IsHorizontal(Edge2))
ip.Y = Edge2.Bot.Y;
else {
b2 = Edge2.Bot.Y - (Edge2.Bot.X / Edge2.Dx);
ip.Y = Round(ip.X / Edge2.Dx + b2);
}
} else if (Edge2.Dx == 0) {
ip.X = Edge2.Bot.X;
if (IsHorizontal(Edge1))
ip.Y = Edge1.Bot.Y;
else {
b1 = Edge1.Bot.Y - (Edge1.Bot.X / Edge1.Dx);
ip.Y = Round(ip.X / Edge1.Dx + b1);
}
} else {
b1 = Edge1.Bot.X - Edge1.Bot.Y * Edge1.Dx;
b2 = Edge2.Bot.X - Edge2.Bot.Y * Edge2.Dx;
double q = (b2 - b1) / (Edge1.Dx - Edge2.Dx);
ip.Y = Round(q);
if (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx))
ip.X = Round(Edge1.Dx * q + b1);
else
ip.X = Round(Edge2.Dx * q + b2);
}
if (ip.Y < Edge1.Top.Y || ip.Y < Edge2.Top.Y) {
if (Edge1.Top.Y > Edge2.Top.Y)
ip.Y = Edge1.Top.Y;
else
ip.Y = Edge2.Top.Y;
if (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx))
ip.X = TopX(Edge1, ip.Y);
else
ip.X = TopX(Edge2, ip.Y);
}
// finally, don't allow 'ip' to be BELOW curr.Y (ie bottom of scanbeam) ...
if (ip.Y > Edge1.Curr.Y) {
ip.Y = Edge1.Curr.Y;
// use the more vertical edge to derive X ...
if (std::fabs(Edge1.Dx) > std::fabs(Edge2.Dx))
ip.X = TopX(Edge2, ip.Y);
else
ip.X = TopX(Edge1, ip.Y);
}
}
//------------------------------------------------------------------------------
void ReversePolyPtLinks(OutPt *pp) {
if (!pp)
return;
OutPt *pp1, *pp2;
pp1 = pp;
do {
pp2 = pp1->Next;
pp1->Next = pp1->Prev;
pp1->Prev = pp2;
pp1 = pp2;
} while (pp1 != pp);
}
//------------------------------------------------------------------------------
void DisposeOutPts(OutPt *&pp) {
if (pp == 0)
return;
pp->Prev->Next = 0;
while (pp) {
OutPt *tmpPp = pp;
pp = pp->Next;
delete tmpPp;
}
}
//------------------------------------------------------------------------------
inline void InitEdge(TEdge *e, TEdge *eNext, TEdge *ePrev, const IntPoint &Pt) {
std::memset(e, 0, sizeof(TEdge));
e->Next = eNext;
e->Prev = ePrev;
e->Curr = Pt;
e->OutIdx = Unassigned;
}
//------------------------------------------------------------------------------
void InitEdge2(TEdge &e, PolyType Pt) {
if (e.Curr.Y >= e.Next->Curr.Y) {
e.Bot = e.Curr;
e.Top = e.Next->Curr;
} else {
e.Top = e.Curr;
e.Bot = e.Next->Curr;
}
SetDx(e);
e.PolyTyp = Pt;
}
//------------------------------------------------------------------------------
TEdge *RemoveEdge(TEdge *e) {
  // removes e from the doubly-linked list (but without freeing it from memory)
e->Prev->Next = e->Next;
e->Next->Prev = e->Prev;
TEdge *result = e->Next;
e->Prev = 0; // flag as removed (see ClipperBase.Clear)
return result;
}
//------------------------------------------------------------------------------
inline void ReverseHorizontal(TEdge &e) {
// swap horizontal edges' Top and Bottom x's so they follow the natural
// progression of the bounds - ie so their xbots will align with the
// adjoining lower edge. [Helpful in the ProcessHorizontal() method.]
std::swap(e.Top.X, e.Bot.X);
#ifdef use_xyz
std::swap(e.Top.Z, e.Bot.Z);
#endif
}
//------------------------------------------------------------------------------
void SwapPoints(IntPoint &pt1, IntPoint &pt2) {
IntPoint tmp = pt1;
pt1 = pt2;
pt2 = tmp;
}
//------------------------------------------------------------------------------
bool GetOverlapSegment(IntPoint pt1a, IntPoint pt1b, IntPoint pt2a,
IntPoint pt2b, IntPoint &pt1, IntPoint &pt2) {
// precondition: segments are Collinear.
if (Abs(pt1a.X - pt1b.X) > Abs(pt1a.Y - pt1b.Y)) {
if (pt1a.X > pt1b.X)
SwapPoints(pt1a, pt1b);
if (pt2a.X > pt2b.X)
SwapPoints(pt2a, pt2b);
if (pt1a.X > pt2a.X)
pt1 = pt1a;
else
pt1 = pt2a;
if (pt1b.X < pt2b.X)
pt2 = pt1b;
else
pt2 = pt2b;
return pt1.X < pt2.X;
} else {
if (pt1a.Y < pt1b.Y)
SwapPoints(pt1a, pt1b);
if (pt2a.Y < pt2b.Y)
SwapPoints(pt2a, pt2b);
if (pt1a.Y < pt2a.Y)
pt1 = pt1a;
else
pt1 = pt2a;
if (pt1b.Y > pt2b.Y)
pt2 = pt1b;
else
pt2 = pt2b;
return pt1.Y > pt2.Y;
}
}
//------------------------------------------------------------------------------
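// Given two candidate bottom points with equal coordinates, compares the |Dx|
// slopes of the edges adjoining each candidate and, when those are identical,
// falls back on ring orientation (Area) to pick the 'first' bottom point.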
bool FirstIsBottomPt(const OutPt *btmPt1, const OutPt *btmPt2) {
OutPt *p = btmPt1->Prev;
while ((p->Pt == btmPt1->Pt) && (p != btmPt1))
p = p->Prev;
double dx1p = std::fabs(GetDx(btmPt1->Pt, p->Pt));
p = btmPt1->Next;
while ((p->Pt == btmPt1->Pt) && (p != btmPt1))
p = p->Next;
double dx1n = std::fabs(GetDx(btmPt1->Pt, p->Pt));
p = btmPt2->Prev;
while ((p->Pt == btmPt2->Pt) && (p != btmPt2))
p = p->Prev;
double dx2p = std::fabs(GetDx(btmPt2->Pt, p->Pt));
p = btmPt2->Next;
while ((p->Pt == btmPt2->Pt) && (p != btmPt2))
p = p->Next;
double dx2n = std::fabs(GetDx(btmPt2->Pt, p->Pt));
if (std::max(dx1p, dx1n) == std::max(dx2p, dx2n) &&
std::min(dx1p, dx1n) == std::min(dx2p, dx2n))
return Area(btmPt1) > 0; // if otherwise identical use orientation
else
return (dx1p >= dx2p && dx1p >= dx2n) || (dx1n >= dx2p && dx1n >= dx2n);
}
//------------------------------------------------------------------------------
OutPt *GetBottomPt(OutPt *pp) {
OutPt *dups = 0;
OutPt *p = pp->Next;
while (p != pp) {
if (p->Pt.Y > pp->Pt.Y) {
pp = p;
dups = 0;
} else if (p->Pt.Y == pp->Pt.Y && p->Pt.X <= pp->Pt.X) {
if (p->Pt.X < pp->Pt.X) {
dups = 0;
pp = p;
} else {
if (p->Next != pp && p->Prev != pp)
dups = p;
}
}
p = p->Next;
}
if (dups) {
// there appears to be at least 2 vertices at BottomPt so ...
while (dups != p) {
if (!FirstIsBottomPt(p, dups))
pp = dups;
dups = dups->Next;
while (dups->Pt != pp->Pt)
dups = dups->Next;
}
}
return pp;
}
//------------------------------------------------------------------------------
bool Pt2IsBetweenPt1AndPt3(const IntPoint pt1, const IntPoint pt2,
const IntPoint pt3) {
if ((pt1 == pt3) || (pt1 == pt2) || (pt3 == pt2))
return false;
else if (pt1.X != pt3.X)
return (pt2.X > pt1.X) == (pt2.X < pt3.X);
else
return (pt2.Y > pt1.Y) == (pt2.Y < pt3.Y);
}
//------------------------------------------------------------------------------
bool HorzSegmentsOverlap(cInt seg1a, cInt seg1b, cInt seg2a, cInt seg2b) {
if (seg1a > seg1b)
std::swap(seg1a, seg1b);
if (seg2a > seg2b)
std::swap(seg2a, seg2b);
return (seg1a < seg2b) && (seg2a < seg1b);
}
//------------------------------------------------------------------------------
// ClipperBase class methods ...
//------------------------------------------------------------------------------
ClipperBase::ClipperBase() // constructor
{
m_CurrentLM = m_MinimaList.begin(); // begin() == end() here
m_UseFullRange = false;
}
//------------------------------------------------------------------------------
ClipperBase::~ClipperBase() // destructor
{
Clear();
}
//------------------------------------------------------------------------------
void RangeTest(const IntPoint &Pt, bool &useFullRange) {
if (useFullRange) {
if (Pt.X > hiRange || Pt.Y > hiRange || -Pt.X > hiRange || -Pt.Y > hiRange)
throw clipperException("Coordinate outside allowed range");
} else if (Pt.X > loRange || Pt.Y > loRange || -Pt.X > loRange ||
-Pt.Y > loRange) {
useFullRange = true;
RangeTest(Pt, useFullRange);
}
}
//------------------------------------------------------------------------------
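// Advances E forward to the next local minimum, ie a vertex that is the Bot of
// both E and E->Prev; intermediate horizontals are skipped and, where a
// minimum is horizontal, the left-aligned vertex is returned.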
TEdge *FindNextLocMin(TEdge *E) {
for (;;) {
while (E->Bot != E->Prev->Bot || E->Curr == E->Top)
E = E->Next;
if (!IsHorizontal(*E) && !IsHorizontal(*E->Prev))
break;
while (IsHorizontal(*E->Prev))
E = E->Prev;
TEdge *E2 = E;
while (IsHorizontal(*E))
E = E->Next;
if (E->Top.Y == E->Prev->Bot.Y)
continue; // ie just an intermediate horz.
if (E2->Prev->Bot.X < E->Bot.X)
E = E2;
break;
}
return E;
}
//------------------------------------------------------------------------------
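// Builds one ascending bound starting at E: its edges are linked via NextInLML,
// horizontals are normalized with ReverseHorizontal(), and the edge just
// beyond the bound is returned. If a Skip edge is met, the remainder of the
// bound is queued as an extra LocalMinimum and processed by a recursive call.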
TEdge *ClipperBase::ProcessBound(TEdge *E, bool NextIsForward) {
TEdge *Result = E;
TEdge *Horz = 0;
if (E->OutIdx == Skip) {
// if edges still remain in the current bound beyond the skip edge then
// create another LocMin and call ProcessBound once more
if (NextIsForward) {
while (E->Top.Y == E->Next->Bot.Y)
E = E->Next;
// don't include top horizontals when parsing a bound a second time,
// they will be contained in the opposite bound ...
while (E != Result && IsHorizontal(*E))
E = E->Prev;
} else {
while (E->Top.Y == E->Prev->Bot.Y)
E = E->Prev;
while (E != Result && IsHorizontal(*E))
E = E->Next;
}
if (E == Result) {
if (NextIsForward)
Result = E->Next;
else
Result = E->Prev;
} else {
// there are more edges in the bound beyond result starting with E
if (NextIsForward)
E = Result->Next;
else
E = Result->Prev;
MinimaList::value_type locMin;
locMin.Y = E->Bot.Y;
locMin.LeftBound = 0;
locMin.RightBound = E;
E->WindDelta = 0;
Result = ProcessBound(E, NextIsForward);
m_MinimaList.push_back(locMin);
}
return Result;
}
TEdge *EStart;
if (IsHorizontal(*E)) {
// We need to be careful with open paths because this may not be a
// true local minima (ie E may be following a skip edge).
// Also, consecutive horz. edges may start heading left before going right.
if (NextIsForward)
EStart = E->Prev;
else
EStart = E->Next;
if (IsHorizontal(*EStart)) // ie an adjoining horizontal skip edge
{
if (EStart->Bot.X != E->Bot.X && EStart->Top.X != E->Bot.X)
ReverseHorizontal(*E);
} else if (EStart->Bot.X != E->Bot.X)
ReverseHorizontal(*E);
}
EStart = E;
if (NextIsForward) {
while (Result->Top.Y == Result->Next->Bot.Y && Result->Next->OutIdx != Skip)
Result = Result->Next;
if (IsHorizontal(*Result) && Result->Next->OutIdx != Skip) {
      // nb: at the top of a bound, horizontals are added to the bound only
      // when the preceding edge attaches to the horizontal's left vertex,
      // unless a Skip edge is encountered, in which case that edge becomes
      // the top divide
Horz = Result;
while (IsHorizontal(*Horz->Prev))
Horz = Horz->Prev;
if (Horz->Prev->Top.X > Result->Next->Top.X)
Result = Horz->Prev;
}
while (E != Result) {
E->NextInLML = E->Next;
if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Prev->Top.X)
ReverseHorizontal(*E);
E = E->Next;
}
if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Prev->Top.X)
ReverseHorizontal(*E);
Result = Result->Next; // move to the edge just beyond current bound
} else {
while (Result->Top.Y == Result->Prev->Bot.Y && Result->Prev->OutIdx != Skip)
Result = Result->Prev;
if (IsHorizontal(*Result) && Result->Prev->OutIdx != Skip) {
Horz = Result;
while (IsHorizontal(*Horz->Next))
Horz = Horz->Next;
if (Horz->Next->Top.X == Result->Prev->Top.X ||
Horz->Next->Top.X > Result->Prev->Top.X)
Result = Horz->Next;
}
while (E != Result) {
E->NextInLML = E->Prev;
if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Next->Top.X)
ReverseHorizontal(*E);
E = E->Prev;
}
if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Next->Top.X)
ReverseHorizontal(*E);
Result = Result->Prev; // move to the edge just beyond current bound
}
return Result;
}
//------------------------------------------------------------------------------
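// Validates and registers a single path: duplicate vertices (and, for closed
// paths, collinear edges by default) are removed, the remaining edges are
// initialized and their bounds appended to the LocalMinima list. Returns false
// when too few distinct vertices remain. Illustrative call (hypothetical
// coordinates and 'clipper' instance):
//   Path square;
//   square.push_back(IntPoint(0, 0));
//   square.push_back(IntPoint(10, 0));
//   square.push_back(IntPoint(10, 10));
//   square.push_back(IntPoint(0, 10));
//   clipper.AddPath(square, ptSubject, true);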
bool ClipperBase::AddPath(const Path &pg, PolyType PolyTyp, bool Closed) {
#ifdef use_lines
if (!Closed && PolyTyp == ptClip)
throw clipperException("AddPath: Open paths must be subject.");
#else
if (!Closed)
throw clipperException("AddPath: Open paths have been disabled.");
#endif
int highI = (int)pg.size() - 1;
if (Closed)
while (highI > 0 && (pg[highI] == pg[0]))
--highI;
while (highI > 0 && (pg[highI] == pg[highI - 1]))
--highI;
if ((Closed && highI < 2) || (!Closed && highI < 1))
return false;
// create a new edge array ...
TEdge *edges = new TEdge[highI + 1];
bool IsFlat = true;
// 1. Basic (first) edge initialization ...
try {
edges[1].Curr = pg[1];
RangeTest(pg[0], m_UseFullRange);
RangeTest(pg[highI], m_UseFullRange);
InitEdge(&edges[0], &edges[1], &edges[highI], pg[0]);
InitEdge(&edges[highI], &edges[0], &edges[highI - 1], pg[highI]);
for (int i = highI - 1; i >= 1; --i) {
RangeTest(pg[i], m_UseFullRange);
InitEdge(&edges[i], &edges[i + 1], &edges[i - 1], pg[i]);
}
} catch (...) {
delete[] edges;
throw; // range test fails
}
TEdge *eStart = &edges[0];
// 2. Remove duplicate vertices, and (when closed) collinear edges ...
TEdge *E = eStart, *eLoopStop = eStart;
for (;;) {
// nb: allows matching start and end points when not Closed ...
if (E->Curr == E->Next->Curr && (Closed || E->Next != eStart)) {
if (E == E->Next)
break;
if (E == eStart)
eStart = E->Next;
E = RemoveEdge(E);
eLoopStop = E;
continue;
}
if (E->Prev == E->Next)
break; // only two vertices
else if (Closed && SlopesEqual(E->Prev->Curr, E->Curr, E->Next->Curr,
m_UseFullRange) &&
(!m_PreserveCollinear ||
!Pt2IsBetweenPt1AndPt3(E->Prev->Curr, E->Curr, E->Next->Curr))) {
// Collinear edges are allowed for open paths but in closed paths
// the default is to merge adjacent collinear edges into a single edge.
// However, if the PreserveCollinear property is enabled, only overlapping
// collinear edges (ie spikes) will be removed from closed paths.
if (E == eStart)
eStart = E->Next;
E = RemoveEdge(E);
E = E->Prev;
eLoopStop = E;
continue;
}
E = E->Next;
if ((E == eLoopStop) || (!Closed && E->Next == eStart))
break;
}
if ((!Closed && (E == E->Next)) || (Closed && (E->Prev == E->Next))) {
delete[] edges;
return false;
}
if (!Closed) {
m_HasOpenPaths = true;
eStart->Prev->OutIdx = Skip;
}
// 3. Do second stage of edge initialization ...
E = eStart;
do {
InitEdge2(*E, PolyTyp);
E = E->Next;
if (IsFlat && E->Curr.Y != eStart->Curr.Y)
IsFlat = false;
} while (E != eStart);
// 4. Finally, add edge bounds to LocalMinima list ...
// Totally flat paths must be handled differently when adding them
// to LocalMinima list to avoid endless loops etc ...
if (IsFlat) {
if (Closed) {
delete[] edges;
return false;
}
E->Prev->OutIdx = Skip;
MinimaList::value_type locMin;
locMin.Y = E->Bot.Y;
locMin.LeftBound = 0;
locMin.RightBound = E;
locMin.RightBound->Side = esRight;
locMin.RightBound->WindDelta = 0;
for (;;) {
if (E->Bot.X != E->Prev->Top.X)
ReverseHorizontal(*E);
if (E->Next->OutIdx == Skip)
break;
E->NextInLML = E->Next;
E = E->Next;
}
m_MinimaList.push_back(locMin);
m_edges.push_back(edges);
return true;
}
m_edges.push_back(edges);
bool leftBoundIsForward;
TEdge *EMin = 0;
// workaround to avoid an endless loop in the while loop below when
// open paths have matching start and end points ...
if (E->Prev->Bot == E->Prev->Top)
E = E->Next;
for (;;) {
E = FindNextLocMin(E);
if (E == EMin)
break;
else if (!EMin)
EMin = E;
// E and E.Prev now share a local minima (left aligned if horizontal).
// Compare their slopes to find which starts which bound ...
MinimaList::value_type locMin;
locMin.Y = E->Bot.Y;
if (E->Dx < E->Prev->Dx) {
locMin.LeftBound = E->Prev;
locMin.RightBound = E;
leftBoundIsForward = false; // Q.nextInLML = Q.prev
} else {
locMin.LeftBound = E;
locMin.RightBound = E->Prev;
leftBoundIsForward = true; // Q.nextInLML = Q.next
}
if (!Closed)
locMin.LeftBound->WindDelta = 0;
else if (locMin.LeftBound->Next == locMin.RightBound)
locMin.LeftBound->WindDelta = -1;
else
locMin.LeftBound->WindDelta = 1;
locMin.RightBound->WindDelta = -locMin.LeftBound->WindDelta;
E = ProcessBound(locMin.LeftBound, leftBoundIsForward);
if (E->OutIdx == Skip)
E = ProcessBound(E, leftBoundIsForward);
TEdge *E2 = ProcessBound(locMin.RightBound, !leftBoundIsForward);
if (E2->OutIdx == Skip)
E2 = ProcessBound(E2, !leftBoundIsForward);
if (locMin.LeftBound->OutIdx == Skip)
locMin.LeftBound = 0;
else if (locMin.RightBound->OutIdx == Skip)
locMin.RightBound = 0;
m_MinimaList.push_back(locMin);
if (!leftBoundIsForward)
E = E2;
}
return true;
}
//------------------------------------------------------------------------------
bool ClipperBase::AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed) {
bool result = false;
for (Paths::size_type i = 0; i < ppg.size(); ++i)
if (AddPath(ppg[i], PolyTyp, Closed))
result = true;
return result;
}
//------------------------------------------------------------------------------
void ClipperBase::Clear() {
DisposeLocalMinimaList();
for (EdgeList::size_type i = 0; i < m_edges.size(); ++i) {
TEdge *edges = m_edges[i];
delete[] edges;
}
m_edges.clear();
m_UseFullRange = false;
m_HasOpenPaths = false;
}
//------------------------------------------------------------------------------
void ClipperBase::Reset() {
m_CurrentLM = m_MinimaList.begin();
if (m_CurrentLM == m_MinimaList.end())
return; // ie nothing to process
std::sort(m_MinimaList.begin(), m_MinimaList.end(), LocMinSorter());
m_Scanbeam = ScanbeamList(); // clears/resets priority_queue
// reset all edges ...
for (MinimaList::iterator lm = m_MinimaList.begin(); lm != m_MinimaList.end();
++lm) {
InsertScanbeam(lm->Y);
TEdge *e = lm->LeftBound;
if (e) {
e->Curr = e->Bot;
e->Side = esLeft;
e->OutIdx = Unassigned;
}
e = lm->RightBound;
if (e) {
e->Curr = e->Bot;
e->Side = esRight;
e->OutIdx = Unassigned;
}
}
m_ActiveEdges = 0;
m_CurrentLM = m_MinimaList.begin();
}
//------------------------------------------------------------------------------
void ClipperBase::DisposeLocalMinimaList() {
m_MinimaList.clear();
m_CurrentLM = m_MinimaList.begin();
}
//------------------------------------------------------------------------------
bool ClipperBase::PopLocalMinima(cInt Y, const LocalMinimum *&locMin) {
if (m_CurrentLM == m_MinimaList.end() || (*m_CurrentLM).Y != Y)
return false;
locMin = &(*m_CurrentLM);
++m_CurrentLM;
return true;
}
//------------------------------------------------------------------------------
IntRect ClipperBase::GetBounds() {
IntRect result;
MinimaList::iterator lm = m_MinimaList.begin();
if (lm == m_MinimaList.end()) {
result.left = result.top = result.right = result.bottom = 0;
return result;
}
result.left = lm->LeftBound->Bot.X;
result.top = lm->LeftBound->Bot.Y;
result.right = lm->LeftBound->Bot.X;
result.bottom = lm->LeftBound->Bot.Y;
while (lm != m_MinimaList.end()) {
// todo - needs fixing for open paths
result.bottom = std::max(result.bottom, lm->LeftBound->Bot.Y);
TEdge *e = lm->LeftBound;
for (;;) {
TEdge *bottomE = e;
while (e->NextInLML) {
if (e->Bot.X < result.left)
result.left = e->Bot.X;
if (e->Bot.X > result.right)
result.right = e->Bot.X;
e = e->NextInLML;
}
result.left = std::min(result.left, e->Bot.X);
result.right = std::max(result.right, e->Bot.X);
result.left = std::min(result.left, e->Top.X);
result.right = std::max(result.right, e->Top.X);
result.top = std::min(result.top, e->Top.Y);
if (bottomE == lm->LeftBound)
e = lm->RightBound;
else
break;
}
++lm;
}
return result;
}
//------------------------------------------------------------------------------
void ClipperBase::InsertScanbeam(const cInt Y) { m_Scanbeam.push(Y); }
//------------------------------------------------------------------------------
bool ClipperBase::PopScanbeam(cInt &Y) {
if (m_Scanbeam.empty())
return false;
Y = m_Scanbeam.top();
m_Scanbeam.pop();
while (!m_Scanbeam.empty() && Y == m_Scanbeam.top()) {
m_Scanbeam.pop();
} // Pop duplicates.
return true;
}
//------------------------------------------------------------------------------
void ClipperBase::DisposeAllOutRecs() {
for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i)
DisposeOutRec(i);
m_PolyOuts.clear();
}
//------------------------------------------------------------------------------
void ClipperBase::DisposeOutRec(PolyOutList::size_type index) {
OutRec *outRec = m_PolyOuts[index];
if (outRec->Pts)
DisposeOutPts(outRec->Pts);
delete outRec;
m_PolyOuts[index] = 0;
}
//------------------------------------------------------------------------------
void ClipperBase::DeleteFromAEL(TEdge *e) {
TEdge *AelPrev = e->PrevInAEL;
TEdge *AelNext = e->NextInAEL;
if (!AelPrev && !AelNext && (e != m_ActiveEdges))
return; // already deleted
if (AelPrev)
AelPrev->NextInAEL = AelNext;
else
m_ActiveEdges = AelNext;
if (AelNext)
AelNext->PrevInAEL = AelPrev;
e->NextInAEL = 0;
e->PrevInAEL = 0;
}
//------------------------------------------------------------------------------
OutRec *ClipperBase::CreateOutRec() {
OutRec *result = new OutRec;
result->IsHole = false;
result->IsOpen = false;
result->FirstLeft = 0;
result->Pts = 0;
result->BottomPt = 0;
result->PolyNd = 0;
m_PolyOuts.push_back(result);
result->Idx = (int)m_PolyOuts.size() - 1;
return result;
}
//------------------------------------------------------------------------------
void ClipperBase::SwapPositionsInAEL(TEdge *Edge1, TEdge *Edge2) {
  // do nothing if either edge has already been removed from the AEL ...
if (Edge1->NextInAEL == Edge1->PrevInAEL ||
Edge2->NextInAEL == Edge2->PrevInAEL)
return;
if (Edge1->NextInAEL == Edge2) {
TEdge *Next = Edge2->NextInAEL;
if (Next)
Next->PrevInAEL = Edge1;
TEdge *Prev = Edge1->PrevInAEL;
if (Prev)
Prev->NextInAEL = Edge2;
Edge2->PrevInAEL = Prev;
Edge2->NextInAEL = Edge1;
Edge1->PrevInAEL = Edge2;
Edge1->NextInAEL = Next;
} else if (Edge2->NextInAEL == Edge1) {
TEdge *Next = Edge1->NextInAEL;
if (Next)
Next->PrevInAEL = Edge2;
TEdge *Prev = Edge2->PrevInAEL;
if (Prev)
Prev->NextInAEL = Edge1;
Edge1->PrevInAEL = Prev;
Edge1->NextInAEL = Edge2;
Edge2->PrevInAEL = Edge1;
Edge2->NextInAEL = Next;
} else {
TEdge *Next = Edge1->NextInAEL;
TEdge *Prev = Edge1->PrevInAEL;
Edge1->NextInAEL = Edge2->NextInAEL;
if (Edge1->NextInAEL)
Edge1->NextInAEL->PrevInAEL = Edge1;
Edge1->PrevInAEL = Edge2->PrevInAEL;
if (Edge1->PrevInAEL)
Edge1->PrevInAEL->NextInAEL = Edge1;
Edge2->NextInAEL = Next;
if (Edge2->NextInAEL)
Edge2->NextInAEL->PrevInAEL = Edge2;
Edge2->PrevInAEL = Prev;
if (Edge2->PrevInAEL)
Edge2->PrevInAEL->NextInAEL = Edge2;
}
if (!Edge1->PrevInAEL)
m_ActiveEdges = Edge1;
else if (!Edge2->PrevInAEL)
m_ActiveEdges = Edge2;
}
//------------------------------------------------------------------------------
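// Replaces 'e' in the AEL with its successor (e->NextInLML), carrying over the
// output index, side and winding counts, and inserts the new edge's Top.Y into
// the scanbeam unless the new edge is horizontal.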
void ClipperBase::UpdateEdgeIntoAEL(TEdge *&e) {
if (!e->NextInLML)
throw clipperException("UpdateEdgeIntoAEL: invalid call");
e->NextInLML->OutIdx = e->OutIdx;
TEdge *AelPrev = e->PrevInAEL;
TEdge *AelNext = e->NextInAEL;
if (AelPrev)
AelPrev->NextInAEL = e->NextInLML;
else
m_ActiveEdges = e->NextInLML;
if (AelNext)
AelNext->PrevInAEL = e->NextInLML;
e->NextInLML->Side = e->Side;
e->NextInLML->WindDelta = e->WindDelta;
e->NextInLML->WindCnt = e->WindCnt;
e->NextInLML->WindCnt2 = e->WindCnt2;
e = e->NextInLML;
e->Curr = e->Bot;
e->PrevInAEL = AelPrev;
e->NextInAEL = AelNext;
if (!IsHorizontal(*e))
InsertScanbeam(e->Top.Y);
}
//------------------------------------------------------------------------------
bool ClipperBase::LocalMinimaPending() {
return (m_CurrentLM != m_MinimaList.end());
}
//------------------------------------------------------------------------------
// TClipper methods ...
//------------------------------------------------------------------------------
Clipper::Clipper(int initOptions)
: ClipperBase() // constructor
{
m_ExecuteLocked = false;
m_UseFullRange = false;
m_ReverseOutput = ((initOptions & ioReverseSolution) != 0);
m_StrictSimple = ((initOptions & ioStrictlySimple) != 0);
m_PreserveCollinear = ((initOptions & ioPreserveCollinear) != 0);
m_HasOpenPaths = false;
#ifdef use_xyz
m_ZFill = 0;
#endif
}
//------------------------------------------------------------------------------
#ifdef use_xyz
void Clipper::ZFillFunction(ZFillCallback zFillFunc) { m_ZFill = zFillFunc; }
//------------------------------------------------------------------------------
#endif
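// The Execute() overloads run the clipping operation configured via AddPath()/
// AddPaths(). Typical usage (illustrative sketch only; 'subj' and 'clip' are
// assumed to be already-populated Paths):
//   Clipper c;
//   c.AddPaths(subj, ptSubject, true);
//   c.AddPaths(clip, ptClip, true);
//   Paths solution;
//   c.Execute(ctIntersection, solution, pftNonZero, pftNonZero);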
bool Clipper::Execute(ClipType clipType, Paths &solution,
PolyFillType fillType) {
return Execute(clipType, solution, fillType, fillType);
}
//------------------------------------------------------------------------------
bool Clipper::Execute(ClipType clipType, PolyTree &polytree,
PolyFillType fillType) {
return Execute(clipType, polytree, fillType, fillType);
}
//------------------------------------------------------------------------------
bool Clipper::Execute(ClipType clipType, Paths &solution,
PolyFillType subjFillType, PolyFillType clipFillType) {
if (m_ExecuteLocked)
return false;
if (m_HasOpenPaths)
throw clipperException(
"Error: PolyTree struct is needed for open path clipping.");
m_ExecuteLocked = true;
solution.resize(0);
m_SubjFillType = subjFillType;
m_ClipFillType = clipFillType;
m_ClipType = clipType;
m_UsingPolyTree = false;
bool succeeded = ExecuteInternal();
if (succeeded)
BuildResult(solution);
DisposeAllOutRecs();
m_ExecuteLocked = false;
return succeeded;
}
//------------------------------------------------------------------------------
bool Clipper::Execute(ClipType clipType, PolyTree &polytree,
PolyFillType subjFillType, PolyFillType clipFillType) {
if (m_ExecuteLocked)
return false;
m_ExecuteLocked = true;
m_SubjFillType = subjFillType;
m_ClipFillType = clipFillType;
m_ClipType = clipType;
m_UsingPolyTree = true;
bool succeeded = ExecuteInternal();
if (succeeded)
BuildResult2(polytree);
DisposeAllOutRecs();
m_ExecuteLocked = false;
return succeeded;
}
//------------------------------------------------------------------------------
void Clipper::FixHoleLinkage(OutRec &outrec) {
  // skip OutRecs that (a) contain outermost polygons or
  // (b) already have the correct owner/child linkage ...
if (!outrec.FirstLeft ||
(outrec.IsHole != outrec.FirstLeft->IsHole && outrec.FirstLeft->Pts))
return;
OutRec *orfl = outrec.FirstLeft;
while (orfl && ((orfl->IsHole == outrec.IsHole) || !orfl->Pts))
orfl = orfl->FirstLeft;
outrec.FirstLeft = orfl;
}
//------------------------------------------------------------------------------
bool Clipper::ExecuteInternal() {
bool succeeded = true;
try {
Reset();
m_Maxima = MaximaList();
m_SortedEdges = 0;
succeeded = true;
cInt botY, topY;
if (!PopScanbeam(botY))
return false;
InsertLocalMinimaIntoAEL(botY);
while (PopScanbeam(topY) || LocalMinimaPending()) {
ProcessHorizontals();
ClearGhostJoins();
if (!ProcessIntersections(topY)) {
succeeded = false;
break;
}
ProcessEdgesAtTopOfScanbeam(topY);
botY = topY;
InsertLocalMinimaIntoAEL(botY);
}
} catch (...) {
succeeded = false;
}
if (succeeded) {
// fix orientations ...
for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) {
OutRec *outRec = m_PolyOuts[i];
if (!outRec->Pts || outRec->IsOpen)
continue;
if ((outRec->IsHole ^ m_ReverseOutput) == (Area(*outRec) > 0))
ReversePolyPtLinks(outRec->Pts);
}
if (!m_Joins.empty())
JoinCommonEdges();
// unfortunately FixupOutPolygon() must be done after JoinCommonEdges()
for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) {
OutRec *outRec = m_PolyOuts[i];
if (!outRec->Pts)
continue;
if (outRec->IsOpen)
FixupOutPolyline(*outRec);
else
FixupOutPolygon(*outRec);
}
if (m_StrictSimple)
DoSimplePolygons();
}
ClearJoins();
ClearGhostJoins();
return succeeded;
}
//------------------------------------------------------------------------------
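// Sets edge.WindCnt (winding count contributed by edges of the same PolyType
// to the left of 'edge' in the AEL) and edge.WindCnt2 (winding count of the
// opposite PolyType), taking the relevant fill rules into account.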
void Clipper::SetWindingCount(TEdge &edge) {
TEdge *e = edge.PrevInAEL;
  // find the edge of the same polytype that immediately precedes 'edge' in AEL
while (e && ((e->PolyTyp != edge.PolyTyp) || (e->WindDelta == 0)))
e = e->PrevInAEL;
if (!e) {
if (edge.WindDelta == 0) {
PolyFillType pft =
(edge.PolyTyp == ptSubject ? m_SubjFillType : m_ClipFillType);
edge.WindCnt = (pft == pftNegative ? -1 : 1);
} else
edge.WindCnt = edge.WindDelta;
edge.WindCnt2 = 0;
e = m_ActiveEdges; // ie get ready to calc WindCnt2
} else if (edge.WindDelta == 0 && m_ClipType != ctUnion) {
edge.WindCnt = 1;
edge.WindCnt2 = e->WindCnt2;
e = e->NextInAEL; // ie get ready to calc WindCnt2
} else if (IsEvenOddFillType(edge)) {
// EvenOdd filling ...
if (edge.WindDelta == 0) {
// are we inside a subj polygon ...
bool Inside = true;
TEdge *e2 = e->PrevInAEL;
while (e2) {
if (e2->PolyTyp == e->PolyTyp && e2->WindDelta != 0)
Inside = !Inside;
e2 = e2->PrevInAEL;
}
edge.WindCnt = (Inside ? 0 : 1);
} else {
edge.WindCnt = edge.WindDelta;
}
edge.WindCnt2 = e->WindCnt2;
e = e->NextInAEL; // ie get ready to calc WindCnt2
} else {
// nonZero, Positive or Negative filling ...
if (e->WindCnt * e->WindDelta < 0) {
// prev edge is 'decreasing' WindCount (WC) toward zero
// so we're outside the previous polygon ...
if (Abs(e->WindCnt) > 1) {
// outside prev poly but still inside another.
// when reversing direction of prev poly use the same WC
if (e->WindDelta * edge.WindDelta < 0)
edge.WindCnt = e->WindCnt;
// otherwise continue to 'decrease' WC ...
else
edge.WindCnt = e->WindCnt + edge.WindDelta;
} else
// now outside all polys of same polytype so set own WC ...
edge.WindCnt = (edge.WindDelta == 0 ? 1 : edge.WindDelta);
} else {
// prev edge is 'increasing' WindCount (WC) away from zero
// so we're inside the previous polygon ...
if (edge.WindDelta == 0)
edge.WindCnt = (e->WindCnt < 0 ? e->WindCnt - 1 : e->WindCnt + 1);
// if wind direction is reversing prev then use same WC
else if (e->WindDelta * edge.WindDelta < 0)
edge.WindCnt = e->WindCnt;
// otherwise add to WC ...
else
edge.WindCnt = e->WindCnt + edge.WindDelta;
}
edge.WindCnt2 = e->WindCnt2;
e = e->NextInAEL; // ie get ready to calc WindCnt2
}
// update WindCnt2 ...
if (IsEvenOddAltFillType(edge)) {
// EvenOdd filling ...
while (e != &edge) {
if (e->WindDelta != 0)
edge.WindCnt2 = (edge.WindCnt2 == 0 ? 1 : 0);
e = e->NextInAEL;
}
} else {
// nonZero, Positive or Negative filling ...
while (e != &edge) {
edge.WindCnt2 += e->WindDelta;
e = e->NextInAEL;
}
}
}
//------------------------------------------------------------------------------
bool Clipper::IsEvenOddFillType(const TEdge &edge) const {
if (edge.PolyTyp == ptSubject)
return m_SubjFillType == pftEvenOdd;
else
return m_ClipFillType == pftEvenOdd;
}
//------------------------------------------------------------------------------
bool Clipper::IsEvenOddAltFillType(const TEdge &edge) const {
if (edge.PolyTyp == ptSubject)
return m_ClipFillType == pftEvenOdd;
else
return m_SubjFillType == pftEvenOdd;
}
//------------------------------------------------------------------------------
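// Decides whether 'edge' will contribute to the output of the current clip
// operation: its own winding count is first tested against its fill rule, then
// WindCnt2 is tested against the other polygon type's fill rule.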
bool Clipper::IsContributing(const TEdge &edge) const {
PolyFillType pft, pft2;
if (edge.PolyTyp == ptSubject) {
pft = m_SubjFillType;
pft2 = m_ClipFillType;
} else {
pft = m_ClipFillType;
pft2 = m_SubjFillType;
}
switch (pft) {
case pftEvenOdd:
// return false if a subj line has been flagged as inside a subj polygon
if (edge.WindDelta == 0 && edge.WindCnt != 1)
return false;
break;
case pftNonZero:
if (Abs(edge.WindCnt) != 1)
return false;
break;
case pftPositive:
if (edge.WindCnt != 1)
return false;
break;
default: // pftNegative
if (edge.WindCnt != -1)
return false;
}
switch (m_ClipType) {
case ctIntersection:
switch (pft2) {
case pftEvenOdd:
case pftNonZero:
return (edge.WindCnt2 != 0);
case pftPositive:
return (edge.WindCnt2 > 0);
default:
return (edge.WindCnt2 < 0);
}
break;
case ctUnion:
switch (pft2) {
case pftEvenOdd:
case pftNonZero:
return (edge.WindCnt2 == 0);
case pftPositive:
return (edge.WindCnt2 <= 0);
default:
return (edge.WindCnt2 >= 0);
}
break;
case ctDifference:
if (edge.PolyTyp == ptSubject)
switch (pft2) {
case pftEvenOdd:
case pftNonZero:
return (edge.WindCnt2 == 0);
case pftPositive:
return (edge.WindCnt2 <= 0);
default:
return (edge.WindCnt2 >= 0);
}
else
switch (pft2) {
case pftEvenOdd:
case pftNonZero:
return (edge.WindCnt2 != 0);
case pftPositive:
return (edge.WindCnt2 > 0);
default:
return (edge.WindCnt2 < 0);
}
break;
case ctXor:
if (edge.WindDelta == 0) // XOr always contributing unless open
switch (pft2) {
case pftEvenOdd:
case pftNonZero:
return (edge.WindCnt2 == 0);
case pftPositive:
return (edge.WindCnt2 <= 0);
default:
return (edge.WindCnt2 >= 0);
}
else
return true;
break;
default:
return true;
}
}
//------------------------------------------------------------------------------
OutPt *Clipper::AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &Pt) {
OutPt *result;
TEdge *e, *prevE;
if (IsHorizontal(*e2) || (e1->Dx > e2->Dx)) {
result = AddOutPt(e1, Pt);
e2->OutIdx = e1->OutIdx;
e1->Side = esLeft;
e2->Side = esRight;
e = e1;
if (e->PrevInAEL == e2)
prevE = e2->PrevInAEL;
else
prevE = e->PrevInAEL;
} else {
result = AddOutPt(e2, Pt);
e1->OutIdx = e2->OutIdx;
e1->Side = esRight;
e2->Side = esLeft;
e = e2;
if (e->PrevInAEL == e1)
prevE = e1->PrevInAEL;
else
prevE = e->PrevInAEL;
}
if (prevE && prevE->OutIdx >= 0 && prevE->Top.Y < Pt.Y && e->Top.Y < Pt.Y) {
cInt xPrev = TopX(*prevE, Pt.Y);
cInt xE = TopX(*e, Pt.Y);
if (xPrev == xE && (e->WindDelta != 0) && (prevE->WindDelta != 0) &&
SlopesEqual(IntPoint(xPrev, Pt.Y), prevE->Top, IntPoint(xE, Pt.Y),
e->Top, m_UseFullRange)) {
OutPt *outPt = AddOutPt(prevE, Pt);
AddJoin(result, outPt, e->Top);
}
}
return result;
}
//------------------------------------------------------------------------------
void Clipper::AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &Pt) {
AddOutPt(e1, Pt);
if (e2->WindDelta == 0)
AddOutPt(e2, Pt);
if (e1->OutIdx == e2->OutIdx) {
e1->OutIdx = Unassigned;
e2->OutIdx = Unassigned;
} else if (e1->OutIdx < e2->OutIdx)
AppendPolygon(e1, e2);
else
AppendPolygon(e2, e1);
}
//------------------------------------------------------------------------------
void Clipper::AddEdgeToSEL(TEdge *edge) {
  // SEL pointers in PEdge are reused to build a list of horizontal edges.
  // However, order doesn't matter when processing horizontal edges.
if (!m_SortedEdges) {
m_SortedEdges = edge;
edge->PrevInSEL = 0;
edge->NextInSEL = 0;
} else {
edge->NextInSEL = m_SortedEdges;
edge->PrevInSEL = 0;
m_SortedEdges->PrevInSEL = edge;
m_SortedEdges = edge;
}
}
//------------------------------------------------------------------------------
bool Clipper::PopEdgeFromSEL(TEdge *&edge) {
if (!m_SortedEdges)
return false;
edge = m_SortedEdges;
DeleteFromSEL(m_SortedEdges);
return true;
}
//------------------------------------------------------------------------------
void Clipper::CopyAELToSEL() {
TEdge *e = m_ActiveEdges;
m_SortedEdges = e;
while (e) {
e->PrevInSEL = e->PrevInAEL;
e->NextInSEL = e->NextInAEL;
e = e->NextInAEL;
}
}
//------------------------------------------------------------------------------
void Clipper::AddJoin(OutPt *op1, OutPt *op2, const IntPoint OffPt) {
Join *j = new Join;
j->OutPt1 = op1;
j->OutPt2 = op2;
j->OffPt = OffPt;
m_Joins.push_back(j);
}
//------------------------------------------------------------------------------
void Clipper::ClearJoins() {
for (JoinList::size_type i = 0; i < m_Joins.size(); i++)
delete m_Joins[i];
m_Joins.resize(0);
}
//------------------------------------------------------------------------------
void Clipper::ClearGhostJoins() {
for (JoinList::size_type i = 0; i < m_GhostJoins.size(); i++)
delete m_GhostJoins[i];
m_GhostJoins.resize(0);
}
//------------------------------------------------------------------------------
void Clipper::AddGhostJoin(OutPt *op, const IntPoint OffPt) {
Join *j = new Join;
j->OutPt1 = op;
j->OutPt2 = 0;
j->OffPt = OffPt;
m_GhostJoins.push_back(j);
}
//------------------------------------------------------------------------------
void Clipper::InsertLocalMinimaIntoAEL(const cInt botY) {
const LocalMinimum *lm;
while (PopLocalMinima(botY, lm)) {
TEdge *lb = lm->LeftBound;
TEdge *rb = lm->RightBound;
OutPt *Op1 = 0;
if (!lb) {
// nb: don't insert LB into either AEL or SEL
InsertEdgeIntoAEL(rb, 0);
SetWindingCount(*rb);
if (IsContributing(*rb))
Op1 = AddOutPt(rb, rb->Bot);
} else if (!rb) {
InsertEdgeIntoAEL(lb, 0);
SetWindingCount(*lb);
if (IsContributing(*lb))
Op1 = AddOutPt(lb, lb->Bot);
InsertScanbeam(lb->Top.Y);
} else {
InsertEdgeIntoAEL(lb, 0);
InsertEdgeIntoAEL(rb, lb);
SetWindingCount(*lb);
rb->WindCnt = lb->WindCnt;
rb->WindCnt2 = lb->WindCnt2;
if (IsContributing(*lb))
Op1 = AddLocalMinPoly(lb, rb, lb->Bot);
InsertScanbeam(lb->Top.Y);
}
if (rb) {
if (IsHorizontal(*rb)) {
AddEdgeToSEL(rb);
if (rb->NextInLML)
InsertScanbeam(rb->NextInLML->Top.Y);
} else
InsertScanbeam(rb->Top.Y);
}
if (!lb || !rb)
continue;
// if any output polygons share an edge, they'll need joining later ...
if (Op1 && IsHorizontal(*rb) && m_GhostJoins.size() > 0 &&
(rb->WindDelta != 0)) {
for (JoinList::size_type i = 0; i < m_GhostJoins.size(); ++i) {
Join *jr = m_GhostJoins[i];
// if the horizontal Rb and a 'ghost' horizontal overlap, then convert
// the 'ghost' join to a real join ready for later ...
if (HorzSegmentsOverlap(jr->OutPt1->Pt.X, jr->OffPt.X, rb->Bot.X,
rb->Top.X))
AddJoin(jr->OutPt1, Op1, jr->OffPt);
}
}
if (lb->OutIdx >= 0 && lb->PrevInAEL &&
lb->PrevInAEL->Curr.X == lb->Bot.X && lb->PrevInAEL->OutIdx >= 0 &&
SlopesEqual(lb->PrevInAEL->Bot, lb->PrevInAEL->Top, lb->Curr, lb->Top,
m_UseFullRange) &&
(lb->WindDelta != 0) && (lb->PrevInAEL->WindDelta != 0)) {
OutPt *Op2 = AddOutPt(lb->PrevInAEL, lb->Bot);
AddJoin(Op1, Op2, lb->Top);
}
if (lb->NextInAEL != rb) {
if (rb->OutIdx >= 0 && rb->PrevInAEL->OutIdx >= 0 &&
SlopesEqual(rb->PrevInAEL->Curr, rb->PrevInAEL->Top, rb->Curr,
rb->Top, m_UseFullRange) &&
(rb->WindDelta != 0) && (rb->PrevInAEL->WindDelta != 0)) {
OutPt *Op2 = AddOutPt(rb->PrevInAEL, rb->Bot);
AddJoin(Op1, Op2, rb->Top);
}
TEdge *e = lb->NextInAEL;
if (e) {
while (e != rb) {
// nb: For calculating winding counts etc, IntersectEdges() assumes
// that param1 will be to the Right of param2 ABOVE the intersection
// ...
IntersectEdges(rb, e, lb->Curr); // order important here
e = e->NextInAEL;
}
}
}
}
}
//------------------------------------------------------------------------------
void Clipper::DeleteFromSEL(TEdge *e) {
TEdge *SelPrev = e->PrevInSEL;
TEdge *SelNext = e->NextInSEL;
if (!SelPrev && !SelNext && (e != m_SortedEdges))
return; // already deleted
if (SelPrev)
SelPrev->NextInSEL = SelNext;
else
m_SortedEdges = SelNext;
if (SelNext)
SelNext->PrevInSEL = SelPrev;
e->NextInSEL = 0;
e->PrevInSEL = 0;
}
//------------------------------------------------------------------------------
#ifdef use_xyz
void Clipper::SetZ(IntPoint &pt, TEdge &e1, TEdge &e2) {
if (pt.Z != 0 || !m_ZFill)
return;
else if (pt == e1.Bot)
pt.Z = e1.Bot.Z;
else if (pt == e1.Top)
pt.Z = e1.Top.Z;
else if (pt == e2.Bot)
pt.Z = e2.Bot.Z;
else if (pt == e2.Top)
pt.Z = e2.Top.Z;
else
(*m_ZFill)(e1.Bot, e1.Top, e2.Bot, e2.Top, pt);
}
//------------------------------------------------------------------------------
#endif
void Clipper::IntersectEdges(TEdge *e1, TEdge *e2, IntPoint &Pt) {
bool e1Contributing = (e1->OutIdx >= 0);
bool e2Contributing = (e2->OutIdx >= 0);
#ifdef use_xyz
SetZ(Pt, *e1, *e2);
#endif
#ifdef use_lines
// if either edge is on an OPEN path ...
if (e1->WindDelta == 0 || e2->WindDelta == 0) {
// ignore subject-subject open path intersections UNLESS they
// are both open paths, AND they are both 'contributing maximas' ...
if (e1->WindDelta == 0 && e2->WindDelta == 0)
return;
// if intersecting a subj line with a subj poly ...
else if (e1->PolyTyp == e2->PolyTyp && e1->WindDelta != e2->WindDelta &&
m_ClipType == ctUnion) {
if (e1->WindDelta == 0) {
if (e2Contributing) {
AddOutPt(e1, Pt);
if (e1Contributing)
e1->OutIdx = Unassigned;
}
} else {
if (e1Contributing) {
AddOutPt(e2, Pt);
if (e2Contributing)
e2->OutIdx = Unassigned;
}
}
} else if (e1->PolyTyp != e2->PolyTyp) {
// toggle subj open path OutIdx on/off when Abs(clip.WndCnt) == 1 ...
if ((e1->WindDelta == 0) && abs(e2->WindCnt) == 1 &&
(m_ClipType != ctUnion || e2->WindCnt2 == 0)) {
AddOutPt(e1, Pt);
if (e1Contributing)
e1->OutIdx = Unassigned;
} else if ((e2->WindDelta == 0) && (abs(e1->WindCnt) == 1) &&
(m_ClipType != ctUnion || e1->WindCnt2 == 0)) {
AddOutPt(e2, Pt);
if (e2Contributing)
e2->OutIdx = Unassigned;
}
}
return;
}
#endif
// update winding counts...
// assumes that e1 will be to the Right of e2 ABOVE the intersection
if (e1->PolyTyp == e2->PolyTyp) {
if (IsEvenOddFillType(*e1)) {
int oldE1WindCnt = e1->WindCnt;
e1->WindCnt = e2->WindCnt;
e2->WindCnt = oldE1WindCnt;
} else {
if (e1->WindCnt + e2->WindDelta == 0)
e1->WindCnt = -e1->WindCnt;
else
e1->WindCnt += e2->WindDelta;
if (e2->WindCnt - e1->WindDelta == 0)
e2->WindCnt = -e2->WindCnt;
else
e2->WindCnt -= e1->WindDelta;
}
} else {
if (!IsEvenOddFillType(*e2))
e1->WindCnt2 += e2->WindDelta;
else
e1->WindCnt2 = (e1->WindCnt2 == 0) ? 1 : 0;
if (!IsEvenOddFillType(*e1))
e2->WindCnt2 -= e1->WindDelta;
else
e2->WindCnt2 = (e2->WindCnt2 == 0) ? 1 : 0;
}
PolyFillType e1FillType, e2FillType, e1FillType2, e2FillType2;
if (e1->PolyTyp == ptSubject) {
e1FillType = m_SubjFillType;
e1FillType2 = m_ClipFillType;
} else {
e1FillType = m_ClipFillType;
e1FillType2 = m_SubjFillType;
}
if (e2->PolyTyp == ptSubject) {
e2FillType = m_SubjFillType;
e2FillType2 = m_ClipFillType;
} else {
e2FillType = m_ClipFillType;
e2FillType2 = m_SubjFillType;
}
cInt e1Wc, e2Wc;
switch (e1FillType) {
case pftPositive:
e1Wc = e1->WindCnt;
break;
case pftNegative:
e1Wc = -e1->WindCnt;
break;
default:
e1Wc = Abs(e1->WindCnt);
}
switch (e2FillType) {
case pftPositive:
e2Wc = e2->WindCnt;
break;
case pftNegative:
e2Wc = -e2->WindCnt;
break;
default:
e2Wc = Abs(e2->WindCnt);
}
if (e1Contributing && e2Contributing) {
if ((e1Wc != 0 && e1Wc != 1) || (e2Wc != 0 && e2Wc != 1) ||
(e1->PolyTyp != e2->PolyTyp && m_ClipType != ctXor)) {
AddLocalMaxPoly(e1, e2, Pt);
} else {
AddOutPt(e1, Pt);
AddOutPt(e2, Pt);
SwapSides(*e1, *e2);
SwapPolyIndexes(*e1, *e2);
}
} else if (e1Contributing) {
if (e2Wc == 0 || e2Wc == 1) {
AddOutPt(e1, Pt);
SwapSides(*e1, *e2);
SwapPolyIndexes(*e1, *e2);
}
} else if (e2Contributing) {
if (e1Wc == 0 || e1Wc == 1) {
AddOutPt(e2, Pt);
SwapSides(*e1, *e2);
SwapPolyIndexes(*e1, *e2);
}
} else if ((e1Wc == 0 || e1Wc == 1) && (e2Wc == 0 || e2Wc == 1)) {
// neither edge is currently contributing ...
cInt e1Wc2, e2Wc2;
switch (e1FillType2) {
case pftPositive:
e1Wc2 = e1->WindCnt2;
break;
case pftNegative:
e1Wc2 = -e1->WindCnt2;
break;
default:
e1Wc2 = Abs(e1->WindCnt2);
}
switch (e2FillType2) {
case pftPositive:
e2Wc2 = e2->WindCnt2;
break;
case pftNegative:
e2Wc2 = -e2->WindCnt2;
break;
default:
e2Wc2 = Abs(e2->WindCnt2);
}
if (e1->PolyTyp != e2->PolyTyp) {
AddLocalMinPoly(e1, e2, Pt);
} else if (e1Wc == 1 && e2Wc == 1)
switch (m_ClipType) {
case ctIntersection:
if (e1Wc2 > 0 && e2Wc2 > 0)
AddLocalMinPoly(e1, e2, Pt);
break;
case ctUnion:
if (e1Wc2 <= 0 && e2Wc2 <= 0)
AddLocalMinPoly(e1, e2, Pt);
break;
case ctDifference:
if (((e1->PolyTyp == ptClip) && (e1Wc2 > 0) && (e2Wc2 > 0)) ||
((e1->PolyTyp == ptSubject) && (e1Wc2 <= 0) && (e2Wc2 <= 0)))
AddLocalMinPoly(e1, e2, Pt);
break;
case ctXor:
AddLocalMinPoly(e1, e2, Pt);
}
else
SwapSides(*e1, *e2);
}
}
//------------------------------------------------------------------------------
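// Determines the hole state of a new OutRec by scanning leftwards in the AEL:
// the nearest 'unpaired' contributing edge (if any) identifies the owning
// OutRec (FirstLeft), and the new OutRec's IsHole is the opposite of its
// owner's.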
void Clipper::SetHoleState(TEdge *e, OutRec *outrec) {
TEdge *e2 = e->PrevInAEL;
TEdge *eTmp = 0;
while (e2) {
if (e2->OutIdx >= 0 && e2->WindDelta != 0) {
if (!eTmp)
eTmp = e2;
else if (eTmp->OutIdx == e2->OutIdx)
eTmp = 0;
}
e2 = e2->PrevInAEL;
}
if (!eTmp) {
outrec->FirstLeft = 0;
outrec->IsHole = false;
} else {
outrec->FirstLeft = m_PolyOuts[eTmp->OutIdx];
outrec->IsHole = !outrec->FirstLeft->IsHole;
}
}
//------------------------------------------------------------------------------
OutRec *GetLowermostRec(OutRec *outRec1, OutRec *outRec2) {
// work out which polygon fragment has the correct hole state ...
if (!outRec1->BottomPt)
outRec1->BottomPt = GetBottomPt(outRec1->Pts);
if (!outRec2->BottomPt)
outRec2->BottomPt = GetBottomPt(outRec2->Pts);
OutPt *OutPt1 = outRec1->BottomPt;
OutPt *OutPt2 = outRec2->BottomPt;
if (OutPt1->Pt.Y > OutPt2->Pt.Y)
return outRec1;
else if (OutPt1->Pt.Y < OutPt2->Pt.Y)
return outRec2;
else if (OutPt1->Pt.X < OutPt2->Pt.X)
return outRec1;
else if (OutPt1->Pt.X > OutPt2->Pt.X)
return outRec2;
else if (OutPt1->Next == OutPt1)
return outRec2;
else if (OutPt2->Next == OutPt2)
return outRec1;
else if (FirstIsBottomPt(OutPt1, OutPt2))
return outRec1;
else
return outRec2;
}
//------------------------------------------------------------------------------
bool OutRec1RightOfOutRec2(OutRec *outRec1, OutRec *outRec2) {
do {
outRec1 = outRec1->FirstLeft;
if (outRec1 == outRec2)
return true;
} while (outRec1);
return false;
}
//------------------------------------------------------------------------------
OutRec *Clipper::GetOutRec(int Idx) {
OutRec *outrec = m_PolyOuts[Idx];
while (outrec != m_PolyOuts[outrec->Idx])
outrec = m_PolyOuts[outrec->Idx];
return outrec;
}
//------------------------------------------------------------------------------
void Clipper::AppendPolygon(TEdge *e1, TEdge *e2) {
// get the start and ends of both output polygons ...
OutRec *outRec1 = m_PolyOuts[e1->OutIdx];
OutRec *outRec2 = m_PolyOuts[e2->OutIdx];
OutRec *holeStateRec;
if (OutRec1RightOfOutRec2(outRec1, outRec2))
holeStateRec = outRec2;
else if (OutRec1RightOfOutRec2(outRec2, outRec1))
holeStateRec = outRec1;
else
holeStateRec = GetLowermostRec(outRec1, outRec2);
// get the start and ends of both output polygons and
// join e2 poly onto e1 poly and delete pointers to e2 ...
OutPt *p1_lft = outRec1->Pts;
OutPt *p1_rt = p1_lft->Prev;
OutPt *p2_lft = outRec2->Pts;
OutPt *p2_rt = p2_lft->Prev;
// join e2 poly onto e1 poly and delete pointers to e2 ...
if (e1->Side == esLeft) {
if (e2->Side == esLeft) {
// z y x a b c
ReversePolyPtLinks(p2_lft);
p2_lft->Next = p1_lft;
p1_lft->Prev = p2_lft;
p1_rt->Next = p2_rt;
p2_rt->Prev = p1_rt;
outRec1->Pts = p2_rt;
} else {
// x y z a b c
p2_rt->Next = p1_lft;
p1_lft->Prev = p2_rt;
p2_lft->Prev = p1_rt;
p1_rt->Next = p2_lft;
outRec1->Pts = p2_lft;
}
} else {
if (e2->Side == esRight) {
// a b c z y x
ReversePolyPtLinks(p2_lft);
p1_rt->Next = p2_rt;
p2_rt->Prev = p1_rt;
p2_lft->Next = p1_lft;
p1_lft->Prev = p2_lft;
} else {
// a b c x y z
p1_rt->Next = p2_lft;
p2_lft->Prev = p1_rt;
p1_lft->Prev = p2_rt;
p2_rt->Next = p1_lft;
}
}
outRec1->BottomPt = 0;
if (holeStateRec == outRec2) {
if (outRec2->FirstLeft != outRec1)
outRec1->FirstLeft = outRec2->FirstLeft;
outRec1->IsHole = outRec2->IsHole;
}
outRec2->Pts = 0;
outRec2->BottomPt = 0;
outRec2->FirstLeft = outRec1;
int OKIdx = e1->OutIdx;
int ObsoleteIdx = e2->OutIdx;
e1->OutIdx =
Unassigned; // nb: safe because we only get here via AddLocalMaxPoly
e2->OutIdx = Unassigned;
TEdge *e = m_ActiveEdges;
while (e) {
if (e->OutIdx == ObsoleteIdx) {
e->OutIdx = OKIdx;
e->Side = e1->Side;
break;
}
e = e->NextInAEL;
}
outRec2->Idx = outRec1->Idx;
}
//------------------------------------------------------------------------------
OutPt *Clipper::AddOutPt(TEdge *e, const IntPoint &pt) {
if (e->OutIdx < 0) {
OutRec *outRec = CreateOutRec();
outRec->IsOpen = (e->WindDelta == 0);
OutPt *newOp = new OutPt;
outRec->Pts = newOp;
newOp->Idx = outRec->Idx;
newOp->Pt = pt;
newOp->Next = newOp;
newOp->Prev = newOp;
if (!outRec->IsOpen)
SetHoleState(e, outRec);
e->OutIdx = outRec->Idx;
return newOp;
} else {
OutRec *outRec = m_PolyOuts[e->OutIdx];
// OutRec.Pts is the 'Left-most' point & OutRec.Pts.Prev is the 'Right-most'
OutPt *op = outRec->Pts;
bool ToFront = (e->Side == esLeft);
if (ToFront && (pt == op->Pt))
return op;
else if (!ToFront && (pt == op->Prev->Pt))
return op->Prev;
OutPt *newOp = new OutPt;
newOp->Idx = outRec->Idx;
newOp->Pt = pt;
newOp->Next = op;
newOp->Prev = op->Prev;
newOp->Prev->Next = newOp;
op->Prev = newOp;
if (ToFront)
outRec->Pts = newOp;
return newOp;
}
}
//------------------------------------------------------------------------------
OutPt *Clipper::GetLastOutPt(TEdge *e) {
OutRec *outRec = m_PolyOuts[e->OutIdx];
if (e->Side == esLeft)
return outRec->Pts;
else
return outRec->Pts->Prev;
}
//------------------------------------------------------------------------------
void Clipper::ProcessHorizontals() {
TEdge *horzEdge;
while (PopEdgeFromSEL(horzEdge))
ProcessHorizontal(horzEdge);
}
//------------------------------------------------------------------------------
inline bool IsMinima(TEdge *e) {
return e && (e->Prev->NextInLML != e) && (e->Next->NextInLML != e);
}
//------------------------------------------------------------------------------
inline bool IsMaxima(TEdge *e, const cInt Y) {
return e && e->Top.Y == Y && !e->NextInLML;
}
//------------------------------------------------------------------------------
inline bool IsIntermediate(TEdge *e, const cInt Y) {
return e->Top.Y == Y && e->NextInLML;
}
//------------------------------------------------------------------------------
TEdge *GetMaximaPair(TEdge *e) {
if ((e->Next->Top == e->Top) && !e->Next->NextInLML)
return e->Next;
else if ((e->Prev->Top == e->Top) && !e->Prev->NextInLML)
return e->Prev;
else
return 0;
}
//------------------------------------------------------------------------------
TEdge *GetMaximaPairEx(TEdge *e) {
// as GetMaximaPair() but returns 0 if MaxPair isn't in AEL (unless it's
// horizontal)
TEdge *result = GetMaximaPair(e);
if (result &&
(result->OutIdx == Skip ||
(result->NextInAEL == result->PrevInAEL && !IsHorizontal(*result))))
return 0;
return result;
}
//------------------------------------------------------------------------------
void Clipper::SwapPositionsInSEL(TEdge *Edge1, TEdge *Edge2) {
if (!(Edge1->NextInSEL) && !(Edge1->PrevInSEL))
return;
if (!(Edge2->NextInSEL) && !(Edge2->PrevInSEL))
return;
if (Edge1->NextInSEL == Edge2) {
TEdge *Next = Edge2->NextInSEL;
if (Next)
Next->PrevInSEL = Edge1;
TEdge *Prev = Edge1->PrevInSEL;
if (Prev)
Prev->NextInSEL = Edge2;
Edge2->PrevInSEL = Prev;
Edge2->NextInSEL = Edge1;
Edge1->PrevInSEL = Edge2;
Edge1->NextInSEL = Next;
} else if (Edge2->NextInSEL == Edge1) {
TEdge *Next = Edge1->NextInSEL;
if (Next)
Next->PrevInSEL = Edge2;
TEdge *Prev = Edge2->PrevInSEL;
if (Prev)
Prev->NextInSEL = Edge1;
Edge1->PrevInSEL = Prev;
Edge1->NextInSEL = Edge2;
Edge2->PrevInSEL = Edge1;
Edge2->NextInSEL = Next;
} else {
TEdge *Next = Edge1->NextInSEL;
TEdge *Prev = Edge1->PrevInSEL;
Edge1->NextInSEL = Edge2->NextInSEL;
if (Edge1->NextInSEL)
Edge1->NextInSEL->PrevInSEL = Edge1;
Edge1->PrevInSEL = Edge2->PrevInSEL;
if (Edge1->PrevInSEL)
Edge1->PrevInSEL->NextInSEL = Edge1;
Edge2->NextInSEL = Next;
if (Edge2->NextInSEL)
Edge2->NextInSEL->PrevInSEL = Edge2;
Edge2->PrevInSEL = Prev;
if (Edge2->PrevInSEL)
Edge2->PrevInSEL->NextInSEL = Edge2;
}
if (!Edge1->PrevInSEL)
m_SortedEdges = Edge1;
else if (!Edge2->PrevInSEL)
m_SortedEdges = Edge2;
}
//------------------------------------------------------------------------------
TEdge *GetNextInAEL(TEdge *e, Direction dir) {
return dir == dLeftToRight ? e->NextInAEL : e->PrevInAEL;
}
//------------------------------------------------------------------------------
void GetHorzDirection(TEdge &HorzEdge, Direction &Dir, cInt &Left,
cInt &Right) {
if (HorzEdge.Bot.X < HorzEdge.Top.X) {
Left = HorzEdge.Bot.X;
Right = HorzEdge.Top.X;
Dir = dLeftToRight;
} else {
Left = HorzEdge.Top.X;
Right = HorzEdge.Bot.X;
Dir = dRightToLeft;
}
}
//------------------------------------------------------------------------
/*******************************************************************************
* Notes: Horizontal edges (HEs) at scanline intersections (ie at the Top or *
* Bottom of a scanbeam) are processed as if layered. The order in which HEs *
* are processed doesn't matter. HEs intersect with other HE Bot.Xs only [#] *
* (or they could intersect with Top.Xs only, ie EITHER Bot.Xs OR Top.Xs), *
* and with other non-horizontal edges [*]. Once these intersections are *
* processed, intermediate HEs then 'promote' the Edge above (NextInLML) into *
* the AEL. These 'promoted' edges may in turn intersect [%] with other HEs. *
*******************************************************************************/
void Clipper::ProcessHorizontal(TEdge *horzEdge) {
Direction dir;
cInt horzLeft, horzRight;
bool IsOpen = (horzEdge->WindDelta == 0);
GetHorzDirection(*horzEdge, dir, horzLeft, horzRight);
TEdge *eLastHorz = horzEdge, *eMaxPair = 0;
while (eLastHorz->NextInLML && IsHorizontal(*eLastHorz->NextInLML))
eLastHorz = eLastHorz->NextInLML;
if (!eLastHorz->NextInLML)
eMaxPair = GetMaximaPair(eLastHorz);
MaximaList::const_iterator maxIt;
MaximaList::const_reverse_iterator maxRit;
if (m_Maxima.size() > 0) {
// get the first maxima in range (X) ...
if (dir == dLeftToRight) {
maxIt = m_Maxima.begin();
while (maxIt != m_Maxima.end() && *maxIt <= horzEdge->Bot.X)
maxIt++;
if (maxIt != m_Maxima.end() && *maxIt >= eLastHorz->Top.X)
maxIt = m_Maxima.end();
} else {
maxRit = m_Maxima.rbegin();
while (maxRit != m_Maxima.rend() && *maxRit > horzEdge->Bot.X)
maxRit++;
if (maxRit != m_Maxima.rend() && *maxRit <= eLastHorz->Top.X)
maxRit = m_Maxima.rend();
}
}
OutPt *op1 = 0;
for (;;) // loop through consec. horizontal edges
{
bool IsLastHorz = (horzEdge == eLastHorz);
TEdge *e = GetNextInAEL(horzEdge, dir);
while (e) {
      // this code block inserts extra coords into horizontal edges (in output
      // polygons) wherever maxima touch these horizontal edges. This helps
      // 'simplify' polygons (ie if the StrictlySimple property is set).
if (m_Maxima.size() > 0) {
if (dir == dLeftToRight) {
while (maxIt != m_Maxima.end() && *maxIt < e->Curr.X) {
if (horzEdge->OutIdx >= 0 && !IsOpen)
AddOutPt(horzEdge, IntPoint(*maxIt, horzEdge->Bot.Y));
maxIt++;
}
} else {
while (maxRit != m_Maxima.rend() && *maxRit > e->Curr.X) {
if (horzEdge->OutIdx >= 0 && !IsOpen)
AddOutPt(horzEdge, IntPoint(*maxRit, horzEdge->Bot.Y));
maxRit++;
}
}
      }
if ((dir == dLeftToRight && e->Curr.X > horzRight) ||
(dir == dRightToLeft && e->Curr.X < horzLeft))
break;
// Also break if we've got to the end of an intermediate horizontal edge
// ...
// nb: Smaller Dx's are to the right of larger Dx's ABOVE the horizontal.
if (e->Curr.X == horzEdge->Top.X && horzEdge->NextInLML &&
e->Dx < horzEdge->NextInLML->Dx)
break;
if (horzEdge->OutIdx >= 0 && !IsOpen) // note: may be done multiple times
{
#ifdef use_xyz
if (dir == dLeftToRight)
SetZ(e->Curr, *horzEdge, *e);
else
SetZ(e->Curr, *e, *horzEdge);
#endif
op1 = AddOutPt(horzEdge, e->Curr);
TEdge *eNextHorz = m_SortedEdges;
while (eNextHorz) {
if (eNextHorz->OutIdx >= 0 &&
HorzSegmentsOverlap(horzEdge->Bot.X, horzEdge->Top.X,
eNextHorz->Bot.X, eNextHorz->Top.X)) {
OutPt *op2 = GetLastOutPt(eNextHorz);
AddJoin(op2, op1, eNextHorz->Top);
}
eNextHorz = eNextHorz->NextInSEL;
}
AddGhostJoin(op1, horzEdge->Bot);
}
// OK, so far we're still in range of the horizontal Edge but make sure
// we're at the last of consec. horizontals when matching with eMaxPair
if (e == eMaxPair && IsLastHorz) {
if (horzEdge->OutIdx >= 0)
AddLocalMaxPoly(horzEdge, eMaxPair, horzEdge->Top);
DeleteFromAEL(horzEdge);
DeleteFromAEL(eMaxPair);
return;
}
if (dir == dLeftToRight) {
IntPoint Pt = IntPoint(e->Curr.X, horzEdge->Curr.Y);
IntersectEdges(horzEdge, e, Pt);
} else {
IntPoint Pt = IntPoint(e->Curr.X, horzEdge->Curr.Y);
IntersectEdges(e, horzEdge, Pt);
}
TEdge *eNext = GetNextInAEL(e, dir);
SwapPositionsInAEL(horzEdge, e);
e = eNext;
} // end while(e)
// Break out of loop if HorzEdge.NextInLML is not also horizontal ...
if (!horzEdge->NextInLML || !IsHorizontal(*horzEdge->NextInLML))
break;
UpdateEdgeIntoAEL(horzEdge);
if (horzEdge->OutIdx >= 0)
AddOutPt(horzEdge, horzEdge->Bot);
GetHorzDirection(*horzEdge, dir, horzLeft, horzRight);
} // end for (;;)
if (horzEdge->OutIdx >= 0 && !op1) {
op1 = GetLastOutPt(horzEdge);
TEdge *eNextHorz = m_SortedEdges;
while (eNextHorz) {
if (eNextHorz->OutIdx >= 0 &&
HorzSegmentsOverlap(horzEdge->Bot.X, horzEdge->Top.X,
eNextHorz->Bot.X, eNextHorz->Top.X)) {
OutPt *op2 = GetLastOutPt(eNextHorz);
AddJoin(op2, op1, eNextHorz->Top);
}
eNextHorz = eNextHorz->NextInSEL;
}
AddGhostJoin(op1, horzEdge->Top);
}
if (horzEdge->NextInLML) {
if (horzEdge->OutIdx >= 0) {
op1 = AddOutPt(horzEdge, horzEdge->Top);
UpdateEdgeIntoAEL(horzEdge);
if (horzEdge->WindDelta == 0)
return;
// nb: HorzEdge is no longer horizontal here
TEdge *ePrev = horzEdge->PrevInAEL;
TEdge *eNext = horzEdge->NextInAEL;
if (ePrev && ePrev->Curr.X == horzEdge->Bot.X &&
ePrev->Curr.Y == horzEdge->Bot.Y && ePrev->WindDelta != 0 &&
(ePrev->OutIdx >= 0 && ePrev->Curr.Y > ePrev->Top.Y &&
SlopesEqual(*horzEdge, *ePrev, m_UseFullRange))) {
OutPt *op2 = AddOutPt(ePrev, horzEdge->Bot);
AddJoin(op1, op2, horzEdge->Top);
} else if (eNext && eNext->Curr.X == horzEdge->Bot.X &&
eNext->Curr.Y == horzEdge->Bot.Y && eNext->WindDelta != 0 &&
eNext->OutIdx >= 0 && eNext->Curr.Y > eNext->Top.Y &&
SlopesEqual(*horzEdge, *eNext, m_UseFullRange)) {
OutPt *op2 = AddOutPt(eNext, horzEdge->Bot);
AddJoin(op1, op2, horzEdge->Top);
}
} else
UpdateEdgeIntoAEL(horzEdge);
} else {
if (horzEdge->OutIdx >= 0)
AddOutPt(horzEdge, horzEdge->Top);
DeleteFromAEL(horzEdge);
}
}
//------------------------------------------------------------------------------
bool Clipper::ProcessIntersections(const cInt topY) {
if (!m_ActiveEdges)
return true;
try {
BuildIntersectList(topY);
size_t IlSize = m_IntersectList.size();
if (IlSize == 0)
return true;
if (IlSize == 1 || FixupIntersectionOrder())
ProcessIntersectList();
else
return false;
} catch (...) {
m_SortedEdges = 0;
DisposeIntersectNodes();
throw clipperException("ProcessIntersections error");
}
m_SortedEdges = 0;
return true;
}
//------------------------------------------------------------------------------
void Clipper::DisposeIntersectNodes() {
for (size_t i = 0; i < m_IntersectList.size(); ++i)
delete m_IntersectList[i];
m_IntersectList.clear();
}
//------------------------------------------------------------------------------
void Clipper::BuildIntersectList(const cInt topY) {
if (!m_ActiveEdges)
return;
// prepare for sorting ...
TEdge *e = m_ActiveEdges;
m_SortedEdges = e;
while (e) {
e->PrevInSEL = e->PrevInAEL;
e->NextInSEL = e->NextInAEL;
e->Curr.X = TopX(*e, topY);
e = e->NextInAEL;
}
// bubblesort ...
bool isModified;
do {
isModified = false;
e = m_SortedEdges;
while (e->NextInSEL) {
TEdge *eNext = e->NextInSEL;
IntPoint Pt;
if (e->Curr.X > eNext->Curr.X) {
IntersectPoint(*e, *eNext, Pt);
if (Pt.Y < topY)
Pt = IntPoint(TopX(*e, topY), topY);
IntersectNode *newNode = new IntersectNode;
newNode->Edge1 = e;
newNode->Edge2 = eNext;
newNode->Pt = Pt;
m_IntersectList.push_back(newNode);
SwapPositionsInSEL(e, eNext);
isModified = true;
} else
e = eNext;
}
if (e->PrevInSEL)
e->PrevInSEL->NextInSEL = 0;
else
break;
} while (isModified);
m_SortedEdges = 0; // important
}
//------------------------------------------------------------------------------
void Clipper::ProcessIntersectList() {
for (size_t i = 0; i < m_IntersectList.size(); ++i) {
IntersectNode *iNode = m_IntersectList[i];
{
IntersectEdges(iNode->Edge1, iNode->Edge2, iNode->Pt);
SwapPositionsInAEL(iNode->Edge1, iNode->Edge2);
}
delete iNode;
}
m_IntersectList.clear();
}
//------------------------------------------------------------------------------
bool IntersectListSort(IntersectNode *node1, IntersectNode *node2) {
return node2->Pt.Y < node1->Pt.Y;
}
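// nb: IntersectListSort orders intersect nodes bottom-most first, ie largest
// Pt.Y first (in this library an edge's Bot has the larger Y), which is the
// pre-condition that FixupIntersectionOrder() relies on below.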
//------------------------------------------------------------------------------
inline bool EdgesAdjacent(const IntersectNode &inode) {
return (inode.Edge1->NextInSEL == inode.Edge2) ||
(inode.Edge1->PrevInSEL == inode.Edge2);
}
//------------------------------------------------------------------------------
bool Clipper::FixupIntersectionOrder() {
// pre-condition: intersections are sorted Bottom-most first.
// Now it's crucial that intersections are made only between adjacent edges,
// so to ensure this the order of intersections may need adjusting ...
CopyAELToSEL();
std::sort(m_IntersectList.begin(), m_IntersectList.end(), IntersectListSort);
size_t cnt = m_IntersectList.size();
for (size_t i = 0; i < cnt; ++i) {
if (!EdgesAdjacent(*m_IntersectList[i])) {
size_t j = i + 1;
while (j < cnt && !EdgesAdjacent(*m_IntersectList[j]))
j++;
if (j == cnt)
return false;
std::swap(m_IntersectList[i], m_IntersectList[j]);
}
SwapPositionsInSEL(m_IntersectList[i]->Edge1, m_IntersectList[i]->Edge2);
}
return true;
}
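// Illustrative sketch (hypothetical edge labels, not from the library): with
// edges A, B and C adjacent in the SEL, the list might hold (A,C) before
// (A,B). (A,C) is not an adjacent pair, so the loop above searches forward for
// the first node that is, swaps it into position i, and only then calls
// SwapPositionsInSEL so the remaining non-adjacent pair becomes adjacent. If
// no adjacent pair can be found, the function returns false and
// ProcessIntersections() reports failure.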
//------------------------------------------------------------------------------
void Clipper::DoMaxima(TEdge *e) {
TEdge *eMaxPair = GetMaximaPairEx(e);
if (!eMaxPair) {
if (e->OutIdx >= 0)
AddOutPt(e, e->Top);
DeleteFromAEL(e);
return;
}
TEdge *eNext = e->NextInAEL;
while (eNext && eNext != eMaxPair) {
IntersectEdges(e, eNext, e->Top);
SwapPositionsInAEL(e, eNext);
eNext = e->NextInAEL;
}
if (e->OutIdx == Unassigned && eMaxPair->OutIdx == Unassigned) {
DeleteFromAEL(e);
DeleteFromAEL(eMaxPair);
} else if (e->OutIdx >= 0 && eMaxPair->OutIdx >= 0) {
if (e->OutIdx >= 0)
AddLocalMaxPoly(e, eMaxPair, e->Top);
DeleteFromAEL(e);
DeleteFromAEL(eMaxPair);
}
#ifdef use_lines
else if (e->WindDelta == 0) {
if (e->OutIdx >= 0) {
AddOutPt(e, e->Top);
e->OutIdx = Unassigned;
}
DeleteFromAEL(e);
if (eMaxPair->OutIdx >= 0) {
AddOutPt(eMaxPair, e->Top);
eMaxPair->OutIdx = Unassigned;
}
DeleteFromAEL(eMaxPair);
}
#endif
else
throw clipperException("DoMaxima error");
}
//------------------------------------------------------------------------------
void Clipper::ProcessEdgesAtTopOfScanbeam(const cInt topY) {
TEdge *e = m_ActiveEdges;
while (e) {
// 1. process maxima, treating them as if they're 'bent' horizontal edges,
// but exclude maxima with horizontal edges. nb: e can't be a horizontal.
bool IsMaximaEdge = IsMaxima(e, topY);
if (IsMaximaEdge) {
TEdge *eMaxPair = GetMaximaPairEx(e);
IsMaximaEdge = (!eMaxPair || !IsHorizontal(*eMaxPair));
}
if (IsMaximaEdge) {
if (m_StrictSimple)
m_Maxima.push_back(e->Top.X);
TEdge *ePrev = e->PrevInAEL;
DoMaxima(e);
if (!ePrev)
e = m_ActiveEdges;
else
e = ePrev->NextInAEL;
} else {
// 2. promote horizontal edges, otherwise update Curr.X and Curr.Y ...
if (IsIntermediate(e, topY) && IsHorizontal(*e->NextInLML)) {
UpdateEdgeIntoAEL(e);
if (e->OutIdx >= 0)
AddOutPt(e, e->Bot);
AddEdgeToSEL(e);
} else {
e->Curr.X = TopX(*e, topY);
e->Curr.Y = topY;
#ifdef use_xyz
e->Curr.Z =
topY == e->Top.Y ? e->Top.Z : (topY == e->Bot.Y ? e->Bot.Z : 0);
#endif
}
      // When StrictlySimple is enabled and 'e' is touched by another edge,
      // make sure both edges have a vertex here ...
if (m_StrictSimple) {
TEdge *ePrev = e->PrevInAEL;
if ((e->OutIdx >= 0) && (e->WindDelta != 0) && ePrev &&
(ePrev->OutIdx >= 0) && (ePrev->Curr.X == e->Curr.X) &&
(ePrev->WindDelta != 0)) {
IntPoint pt = e->Curr;
#ifdef use_xyz
SetZ(pt, *ePrev, *e);
#endif
OutPt *op = AddOutPt(ePrev, pt);
OutPt *op2 = AddOutPt(e, pt);
AddJoin(op, op2, pt); // StrictlySimple (type-3) join
}
}
e = e->NextInAEL;
}
}
// 3. Process horizontals at the Top of the scanbeam ...
m_Maxima.sort();
ProcessHorizontals();
m_Maxima.clear();
// 4. Promote intermediate vertices ...
e = m_ActiveEdges;
while (e) {
if (IsIntermediate(e, topY)) {
OutPt *op = 0;
if (e->OutIdx >= 0)
op = AddOutPt(e, e->Top);
UpdateEdgeIntoAEL(e);
// if output polygons share an edge, they'll need joining later ...
TEdge *ePrev = e->PrevInAEL;
TEdge *eNext = e->NextInAEL;
if (ePrev && ePrev->Curr.X == e->Bot.X && ePrev->Curr.Y == e->Bot.Y &&
op && ePrev->OutIdx >= 0 && ePrev->Curr.Y > ePrev->Top.Y &&
SlopesEqual(e->Curr, e->Top, ePrev->Curr, ePrev->Top,
m_UseFullRange) &&
(e->WindDelta != 0) && (ePrev->WindDelta != 0)) {
OutPt *op2 = AddOutPt(ePrev, e->Bot);
AddJoin(op, op2, e->Top);
} else if (eNext && eNext->Curr.X == e->Bot.X &&
eNext->Curr.Y == e->Bot.Y && op && eNext->OutIdx >= 0 &&
eNext->Curr.Y > eNext->Top.Y &&
SlopesEqual(e->Curr, e->Top, eNext->Curr, eNext->Top,
m_UseFullRange) &&
(e->WindDelta != 0) && (eNext->WindDelta != 0)) {
OutPt *op2 = AddOutPt(eNext, e->Bot);
AddJoin(op, op2, e->Top);
}
}
e = e->NextInAEL;
}
}
//------------------------------------------------------------------------------
void Clipper::FixupOutPolyline(OutRec &outrec) {
OutPt *pp = outrec.Pts;
OutPt *lastPP = pp->Prev;
while (pp != lastPP) {
pp = pp->Next;
if (pp->Pt == pp->Prev->Pt) {
if (pp == lastPP)
lastPP = pp->Prev;
OutPt *tmpPP = pp->Prev;
tmpPP->Next = pp->Next;
pp->Next->Prev = tmpPP;
delete pp;
pp = tmpPP;
}
}
if (pp == pp->Prev) {
DisposeOutPts(pp);
outrec.Pts = 0;
return;
}
}
//------------------------------------------------------------------------------
void Clipper::FixupOutPolygon(OutRec &outrec) {
// FixupOutPolygon() - removes duplicate points and simplifies consecutive
// parallel edges by removing the middle vertex.
OutPt *lastOK = 0;
outrec.BottomPt = 0;
OutPt *pp = outrec.Pts;
bool preserveCol = m_PreserveCollinear || m_StrictSimple;
for (;;) {
if (pp->Prev == pp || pp->Prev == pp->Next) {
DisposeOutPts(pp);
outrec.Pts = 0;
return;
}
// test for duplicate points and collinear edges ...
if ((pp->Pt == pp->Next->Pt) || (pp->Pt == pp->Prev->Pt) ||
(SlopesEqual(pp->Prev->Pt, pp->Pt, pp->Next->Pt, m_UseFullRange) &&
(!preserveCol ||
!Pt2IsBetweenPt1AndPt3(pp->Prev->Pt, pp->Pt, pp->Next->Pt)))) {
lastOK = 0;
OutPt *tmp = pp;
pp->Prev->Next = pp->Next;
pp->Next->Prev = pp->Prev;
pp = pp->Prev;
delete tmp;
} else if (pp == lastOK)
break;
else {
if (!lastOK)
lastOK = pp;
pp = pp->Next;
}
}
outrec.Pts = pp;
}
//------------------------------------------------------------------------------
int PointCount(OutPt *Pts) {
if (!Pts)
return 0;
int result = 0;
OutPt *p = Pts;
do {
result++;
p = p->Next;
} while (p != Pts);
return result;
}
//------------------------------------------------------------------------------
void Clipper::BuildResult(Paths &polys) {
polys.reserve(m_PolyOuts.size());
for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) {
if (!m_PolyOuts[i]->Pts)
continue;
Path pg;
OutPt *p = m_PolyOuts[i]->Pts->Prev;
int cnt = PointCount(p);
if (cnt < 2)
continue;
pg.reserve(cnt);
for (int i = 0; i < cnt; ++i) {
pg.push_back(p->Pt);
p = p->Prev;
}
polys.push_back(pg);
}
}
//------------------------------------------------------------------------------
void Clipper::BuildResult2(PolyTree &polytree) {
polytree.Clear();
polytree.AllNodes.reserve(m_PolyOuts.size());
// add each output polygon/contour to polytree ...
for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); i++) {
OutRec *outRec = m_PolyOuts[i];
int cnt = PointCount(outRec->Pts);
if ((outRec->IsOpen && cnt < 2) || (!outRec->IsOpen && cnt < 3))
continue;
FixHoleLinkage(*outRec);
PolyNode *pn = new PolyNode();
// nb: polytree takes ownership of all the PolyNodes
polytree.AllNodes.push_back(pn);
outRec->PolyNd = pn;
pn->Parent = 0;
pn->Index = 0;
pn->Contour.reserve(cnt);
OutPt *op = outRec->Pts->Prev;
for (int j = 0; j < cnt; j++) {
pn->Contour.push_back(op->Pt);
op = op->Prev;
}
}
// fixup PolyNode links etc ...
polytree.Childs.reserve(m_PolyOuts.size());
for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); i++) {
OutRec *outRec = m_PolyOuts[i];
if (!outRec->PolyNd)
continue;
if (outRec->IsOpen) {
outRec->PolyNd->m_IsOpen = true;
polytree.AddChild(*outRec->PolyNd);
} else if (outRec->FirstLeft && outRec->FirstLeft->PolyNd)
outRec->FirstLeft->PolyNd->AddChild(*outRec->PolyNd);
else
polytree.AddChild(*outRec->PolyNd);
}
}
//------------------------------------------------------------------------------
void SwapIntersectNodes(IntersectNode &int1, IntersectNode &int2) {
  // just swap the contents (because fIntersectNodes is a singly linked list)
IntersectNode inode = int1; // gets a copy of Int1
int1.Edge1 = int2.Edge1;
int1.Edge2 = int2.Edge2;
int1.Pt = int2.Pt;
int2.Edge1 = inode.Edge1;
int2.Edge2 = inode.Edge2;
int2.Pt = inode.Pt;
}
//------------------------------------------------------------------------------
inline bool E2InsertsBeforeE1(TEdge &e1, TEdge &e2) {
if (e2.Curr.X == e1.Curr.X) {
if (e2.Top.Y > e1.Top.Y)
return e2.Top.X < TopX(e1, e2.Top.Y);
else
return e1.Top.X > TopX(e2, e1.Top.Y);
} else
return e2.Curr.X < e1.Curr.X;
}
//------------------------------------------------------------------------------
bool GetOverlap(const cInt a1, const cInt a2, const cInt b1, const cInt b2,
cInt &Left, cInt &Right) {
if (a1 < a2) {
if (b1 < b2) {
Left = std::max(a1, b1);
Right = std::min(a2, b2);
} else {
Left = std::max(a1, b2);
Right = std::min(a2, b1);
}
} else {
if (b1 < b2) {
Left = std::max(a2, b1);
Right = std::min(a1, b2);
} else {
Left = std::max(a2, b2);
Right = std::min(a1, b1);
}
}
return Left < Right;
}
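// Worked example: GetOverlap(0, 10, 5, 20, Left, Right) yields Left = 5,
// Right = 10 and returns true; GetOverlap(0, 4, 5, 20, Left, Right) yields
// Left = 5, Right = 4 and returns false because the ranges don't overlap.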
//------------------------------------------------------------------------------
inline void UpdateOutPtIdxs(OutRec &outrec) {
OutPt *op = outrec.Pts;
do {
op->Idx = outrec.Idx;
op = op->Prev;
} while (op != outrec.Pts);
}
//------------------------------------------------------------------------------
void Clipper::InsertEdgeIntoAEL(TEdge *edge, TEdge *startEdge) {
if (!m_ActiveEdges) {
edge->PrevInAEL = 0;
edge->NextInAEL = 0;
m_ActiveEdges = edge;
} else if (!startEdge && E2InsertsBeforeE1(*m_ActiveEdges, *edge)) {
edge->PrevInAEL = 0;
edge->NextInAEL = m_ActiveEdges;
m_ActiveEdges->PrevInAEL = edge;
m_ActiveEdges = edge;
} else {
if (!startEdge)
startEdge = m_ActiveEdges;
while (startEdge->NextInAEL &&
!E2InsertsBeforeE1(*startEdge->NextInAEL, *edge))
startEdge = startEdge->NextInAEL;
edge->NextInAEL = startEdge->NextInAEL;
if (startEdge->NextInAEL)
startEdge->NextInAEL->PrevInAEL = edge;
edge->PrevInAEL = startEdge;
startEdge->NextInAEL = edge;
}
}
//----------------------------------------------------------------------
OutPt *DupOutPt(OutPt *outPt, bool InsertAfter) {
OutPt *result = new OutPt;
result->Pt = outPt->Pt;
result->Idx = outPt->Idx;
if (InsertAfter) {
result->Next = outPt->Next;
result->Prev = outPt;
outPt->Next->Prev = result;
outPt->Next = result;
} else {
result->Prev = outPt->Prev;
result->Next = outPt;
outPt->Prev->Next = result;
outPt->Prev = result;
}
return result;
}
//------------------------------------------------------------------------------
bool JoinHorz(OutPt *op1, OutPt *op1b, OutPt *op2, OutPt *op2b,
const IntPoint Pt, bool DiscardLeft) {
Direction Dir1 = (op1->Pt.X > op1b->Pt.X ? dRightToLeft : dLeftToRight);
Direction Dir2 = (op2->Pt.X > op2b->Pt.X ? dRightToLeft : dLeftToRight);
if (Dir1 == Dir2)
return false;
// When DiscardLeft, we want Op1b to be on the Left of Op1, otherwise we
// want Op1b to be on the Right. (And likewise with Op2 and Op2b.)
// So, to facilitate this while inserting Op1b and Op2b ...
// when DiscardLeft, make sure we're AT or RIGHT of Pt before adding Op1b,
// otherwise make sure we're AT or LEFT of Pt. (Likewise with Op2b.)
if (Dir1 == dLeftToRight) {
while (op1->Next->Pt.X <= Pt.X && op1->Next->Pt.X >= op1->Pt.X &&
op1->Next->Pt.Y == Pt.Y)
op1 = op1->Next;
if (DiscardLeft && (op1->Pt.X != Pt.X))
op1 = op1->Next;
op1b = DupOutPt(op1, !DiscardLeft);
if (op1b->Pt != Pt) {
op1 = op1b;
op1->Pt = Pt;
op1b = DupOutPt(op1, !DiscardLeft);
}
} else {
while (op1->Next->Pt.X >= Pt.X && op1->Next->Pt.X <= op1->Pt.X &&
op1->Next->Pt.Y == Pt.Y)
op1 = op1->Next;
if (!DiscardLeft && (op1->Pt.X != Pt.X))
op1 = op1->Next;
op1b = DupOutPt(op1, DiscardLeft);
if (op1b->Pt != Pt) {
op1 = op1b;
op1->Pt = Pt;
op1b = DupOutPt(op1, DiscardLeft);
}
}
if (Dir2 == dLeftToRight) {
while (op2->Next->Pt.X <= Pt.X && op2->Next->Pt.X >= op2->Pt.X &&
op2->Next->Pt.Y == Pt.Y)
op2 = op2->Next;
if (DiscardLeft && (op2->Pt.X != Pt.X))
op2 = op2->Next;
op2b = DupOutPt(op2, !DiscardLeft);
if (op2b->Pt != Pt) {
op2 = op2b;
op2->Pt = Pt;
op2b = DupOutPt(op2, !DiscardLeft);
};
} else {
while (op2->Next->Pt.X >= Pt.X && op2->Next->Pt.X <= op2->Pt.X &&
op2->Next->Pt.Y == Pt.Y)
op2 = op2->Next;
if (!DiscardLeft && (op2->Pt.X != Pt.X))
op2 = op2->Next;
op2b = DupOutPt(op2, DiscardLeft);
if (op2b->Pt != Pt) {
op2 = op2b;
op2->Pt = Pt;
op2b = DupOutPt(op2, DiscardLeft);
};
};
if ((Dir1 == dLeftToRight) == DiscardLeft) {
op1->Prev = op2;
op2->Next = op1;
op1b->Next = op2b;
op2b->Prev = op1b;
} else {
op1->Next = op2;
op2->Prev = op1;
op1b->Prev = op2b;
op2b->Next = op1b;
}
return true;
}
//------------------------------------------------------------------------------
bool Clipper::JoinPoints(Join *j, OutRec *outRec1, OutRec *outRec2) {
OutPt *op1 = j->OutPt1, *op1b;
OutPt *op2 = j->OutPt2, *op2b;
// There are 3 kinds of joins for output polygons ...
// 1. Horizontal joins where Join.OutPt1 & Join.OutPt2 are vertices anywhere
// along (horizontal) collinear edges (& Join.OffPt is on the same
// horizontal).
// 2. Non-horizontal joins where Join.OutPt1 & Join.OutPt2 are at the same
// location at the Bottom of the overlapping segment (& Join.OffPt is above).
// 3. StrictSimple joins where edges touch but are not collinear and where
// Join.OutPt1, Join.OutPt2 & Join.OffPt all share the same point.
bool isHorizontal = (j->OutPt1->Pt.Y == j->OffPt.Y);
if (isHorizontal && (j->OffPt == j->OutPt1->Pt) &&
(j->OffPt == j->OutPt2->Pt)) {
// Strictly Simple join ...
if (outRec1 != outRec2)
return false;
op1b = j->OutPt1->Next;
while (op1b != op1 && (op1b->Pt == j->OffPt))
op1b = op1b->Next;
bool reverse1 = (op1b->Pt.Y > j->OffPt.Y);
op2b = j->OutPt2->Next;
while (op2b != op2 && (op2b->Pt == j->OffPt))
op2b = op2b->Next;
bool reverse2 = (op2b->Pt.Y > j->OffPt.Y);
if (reverse1 == reverse2)
return false;
if (reverse1) {
op1b = DupOutPt(op1, false);
op2b = DupOutPt(op2, true);
op1->Prev = op2;
op2->Next = op1;
op1b->Next = op2b;
op2b->Prev = op1b;
j->OutPt1 = op1;
j->OutPt2 = op1b;
return true;
} else {
op1b = DupOutPt(op1, true);
op2b = DupOutPt(op2, false);
op1->Next = op2;
op2->Prev = op1;
op1b->Prev = op2b;
op2b->Next = op1b;
j->OutPt1 = op1;
j->OutPt2 = op1b;
return true;
}
} else if (isHorizontal) {
    // treat horizontal joins differently from non-horizontal joins since with
    // them we're not yet sure where the overlap is. OutPt1.Pt & OutPt2.Pt
    // may be anywhere along the horizontal edge.
op1b = op1;
while (op1->Prev->Pt.Y == op1->Pt.Y && op1->Prev != op1b &&
op1->Prev != op2)
op1 = op1->Prev;
while (op1b->Next->Pt.Y == op1b->Pt.Y && op1b->Next != op1 &&
op1b->Next != op2)
op1b = op1b->Next;
if (op1b->Next == op1 || op1b->Next == op2)
return false; // a flat 'polygon'
op2b = op2;
while (op2->Prev->Pt.Y == op2->Pt.Y && op2->Prev != op2b &&
op2->Prev != op1b)
op2 = op2->Prev;
while (op2b->Next->Pt.Y == op2b->Pt.Y && op2b->Next != op2 &&
op2b->Next != op1)
op2b = op2b->Next;
if (op2b->Next == op2 || op2b->Next == op1)
return false; // a flat 'polygon'
cInt Left, Right;
    // Op1 --> Op1b & Op2 --> Op2b are the extremities of the horizontal edges
if (!GetOverlap(op1->Pt.X, op1b->Pt.X, op2->Pt.X, op2b->Pt.X, Left, Right))
return false;
    // DiscardLeftSide: when overlapping edges are joined, a spike will be
    // created which needs to be cleaned up. However, we don't want Op1 or Op2
    // caught up on the discarded side as either may still be needed for other
    // joins ...
IntPoint Pt;
bool DiscardLeftSide;
if (op1->Pt.X >= Left && op1->Pt.X <= Right) {
Pt = op1->Pt;
DiscardLeftSide = (op1->Pt.X > op1b->Pt.X);
} else if (op2->Pt.X >= Left && op2->Pt.X <= Right) {
Pt = op2->Pt;
DiscardLeftSide = (op2->Pt.X > op2b->Pt.X);
} else if (op1b->Pt.X >= Left && op1b->Pt.X <= Right) {
Pt = op1b->Pt;
DiscardLeftSide = op1b->Pt.X > op1->Pt.X;
} else {
Pt = op2b->Pt;
DiscardLeftSide = (op2b->Pt.X > op2->Pt.X);
}
j->OutPt1 = op1;
j->OutPt2 = op2;
return JoinHorz(op1, op1b, op2, op2b, Pt, DiscardLeftSide);
} else {
// nb: For non-horizontal joins ...
// 1. Jr.OutPt1.Pt.Y == Jr.OutPt2.Pt.Y
    // 2. Jr.OutPt1.Pt.Y > Jr.OffPt.Y
// make sure the polygons are correctly oriented ...
op1b = op1->Next;
while ((op1b->Pt == op1->Pt) && (op1b != op1))
op1b = op1b->Next;
bool Reverse1 = ((op1b->Pt.Y > op1->Pt.Y) ||
!SlopesEqual(op1->Pt, op1b->Pt, j->OffPt, m_UseFullRange));
if (Reverse1) {
op1b = op1->Prev;
while ((op1b->Pt == op1->Pt) && (op1b != op1))
op1b = op1b->Prev;
if ((op1b->Pt.Y > op1->Pt.Y) ||
!SlopesEqual(op1->Pt, op1b->Pt, j->OffPt, m_UseFullRange))
return false;
};
op2b = op2->Next;
while ((op2b->Pt == op2->Pt) && (op2b != op2))
op2b = op2b->Next;
bool Reverse2 = ((op2b->Pt.Y > op2->Pt.Y) ||
!SlopesEqual(op2->Pt, op2b->Pt, j->OffPt, m_UseFullRange));
if (Reverse2) {
op2b = op2->Prev;
while ((op2b->Pt == op2->Pt) && (op2b != op2))
op2b = op2b->Prev;
if ((op2b->Pt.Y > op2->Pt.Y) ||
!SlopesEqual(op2->Pt, op2b->Pt, j->OffPt, m_UseFullRange))
return false;
}
if ((op1b == op1) || (op2b == op2) || (op1b == op2b) ||
((outRec1 == outRec2) && (Reverse1 == Reverse2)))
return false;
if (Reverse1) {
op1b = DupOutPt(op1, false);
op2b = DupOutPt(op2, true);
op1->Prev = op2;
op2->Next = op1;
op1b->Next = op2b;
op2b->Prev = op1b;
j->OutPt1 = op1;
j->OutPt2 = op1b;
return true;
} else {
op1b = DupOutPt(op1, true);
op2b = DupOutPt(op2, false);
op1->Next = op2;
op2->Prev = op1;
op1b->Prev = op2b;
op2b->Next = op1b;
j->OutPt1 = op1;
j->OutPt2 = op1b;
return true;
}
}
}
//----------------------------------------------------------------------
static OutRec *ParseFirstLeft(OutRec *FirstLeft) {
while (FirstLeft && !FirstLeft->Pts)
FirstLeft = FirstLeft->FirstLeft;
return FirstLeft;
}
//------------------------------------------------------------------------------
void Clipper::FixupFirstLefts1(OutRec *OldOutRec, OutRec *NewOutRec) {
// tests if NewOutRec contains the polygon before reassigning FirstLeft
for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) {
OutRec *outRec = m_PolyOuts[i];
OutRec *firstLeft = ParseFirstLeft(outRec->FirstLeft);
if (outRec->Pts && firstLeft == OldOutRec) {
if (Poly2ContainsPoly1(outRec->Pts, NewOutRec->Pts))
outRec->FirstLeft = NewOutRec;
}
}
}
//----------------------------------------------------------------------
void Clipper::FixupFirstLefts2(OutRec *InnerOutRec, OutRec *OuterOutRec) {
// A polygon has split into two such that one is now the inner of the other.
  // It's possible that these polygons now wrap around other polygons, so check
  // every polygon that's also contained by OuterOutRec's FirstLeft container
  // (including 0) to see if it has become inner to the new inner polygon ...
OutRec *orfl = OuterOutRec->FirstLeft;
for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) {
OutRec *outRec = m_PolyOuts[i];
if (!outRec->Pts || outRec == OuterOutRec || outRec == InnerOutRec)
continue;
OutRec *firstLeft = ParseFirstLeft(outRec->FirstLeft);
if (firstLeft != orfl && firstLeft != InnerOutRec &&
firstLeft != OuterOutRec)
continue;
if (Poly2ContainsPoly1(outRec->Pts, InnerOutRec->Pts))
outRec->FirstLeft = InnerOutRec;
else if (Poly2ContainsPoly1(outRec->Pts, OuterOutRec->Pts))
outRec->FirstLeft = OuterOutRec;
else if (outRec->FirstLeft == InnerOutRec ||
outRec->FirstLeft == OuterOutRec)
outRec->FirstLeft = orfl;
}
}
//----------------------------------------------------------------------
void Clipper::FixupFirstLefts3(OutRec *OldOutRec, OutRec *NewOutRec) {
// reassigns FirstLeft WITHOUT testing if NewOutRec contains the polygon
for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i) {
OutRec *outRec = m_PolyOuts[i];
OutRec *firstLeft = ParseFirstLeft(outRec->FirstLeft);
if (outRec->Pts && firstLeft == OldOutRec)
outRec->FirstLeft = NewOutRec;
}
}
//----------------------------------------------------------------------
void Clipper::JoinCommonEdges() {
for (JoinList::size_type i = 0; i < m_Joins.size(); i++) {
Join *join = m_Joins[i];
OutRec *outRec1 = GetOutRec(join->OutPt1->Idx);
OutRec *outRec2 = GetOutRec(join->OutPt2->Idx);
if (!outRec1->Pts || !outRec2->Pts)
continue;
if (outRec1->IsOpen || outRec2->IsOpen)
continue;
// get the polygon fragment with the correct hole state (FirstLeft)
// before calling JoinPoints() ...
OutRec *holeStateRec;
if (outRec1 == outRec2)
holeStateRec = outRec1;
else if (OutRec1RightOfOutRec2(outRec1, outRec2))
holeStateRec = outRec2;
else if (OutRec1RightOfOutRec2(outRec2, outRec1))
holeStateRec = outRec1;
else
holeStateRec = GetLowermostRec(outRec1, outRec2);
if (!JoinPoints(join, outRec1, outRec2))
continue;
if (outRec1 == outRec2) {
// instead of joining two polygons, we've just created a new one by
// splitting one polygon into two.
outRec1->Pts = join->OutPt1;
outRec1->BottomPt = 0;
outRec2 = CreateOutRec();
outRec2->Pts = join->OutPt2;
// update all OutRec2.Pts Idx's ...
UpdateOutPtIdxs(*outRec2);
if (Poly2ContainsPoly1(outRec2->Pts, outRec1->Pts)) {
// outRec1 contains outRec2 ...
outRec2->IsHole = !outRec1->IsHole;
outRec2->FirstLeft = outRec1;
if (m_UsingPolyTree)
FixupFirstLefts2(outRec2, outRec1);
if ((outRec2->IsHole ^ m_ReverseOutput) == (Area(*outRec2) > 0))
ReversePolyPtLinks(outRec2->Pts);
} else if (Poly2ContainsPoly1(outRec1->Pts, outRec2->Pts)) {
// outRec2 contains outRec1 ...
outRec2->IsHole = outRec1->IsHole;
outRec1->IsHole = !outRec2->IsHole;
outRec2->FirstLeft = outRec1->FirstLeft;
outRec1->FirstLeft = outRec2;
if (m_UsingPolyTree)
FixupFirstLefts2(outRec1, outRec2);
if ((outRec1->IsHole ^ m_ReverseOutput) == (Area(*outRec1) > 0))
ReversePolyPtLinks(outRec1->Pts);
} else {
// the 2 polygons are completely separate ...
outRec2->IsHole = outRec1->IsHole;
outRec2->FirstLeft = outRec1->FirstLeft;
// fixup FirstLeft pointers that may need reassigning to OutRec2
if (m_UsingPolyTree)
FixupFirstLefts1(outRec1, outRec2);
}
} else {
// joined 2 polygons together ...
outRec2->Pts = 0;
outRec2->BottomPt = 0;
outRec2->Idx = outRec1->Idx;
outRec1->IsHole = holeStateRec->IsHole;
if (holeStateRec == outRec2)
outRec1->FirstLeft = outRec2->FirstLeft;
outRec2->FirstLeft = outRec1;
if (m_UsingPolyTree)
FixupFirstLefts3(outRec2, outRec1);
}
}
}
//------------------------------------------------------------------------------
// ClipperOffset support functions ...
//------------------------------------------------------------------------------
DoublePoint GetUnitNormal(const IntPoint &pt1, const IntPoint &pt2) {
if (pt2.X == pt1.X && pt2.Y == pt1.Y)
return DoublePoint(0, 0);
double Dx = (double)(pt2.X - pt1.X);
double dy = (double)(pt2.Y - pt1.Y);
double f = 1 * 1.0 / std::sqrt(Dx * Dx + dy * dy);
Dx *= f;
dy *= f;
return DoublePoint(dy, -Dx);
}
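// Worked example: for the edge (0,0) -> (10,0), Dx = 10, dy = 0 and f = 0.1,
// so the returned normal is (0,-1): a unit vector perpendicular to the edge.
// DoOffset() displaces source vertices along these normals scaled by delta.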
//------------------------------------------------------------------------------
// ClipperOffset class
//------------------------------------------------------------------------------
ClipperOffset::ClipperOffset(double miterLimit, double arcTolerance) {
this->MiterLimit = miterLimit;
this->ArcTolerance = arcTolerance;
m_lowest.X = -1;
}
//------------------------------------------------------------------------------
ClipperOffset::~ClipperOffset() { Clear(); }
//------------------------------------------------------------------------------
void ClipperOffset::Clear() {
for (int i = 0; i < m_polyNodes.ChildCount(); ++i)
delete m_polyNodes.Childs[i];
m_polyNodes.Childs.clear();
m_lowest.X = -1;
}
//------------------------------------------------------------------------------
void ClipperOffset::AddPath(const Path &path, JoinType joinType,
EndType endType) {
int highI = (int)path.size() - 1;
if (highI < 0)
return;
PolyNode *newNode = new PolyNode();
newNode->m_jointype = joinType;
newNode->m_endtype = endType;
// strip duplicate points from path and also get index to the lowest point ...
if (endType == etClosedLine || endType == etClosedPolygon)
while (highI > 0 && path[0] == path[highI])
highI--;
newNode->Contour.reserve(highI + 1);
newNode->Contour.push_back(path[0]);
int j = 0, k = 0;
for (int i = 1; i <= highI; i++)
if (newNode->Contour[j] != path[i]) {
j++;
newNode->Contour.push_back(path[i]);
if (path[i].Y > newNode->Contour[k].Y ||
(path[i].Y == newNode->Contour[k].Y &&
path[i].X < newNode->Contour[k].X))
k = j;
}
if (endType == etClosedPolygon && j < 2) {
delete newNode;
return;
}
m_polyNodes.AddChild(*newNode);
// if this path's lowest pt is lower than all the others then update m_lowest
if (endType != etClosedPolygon)
return;
if (m_lowest.X < 0)
m_lowest = IntPoint(m_polyNodes.ChildCount() - 1, k);
else {
IntPoint ip = m_polyNodes.Childs[(int)m_lowest.X]->Contour[(int)m_lowest.Y];
if (newNode->Contour[k].Y > ip.Y ||
(newNode->Contour[k].Y == ip.Y && newNode->Contour[k].X < ip.X))
m_lowest = IntPoint(m_polyNodes.ChildCount() - 1, k);
}
}
//------------------------------------------------------------------------------
void ClipperOffset::AddPaths(const Paths &paths, JoinType joinType,
EndType endType) {
for (Paths::size_type i = 0; i < paths.size(); ++i)
AddPath(paths[i], joinType, endType);
}
//------------------------------------------------------------------------------
void ClipperOffset::FixOrientations() {
// fixup orientations of all closed paths if the orientation of the
// closed path with the lowermost vertex is wrong ...
if (m_lowest.X >= 0 &&
!Orientation(m_polyNodes.Childs[(int)m_lowest.X]->Contour)) {
for (int i = 0; i < m_polyNodes.ChildCount(); ++i) {
PolyNode &node = *m_polyNodes.Childs[i];
if (node.m_endtype == etClosedPolygon ||
(node.m_endtype == etClosedLine && Orientation(node.Contour)))
ReversePath(node.Contour);
}
} else {
for (int i = 0; i < m_polyNodes.ChildCount(); ++i) {
PolyNode &node = *m_polyNodes.Childs[i];
if (node.m_endtype == etClosedLine && !Orientation(node.Contour))
ReversePath(node.Contour);
}
}
}
//------------------------------------------------------------------------------
void ClipperOffset::Execute(Paths &solution, double delta) {
solution.clear();
FixOrientations();
DoOffset(delta);
// now clean up 'corners' ...
Clipper clpr;
clpr.AddPaths(m_destPolys, ptSubject, true);
if (delta > 0) {
clpr.Execute(ctUnion, solution, pftPositive, pftPositive);
} else {
IntRect r = clpr.GetBounds();
Path outer(4);
outer[0] = IntPoint(r.left - 10, r.bottom + 10);
outer[1] = IntPoint(r.right + 10, r.bottom + 10);
outer[2] = IntPoint(r.right + 10, r.top - 10);
outer[3] = IntPoint(r.left - 10, r.top - 10);
clpr.AddPath(outer, ptSubject, true);
clpr.ReverseSolution(true);
clpr.Execute(ctUnion, solution, pftNegative, pftNegative);
if (solution.size() > 0)
solution.erase(solution.begin());
}
}
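// Illustrative usage sketch ('subject' is a hypothetical, previously filled
// Path; not part of the library):
//   ClipperOffset co(2.0, 0.25);                  // miter limit, arc tolerance
//   co.AddPath(subject, jtRound, etClosedPolygon);
//   Paths inflated;
//   co.Execute(inflated, 10.0);   // positive delta offsets outwards
//   co.Clear();
//   co.AddPath(subject, jtRound, etClosedPolygon);
//   Paths deflated;
//   co.Execute(deflated, -10.0);  // negative delta offsets inwards
// (outwards/inwards assumes the closed path is correctly oriented)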
//------------------------------------------------------------------------------
void ClipperOffset::Execute(PolyTree &solution, double delta) {
solution.Clear();
FixOrientations();
DoOffset(delta);
// now clean up 'corners' ...
Clipper clpr;
clpr.AddPaths(m_destPolys, ptSubject, true);
if (delta > 0) {
clpr.Execute(ctUnion, solution, pftPositive, pftPositive);
} else {
IntRect r = clpr.GetBounds();
Path outer(4);
outer[0] = IntPoint(r.left - 10, r.bottom + 10);
outer[1] = IntPoint(r.right + 10, r.bottom + 10);
outer[2] = IntPoint(r.right + 10, r.top - 10);
outer[3] = IntPoint(r.left - 10, r.top - 10);
clpr.AddPath(outer, ptSubject, true);
clpr.ReverseSolution(true);
clpr.Execute(ctUnion, solution, pftNegative, pftNegative);
// remove the outer PolyNode rectangle ...
if (solution.ChildCount() == 1 && solution.Childs[0]->ChildCount() > 0) {
PolyNode *outerNode = solution.Childs[0];
solution.Childs.reserve(outerNode->ChildCount());
solution.Childs[0] = outerNode->Childs[0];
solution.Childs[0]->Parent = outerNode->Parent;
for (int i = 1; i < outerNode->ChildCount(); ++i)
solution.AddChild(*outerNode->Childs[i]);
} else
solution.Clear();
}
}
//------------------------------------------------------------------------------
void ClipperOffset::DoOffset(double delta) {
m_destPolys.clear();
m_delta = delta;
  // if Zero offset, just copy any CLOSED polygons to m_destPolys and return ...
if (NEAR_ZERO(delta)) {
m_destPolys.reserve(m_polyNodes.ChildCount());
for (int i = 0; i < m_polyNodes.ChildCount(); i++) {
PolyNode &node = *m_polyNodes.Childs[i];
if (node.m_endtype == etClosedPolygon)
m_destPolys.push_back(node.Contour);
}
return;
}
// see offset_triginometry3.svg in the documentation folder ...
if (MiterLimit > 2)
m_miterLim = 2 / (MiterLimit * MiterLimit);
else
m_miterLim = 0.5;
double y;
if (ArcTolerance <= 0.0)
y = def_arc_tolerance;
else if (ArcTolerance > std::fabs(delta) * def_arc_tolerance)
y = std::fabs(delta) * def_arc_tolerance;
else
y = ArcTolerance;
// see offset_triginometry2.svg in the documentation folder ...
double steps = pi / std::acos(1 - y / std::fabs(delta));
if (steps > std::fabs(delta) * pi)
steps = std::fabs(delta) * pi; // ie excessive precision check
m_sin = std::sin(two_pi / steps);
m_cos = std::cos(two_pi / steps);
m_StepsPerRad = steps / two_pi;
if (delta < 0.0)
m_sin = -m_sin;
m_destPolys.reserve(m_polyNodes.ChildCount() * 2);
for (int i = 0; i < m_polyNodes.ChildCount(); i++) {
PolyNode &node = *m_polyNodes.Childs[i];
m_srcPoly = node.Contour;
int len = (int)m_srcPoly.size();
if (len == 0 ||
(delta <= 0 && (len < 3 || node.m_endtype != etClosedPolygon)))
continue;
m_destPoly.clear();
if (len == 1) {
if (node.m_jointype == jtRound) {
double X = 1.0, Y = 0.0;
for (cInt j = 1; j <= steps; j++) {
m_destPoly.push_back(IntPoint(Round(m_srcPoly[0].X + X * delta),
Round(m_srcPoly[0].Y + Y * delta)));
double X2 = X;
X = X * m_cos - m_sin * Y;
Y = X2 * m_sin + Y * m_cos;
}
} else {
double X = -1.0, Y = -1.0;
for (int j = 0; j < 4; ++j) {
m_destPoly.push_back(IntPoint(Round(m_srcPoly[0].X + X * delta),
Round(m_srcPoly[0].Y + Y * delta)));
if (X < 0)
X = 1;
else if (Y < 0)
Y = 1;
else
X = -1;
}
}
m_destPolys.push_back(m_destPoly);
continue;
}
// build m_normals ...
m_normals.clear();
m_normals.reserve(len);
for (int j = 0; j < len - 1; ++j)
m_normals.push_back(GetUnitNormal(m_srcPoly[j], m_srcPoly[j + 1]));
if (node.m_endtype == etClosedLine || node.m_endtype == etClosedPolygon)
m_normals.push_back(GetUnitNormal(m_srcPoly[len - 1], m_srcPoly[0]));
else
m_normals.push_back(DoublePoint(m_normals[len - 2]));
if (node.m_endtype == etClosedPolygon) {
int k = len - 1;
for (int j = 0; j < len; ++j)
OffsetPoint(j, k, node.m_jointype);
m_destPolys.push_back(m_destPoly);
} else if (node.m_endtype == etClosedLine) {
int k = len - 1;
for (int j = 0; j < len; ++j)
OffsetPoint(j, k, node.m_jointype);
m_destPolys.push_back(m_destPoly);
m_destPoly.clear();
// re-build m_normals ...
DoublePoint n = m_normals[len - 1];
for (int j = len - 1; j > 0; j--)
m_normals[j] = DoublePoint(-m_normals[j - 1].X, -m_normals[j - 1].Y);
m_normals[0] = DoublePoint(-n.X, -n.Y);
k = 0;
for (int j = len - 1; j >= 0; j--)
OffsetPoint(j, k, node.m_jointype);
m_destPolys.push_back(m_destPoly);
} else {
int k = 0;
for (int j = 1; j < len - 1; ++j)
OffsetPoint(j, k, node.m_jointype);
IntPoint pt1;
if (node.m_endtype == etOpenButt) {
int j = len - 1;
pt1 = IntPoint((cInt)Round(m_srcPoly[j].X + m_normals[j].X * delta),
(cInt)Round(m_srcPoly[j].Y + m_normals[j].Y * delta));
m_destPoly.push_back(pt1);
pt1 = IntPoint((cInt)Round(m_srcPoly[j].X - m_normals[j].X * delta),
(cInt)Round(m_srcPoly[j].Y - m_normals[j].Y * delta));
m_destPoly.push_back(pt1);
} else {
int j = len - 1;
k = len - 2;
m_sinA = 0;
m_normals[j] = DoublePoint(-m_normals[j].X, -m_normals[j].Y);
if (node.m_endtype == etOpenSquare)
DoSquare(j, k);
else
DoRound(j, k);
}
// re-build m_normals ...
for (int j = len - 1; j > 0; j--)
m_normals[j] = DoublePoint(-m_normals[j - 1].X, -m_normals[j - 1].Y);
m_normals[0] = DoublePoint(-m_normals[1].X, -m_normals[1].Y);
k = len - 1;
for (int j = k - 1; j > 0; --j)
OffsetPoint(j, k, node.m_jointype);
if (node.m_endtype == etOpenButt) {
pt1 = IntPoint((cInt)Round(m_srcPoly[0].X - m_normals[0].X * delta),
(cInt)Round(m_srcPoly[0].Y - m_normals[0].Y * delta));
m_destPoly.push_back(pt1);
pt1 = IntPoint((cInt)Round(m_srcPoly[0].X + m_normals[0].X * delta),
(cInt)Round(m_srcPoly[0].Y + m_normals[0].Y * delta));
m_destPoly.push_back(pt1);
} else {
k = 1;
m_sinA = 0;
if (node.m_endtype == etOpenSquare)
DoSquare(0, 1);
else
DoRound(0, 1);
}
m_destPolys.push_back(m_destPoly);
}
}
}
//------------------------------------------------------------------------------
void ClipperOffset::OffsetPoint(int j, int &k, JoinType jointype) {
// cross product ...
m_sinA = (m_normals[k].X * m_normals[j].Y - m_normals[j].X * m_normals[k].Y);
if (std::fabs(m_sinA * m_delta) < 1.0) {
// dot product ...
double cosA =
(m_normals[k].X * m_normals[j].X + m_normals[j].Y * m_normals[k].Y);
if (cosA > 0) // angle => 0 degrees
{
m_destPoly.push_back(
IntPoint(Round(m_srcPoly[j].X + m_normals[k].X * m_delta),
Round(m_srcPoly[j].Y + m_normals[k].Y * m_delta)));
return;
}
// else angle => 180 degrees
} else if (m_sinA > 1.0)
m_sinA = 1.0;
else if (m_sinA < -1.0)
m_sinA = -1.0;
if (m_sinA * m_delta < 0) {
m_destPoly.push_back(
IntPoint(Round(m_srcPoly[j].X + m_normals[k].X * m_delta),
Round(m_srcPoly[j].Y + m_normals[k].Y * m_delta)));
m_destPoly.push_back(m_srcPoly[j]);
m_destPoly.push_back(
IntPoint(Round(m_srcPoly[j].X + m_normals[j].X * m_delta),
Round(m_srcPoly[j].Y + m_normals[j].Y * m_delta)));
} else
switch (jointype) {
case jtMiter: {
double r = 1 + (m_normals[j].X * m_normals[k].X +
m_normals[j].Y * m_normals[k].Y);
if (r >= m_miterLim)
DoMiter(j, k, r);
else
DoSquare(j, k);
break;
}
case jtSquare:
DoSquare(j, k);
break;
case jtRound:
DoRound(j, k);
break;
}
k = j;
}
//------------------------------------------------------------------------------
void ClipperOffset::DoSquare(int j, int k) {
double dx = std::tan(std::atan2(m_sinA, m_normals[k].X * m_normals[j].X +
m_normals[k].Y * m_normals[j].Y) /
4);
m_destPoly.push_back(IntPoint(
Round(m_srcPoly[j].X + m_delta * (m_normals[k].X - m_normals[k].Y * dx)),
Round(m_srcPoly[j].Y +
m_delta * (m_normals[k].Y + m_normals[k].X * dx))));
m_destPoly.push_back(IntPoint(
Round(m_srcPoly[j].X + m_delta * (m_normals[j].X + m_normals[j].Y * dx)),
Round(m_srcPoly[j].Y +
m_delta * (m_normals[j].Y - m_normals[j].X * dx))));
}
//------------------------------------------------------------------------------
void ClipperOffset::DoMiter(int j, int k, double r) {
double q = m_delta / r;
m_destPoly.push_back(
IntPoint(Round(m_srcPoly[j].X + (m_normals[k].X + m_normals[j].X) * q),
Round(m_srcPoly[j].Y + (m_normals[k].Y + m_normals[j].Y) * q)));
}
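// nb: r = 1 + dot(normal_k, normal_j) is computed in OffsetPoint(), so with
// q = delta / r the single mitre vertex above sits at
//   src + (normal_k + normal_j) * delta / (1 + normal_k . normal_j),
// which is exactly where the two offset edges intersect.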
//------------------------------------------------------------------------------
void ClipperOffset::DoRound(int j, int k) {
double a = std::atan2(m_sinA, m_normals[k].X * m_normals[j].X +
m_normals[k].Y * m_normals[j].Y);
int steps = std::max((int)Round(m_StepsPerRad * std::fabs(a)), 1);
double X = m_normals[k].X, Y = m_normals[k].Y, X2;
for (int i = 0; i < steps; ++i) {
m_destPoly.push_back(IntPoint(Round(m_srcPoly[j].X + X * m_delta),
Round(m_srcPoly[j].Y + Y * m_delta)));
X2 = X;
X = X * m_cos - m_sin * Y;
Y = X2 * m_sin + Y * m_cos;
}
m_destPoly.push_back(
IntPoint(Round(m_srcPoly[j].X + m_normals[j].X * m_delta),
Round(m_srcPoly[j].Y + m_normals[j].Y * m_delta)));
}
//------------------------------------------------------------------------------
// Miscellaneous public functions
//------------------------------------------------------------------------------
void Clipper::DoSimplePolygons() {
PolyOutList::size_type i = 0;
while (i < m_PolyOuts.size()) {
OutRec *outrec = m_PolyOuts[i++];
OutPt *op = outrec->Pts;
if (!op || outrec->IsOpen)
continue;
    do // for each Pt in the polygon until a duplicate is found ...
{
OutPt *op2 = op->Next;
while (op2 != outrec->Pts) {
if ((op->Pt == op2->Pt) && op2->Next != op && op2->Prev != op) {
// split the polygon into two ...
OutPt *op3 = op->Prev;
OutPt *op4 = op2->Prev;
op->Prev = op4;
op4->Next = op;
op2->Prev = op3;
op3->Next = op2;
outrec->Pts = op;
OutRec *outrec2 = CreateOutRec();
outrec2->Pts = op2;
UpdateOutPtIdxs(*outrec2);
if (Poly2ContainsPoly1(outrec2->Pts, outrec->Pts)) {
// OutRec2 is contained by OutRec1 ...
outrec2->IsHole = !outrec->IsHole;
outrec2->FirstLeft = outrec;
if (m_UsingPolyTree)
FixupFirstLefts2(outrec2, outrec);
} else if (Poly2ContainsPoly1(outrec->Pts, outrec2->Pts)) {
// OutRec1 is contained by OutRec2 ...
outrec2->IsHole = outrec->IsHole;
outrec->IsHole = !outrec2->IsHole;
outrec2->FirstLeft = outrec->FirstLeft;
outrec->FirstLeft = outrec2;
if (m_UsingPolyTree)
FixupFirstLefts2(outrec, outrec2);
} else {
// the 2 polygons are separate ...
outrec2->IsHole = outrec->IsHole;
outrec2->FirstLeft = outrec->FirstLeft;
if (m_UsingPolyTree)
FixupFirstLefts1(outrec, outrec2);
}
op2 = op; // ie get ready for the Next iteration
}
op2 = op2->Next;
}
op = op->Next;
} while (op != outrec->Pts);
}
}
//------------------------------------------------------------------------------
void ReversePath(Path &p) { std::reverse(p.begin(), p.end()); }
//------------------------------------------------------------------------------
void ReversePaths(Paths &p) {
for (Paths::size_type i = 0; i < p.size(); ++i)
ReversePath(p[i]);
}
//------------------------------------------------------------------------------
void SimplifyPolygon(const Path &in_poly, Paths &out_polys,
PolyFillType fillType) {
Clipper c;
c.StrictlySimple(true);
c.AddPath(in_poly, ptSubject, true);
c.Execute(ctUnion, out_polys, fillType, fillType);
}
//------------------------------------------------------------------------------
void SimplifyPolygons(const Paths &in_polys, Paths &out_polys,
PolyFillType fillType) {
Clipper c;
c.StrictlySimple(true);
c.AddPaths(in_polys, ptSubject, true);
c.Execute(ctUnion, out_polys, fillType, fillType);
}
//------------------------------------------------------------------------------
void SimplifyPolygons(Paths &polys, PolyFillType fillType) {
SimplifyPolygons(polys, polys, fillType);
}
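// Illustrative sketch ('figureEight' is a hypothetical self-intersecting Path):
//   Paths simple;
//   SimplifyPolygon(figureEight, simple, pftNonZero);
//   // 'simple' now holds non-self-intersecting polygons, produced by a
//   // StrictlySimple union of the input under the given fill rule.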
//------------------------------------------------------------------------------
inline double DistanceSqrd(const IntPoint &pt1, const IntPoint &pt2) {
double Dx = ((double)pt1.X - pt2.X);
double dy = ((double)pt1.Y - pt2.Y);
return (Dx * Dx + dy * dy);
}
//------------------------------------------------------------------------------
double DistanceFromLineSqrd(const IntPoint &pt, const IntPoint &ln1,
const IntPoint &ln2) {
  // The equation of a line in general form (Ax + By + C = 0)
  // given 2 points (x1,y1) & (x2,y2) is ...
  // (y1 - y2)x + (x2 - x1)y + (y2 - y1)x1 - (x2 - x1)y1 = 0
  // A = (y1 - y2); B = (x2 - x1); C = (y2 - y1)x1 - (x2 - x1)y1
  // perpendicular distance of point (x3,y3) = (Ax3 + By3 + C)/Sqrt(A^2 + B^2)
  // see http://en.wikipedia.org/wiki/Perpendicular_distance
double A = double(ln1.Y - ln2.Y);
double B = double(ln2.X - ln1.X);
double C = A * ln1.X + B * ln1.Y;
C = A * pt.X + B * pt.Y - C;
return (C * C) / (A * A + B * B);
}
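// Worked example: for pt = (3,4) and the line through ln1 = (0,0), ln2 = (10,0):
// A = 0, B = 10, C = A*0 + B*0 = 0, then C = A*3 + B*4 - C = 40, giving
// C*C / (A*A + B*B) = 1600 / 100 = 16, ie the squared distance of 4 units.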
//---------------------------------------------------------------------------
bool SlopesNearCollinear(const IntPoint &pt1, const IntPoint &pt2,
const IntPoint &pt3, double distSqrd) {
// this function is more accurate when the point that's geometrically
// between the other 2 points is the one that's tested for distance.
// ie makes it more likely to pick up 'spikes' ...
if (Abs(pt1.X - pt2.X) > Abs(pt1.Y - pt2.Y)) {
if ((pt1.X > pt2.X) == (pt1.X < pt3.X))
return DistanceFromLineSqrd(pt1, pt2, pt3) < distSqrd;
else if ((pt2.X > pt1.X) == (pt2.X < pt3.X))
return DistanceFromLineSqrd(pt2, pt1, pt3) < distSqrd;
else
return DistanceFromLineSqrd(pt3, pt1, pt2) < distSqrd;
} else {
if ((pt1.Y > pt2.Y) == (pt1.Y < pt3.Y))
return DistanceFromLineSqrd(pt1, pt2, pt3) < distSqrd;
else if ((pt2.Y > pt1.Y) == (pt2.Y < pt3.Y))
return DistanceFromLineSqrd(pt2, pt1, pt3) < distSqrd;
else
return DistanceFromLineSqrd(pt3, pt1, pt2) < distSqrd;
}
}
//------------------------------------------------------------------------------
bool PointsAreClose(IntPoint pt1, IntPoint pt2, double distSqrd) {
double Dx = (double)pt1.X - pt2.X;
double dy = (double)pt1.Y - pt2.Y;
return ((Dx * Dx) + (dy * dy) <= distSqrd);
}
//------------------------------------------------------------------------------
OutPt *ExcludeOp(OutPt *op) {
OutPt *result = op->Prev;
result->Next = op->Next;
op->Next->Prev = result;
result->Idx = 0;
return result;
}
//------------------------------------------------------------------------------
void CleanPolygon(const Path &in_poly, Path &out_poly, double distance) {
// distance = proximity in units/pixels below which vertices
// will be stripped. Default ~= sqrt(2).
size_t size = in_poly.size();
if (size == 0) {
out_poly.clear();
return;
}
OutPt *outPts = new OutPt[size];
for (size_t i = 0; i < size; ++i) {
outPts[i].Pt = in_poly[i];
outPts[i].Next = &outPts[(i + 1) % size];
outPts[i].Next->Prev = &outPts[i];
outPts[i].Idx = 0;
}
double distSqrd = distance * distance;
OutPt *op = &outPts[0];
while (op->Idx == 0 && op->Next != op->Prev) {
if (PointsAreClose(op->Pt, op->Prev->Pt, distSqrd)) {
op = ExcludeOp(op);
size--;
} else if (PointsAreClose(op->Prev->Pt, op->Next->Pt, distSqrd)) {
ExcludeOp(op->Next);
op = ExcludeOp(op);
size -= 2;
} else if (SlopesNearCollinear(op->Prev->Pt, op->Pt, op->Next->Pt,
distSqrd)) {
op = ExcludeOp(op);
size--;
} else {
op->Idx = 1;
op = op->Next;
}
}
if (size < 3)
size = 0;
out_poly.resize(size);
for (size_t i = 0; i < size; ++i) {
out_poly[i] = op->Pt;
op = op->Next;
}
delete[] outPts;
}
//------------------------------------------------------------------------------
void CleanPolygon(Path &poly, double distance) {
CleanPolygon(poly, poly, distance);
}
//------------------------------------------------------------------------------
void CleanPolygons(const Paths &in_polys, Paths &out_polys, double distance) {
out_polys.resize(in_polys.size());
for (Paths::size_type i = 0; i < in_polys.size(); ++i)
CleanPolygon(in_polys[i], out_polys[i], distance);
}
//------------------------------------------------------------------------------
void CleanPolygons(Paths &polys, double distance) {
CleanPolygons(polys, polys, distance);
}
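// Illustrative sketch ('noisy' is a hypothetical Path with micro-segments):
//   Path cleaned;
//   CleanPolygon(noisy, cleaned, 1.415);
// Vertices closer to a neighbour than the given distance, and vertices that
// are nearly collinear with their neighbours within that tolerance, are
// removed; if fewer than 3 vertices remain the output path is emptied.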
//------------------------------------------------------------------------------
void Minkowski(const Path &poly, const Path &path, Paths &solution, bool isSum,
bool isClosed) {
int delta = (isClosed ? 1 : 0);
size_t polyCnt = poly.size();
size_t pathCnt = path.size();
Paths pp;
pp.reserve(pathCnt);
if (isSum)
for (size_t i = 0; i < pathCnt; ++i) {
Path p;
p.reserve(polyCnt);
for (size_t j = 0; j < poly.size(); ++j)
p.push_back(IntPoint(path[i].X + poly[j].X, path[i].Y + poly[j].Y));
pp.push_back(p);
}
else
for (size_t i = 0; i < pathCnt; ++i) {
Path p;
p.reserve(polyCnt);
for (size_t j = 0; j < poly.size(); ++j)
p.push_back(IntPoint(path[i].X - poly[j].X, path[i].Y - poly[j].Y));
pp.push_back(p);
}
solution.clear();
solution.reserve((pathCnt + delta) * (polyCnt + 1));
for (size_t i = 0; i < pathCnt - 1 + delta; ++i)
for (size_t j = 0; j < polyCnt; ++j) {
Path quad;
quad.reserve(4);
quad.push_back(pp[i % pathCnt][j % polyCnt]);
quad.push_back(pp[(i + 1) % pathCnt][j % polyCnt]);
quad.push_back(pp[(i + 1) % pathCnt][(j + 1) % polyCnt]);
quad.push_back(pp[i % pathCnt][(j + 1) % polyCnt]);
if (!Orientation(quad))
ReversePath(quad);
solution.push_back(quad);
}
}
//------------------------------------------------------------------------------
void MinkowskiSum(const Path &pattern, const Path &path, Paths &solution,
bool pathIsClosed) {
Minkowski(pattern, path, solution, true, pathIsClosed);
Clipper c;
c.AddPaths(solution, ptSubject, true);
c.Execute(ctUnion, solution, pftNonZero, pftNonZero);
}
//------------------------------------------------------------------------------
void TranslatePath(const Path &input, Path &output, const IntPoint delta) {
// precondition: input != output
output.resize(input.size());
for (size_t i = 0; i < input.size(); ++i)
output[i] = IntPoint(input[i].X + delta.X, input[i].Y + delta.Y);
}
//------------------------------------------------------------------------------
void MinkowskiSum(const Path &pattern, const Paths &paths, Paths &solution,
bool pathIsClosed) {
Clipper c;
for (size_t i = 0; i < paths.size(); ++i) {
Paths tmp;
Minkowski(pattern, paths[i], tmp, true, pathIsClosed);
c.AddPaths(tmp, ptSubject, true);
if (pathIsClosed) {
Path tmp2;
TranslatePath(paths[i], tmp2, pattern[0]);
c.AddPath(tmp2, ptClip, true);
}
}
c.Execute(ctUnion, solution, pftNonZero, pftNonZero);
}
//------------------------------------------------------------------------------
void MinkowskiDiff(const Path &poly1, const Path &poly2, Paths &solution) {
Minkowski(poly1, poly2, solution, false, true);
Clipper c;
c.AddPaths(solution, ptSubject, true);
c.Execute(ctUnion, solution, pftNonZero, pftNonZero);
}
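// Illustrative sketch ('pattern' and 'outline' are hypothetical Paths):
//   Paths sum, diff;
//   MinkowskiSum(pattern, outline, sum, true);   // sweep 'pattern' along 'outline'
//   MinkowskiDiff(pattern, outline, diff);
// Internally, Minkowski() builds a quad between each pair of consecutive
// translated copies of 'pattern'; MinkowskiSum()/MinkowskiDiff() then union
// those quads using pftNonZero filling.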
//------------------------------------------------------------------------------
enum NodeType { ntAny, ntOpen, ntClosed };
void AddPolyNodeToPaths(const PolyNode &polynode, NodeType nodetype,
Paths &paths) {
bool match = true;
if (nodetype == ntClosed)
match = !polynode.IsOpen();
else if (nodetype == ntOpen)
return;
if (!polynode.Contour.empty() && match)
paths.push_back(polynode.Contour);
for (int i = 0; i < polynode.ChildCount(); ++i)
AddPolyNodeToPaths(*polynode.Childs[i], nodetype, paths);
}
//------------------------------------------------------------------------------
void PolyTreeToPaths(const PolyTree &polytree, Paths &paths) {
paths.resize(0);
paths.reserve(polytree.Total());
AddPolyNodeToPaths(polytree, ntAny, paths);
}
//------------------------------------------------------------------------------
void ClosedPathsFromPolyTree(const PolyTree &polytree, Paths &paths) {
paths.resize(0);
paths.reserve(polytree.Total());
AddPolyNodeToPaths(polytree, ntClosed, paths);
}
//------------------------------------------------------------------------------
void OpenPathsFromPolyTree(PolyTree &polytree, Paths &paths) {
paths.resize(0);
paths.reserve(polytree.Total());
// Open paths are top level only, so ...
for (int i = 0; i < polytree.ChildCount(); ++i)
if (polytree.Childs[i]->IsOpen())
paths.push_back(polytree.Childs[i]->Contour);
}
//------------------------------------------------------------------------------
std::ostream &operator<<(std::ostream &s, const IntPoint &p) {
s << "(" << p.X << "," << p.Y << ")";
return s;
}
//------------------------------------------------------------------------------
std::ostream &operator<<(std::ostream &s, const Path &p) {
if (p.empty())
return s;
Path::size_type last = p.size() - 1;
for (Path::size_type i = 0; i < last; i++)
s << "(" << p[i].X << "," << p[i].Y << "), ";
s << "(" << p[last].X << "," << p[last].Y << ")\n";
return s;
}
//------------------------------------------------------------------------------
std::ostream &operator<<(std::ostream &s, const Paths &p) {
for (Paths::size_type i = 0; i < p.size(); i++)
s << p[i];
s << "\n";
return s;
}
//------------------------------------------------------------------------------
} // ClipperLib namespace
/*******************************************************************************
* *
* Author : Angus Johnson *
* Version : 6.4.2 *
* Date : 27 February 2017 *
* Website : http://www.angusj.com *
* Copyright : Angus Johnson 2010-2017 *
* *
* License: *
* Use, modification & distribution is subject to Boost Software License Ver 1. *
* http://www.boost.org/LICENSE_1_0.txt *
* *
* Attributions: *
* The code in this library is an extension of Bala Vatti's clipping algorithm: *
* "A generic solution to polygon clipping" *
* Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63. *
* http://portal.acm.org/citation.cfm?id=129906 *
* *
* Computer graphics and geometric modeling: implementation and algorithms *
* By Max K. Agoston *
* Springer; 1 edition (January 4, 2005) *
* http://books.google.com/books?q=vatti+clipping+agoston *
* *
* See also: *
* "Polygon Offsetting by Computing Winding Numbers" *
* Paper no. DETC2005-85513 pp. 565-575 *
* ASME 2005 International Design Engineering Technical Conferences *
* and Computers and Information in Engineering Conference (IDETC/CIE2005) *
* September 24-28, 2005 , Long Beach, California, USA *
* http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf *
* *
*******************************************************************************/
#ifndef clipper_hpp
#define clipper_hpp
#define CLIPPER_VERSION "6.4.2"
// use_int32: When enabled 32bit ints are used instead of 64bit ints. This
// improves performance but coordinate values are limited to the range +/- 46340
//#define use_int32
// use_xyz: adds a Z member to IntPoint. Adds a minor cost to performance.
//#define use_xyz
// use_lines: Enables line clipping. Adds a very minor cost to performance.
#define use_lines
// use_deprecated: Enables temporary support for the obsolete functions
//#define use_deprecated
#include <cstdlib>
#include <cstring>
#include <functional>
#include <list>
#include <ostream>
#include <queue>
#include <set>
#include <stdexcept>
#include <vector>
namespace ClipperLib {
enum ClipType { ctIntersection, ctUnion, ctDifference, ctXor };
enum PolyType { ptSubject, ptClip };
// By far the most widely used winding rules for polygon filling are
// EvenOdd & NonZero (GDI, GDI+, XLib, OpenGL, Cairo, AGG, Quartz, SVG, Gr32)
// Other rules include Positive, Negative and ABS_GTR_EQ_TWO (only in OpenGL)
// see http://glprogramming.com/red/chapter11.html
enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative };
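// Example: a five-pointed star drawn as one self-intersecting path has a
// winding number of 2 in its centre, so pftEvenOdd leaves the centre unfilled
// while pftNonZero fills the star solid.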
#ifdef use_int32
typedef int cInt;
static cInt const loRange = 0x7FFF;
static cInt const hiRange = 0x7FFF;
#else
typedef signed long long cInt;
static cInt const loRange = 0x3FFFFFFF;
static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL;
typedef signed long long long64; // used by Int128 class
typedef unsigned long long ulong64;
#endif
struct IntPoint {
cInt X;
cInt Y;
#ifdef use_xyz
cInt Z;
IntPoint(cInt x = 0, cInt y = 0, cInt z = 0) : X(x), Y(y), Z(z){};
#else
IntPoint(cInt x = 0, cInt y = 0) : X(x), Y(y){};
#endif
friend inline bool operator==(const IntPoint &a, const IntPoint &b) {
return a.X == b.X && a.Y == b.Y;
}
friend inline bool operator!=(const IntPoint &a, const IntPoint &b) {
return a.X != b.X || a.Y != b.Y;
}
};
//------------------------------------------------------------------------------
typedef std::vector<IntPoint> Path;
typedef std::vector<Path> Paths;
inline Path &operator<<(Path &poly, const IntPoint &p) {
poly.push_back(p);
return poly;
}
inline Paths &operator<<(Paths &polys, const Path &p) {
polys.push_back(p);
return polys;
}
std::ostream &operator<<(std::ostream &s, const IntPoint &p);
std::ostream &operator<<(std::ostream &s, const Path &p);
std::ostream &operator<<(std::ostream &s, const Paths &p);
struct DoublePoint {
double X;
double Y;
DoublePoint(double x = 0, double y = 0) : X(x), Y(y) {}
DoublePoint(IntPoint ip) : X((double)ip.X), Y((double)ip.Y) {}
};
//------------------------------------------------------------------------------
#ifdef use_xyz
typedef void (*ZFillCallback)(IntPoint &e1bot, IntPoint &e1top, IntPoint &e2bot,
IntPoint &e2top, IntPoint &pt);
#endif
enum InitOptions {
ioReverseSolution = 1,
ioStrictlySimple = 2,
ioPreserveCollinear = 4
};
enum JoinType { jtSquare, jtRound, jtMiter };
enum EndType {
etClosedPolygon,
etClosedLine,
etOpenButt,
etOpenSquare,
etOpenRound
};
class PolyNode;
typedef std::vector<PolyNode *> PolyNodes;
class PolyNode {
public:
PolyNode();
virtual ~PolyNode(){};
Path Contour;
PolyNodes Childs;
PolyNode *Parent;
PolyNode *GetNext() const;
bool IsHole() const;
bool IsOpen() const;
int ChildCount() const;
private:
// PolyNode& operator =(PolyNode& other);
unsigned Index; // node index in Parent.Childs
bool m_IsOpen;
JoinType m_jointype;
EndType m_endtype;
PolyNode *GetNextSiblingUp() const;
void AddChild(PolyNode &child);
friend class Clipper; // to access Index
friend class ClipperOffset;
};
class PolyTree : public PolyNode {
public:
~PolyTree() { Clear(); };
PolyNode *GetFirst() const;
void Clear();
int Total() const;
private:
// PolyTree& operator =(PolyTree& other);
PolyNodes AllNodes;
friend class Clipper; // to access AllNodes
};
bool Orientation(const Path &poly);
double Area(const Path &poly);
int PointInPolygon(const IntPoint &pt, const Path &path);
void SimplifyPolygon(const Path &in_poly, Paths &out_polys,
PolyFillType fillType = pftEvenOdd);
void SimplifyPolygons(const Paths &in_polys, Paths &out_polys,
PolyFillType fillType = pftEvenOdd);
void SimplifyPolygons(Paths &polys, PolyFillType fillType = pftEvenOdd);
void CleanPolygon(const Path &in_poly, Path &out_poly, double distance = 1.415);
void CleanPolygon(Path &poly, double distance = 1.415);
void CleanPolygons(const Paths &in_polys, Paths &out_polys,
double distance = 1.415);
void CleanPolygons(Paths &polys, double distance = 1.415);
void MinkowskiSum(const Path &pattern, const Path &path, Paths &solution,
bool pathIsClosed);
void MinkowskiSum(const Path &pattern, const Paths &paths, Paths &solution,
bool pathIsClosed);
void MinkowskiDiff(const Path &poly1, const Path &poly2, Paths &solution);
void PolyTreeToPaths(const PolyTree &polytree, Paths &paths);
void ClosedPathsFromPolyTree(const PolyTree &polytree, Paths &paths);
void OpenPathsFromPolyTree(PolyTree &polytree, Paths &paths);
void ReversePath(Path &p);
void ReversePaths(Paths &p);
struct IntRect {
cInt left;
cInt top;
cInt right;
cInt bottom;
};
// enums that are used internally ...
enum EdgeSide { esLeft = 1, esRight = 2 };
// forward declarations (for stuff used internally) ...
struct TEdge;
struct IntersectNode;
struct LocalMinimum;
struct OutPt;
struct OutRec;
struct Join;
typedef std::vector<OutRec *> PolyOutList;
typedef std::vector<TEdge *> EdgeList;
typedef std::vector<Join *> JoinList;
typedef std::vector<IntersectNode *> IntersectList;
//------------------------------------------------------------------------------
// ClipperBase is the ancestor to the Clipper class. It should not be
// instantiated directly. This class simply abstracts the conversion of sets of
// polygon coordinates into edge objects that are stored in a LocalMinima list.
class ClipperBase {
public:
ClipperBase();
virtual ~ClipperBase();
virtual bool AddPath(const Path &pg, PolyType PolyTyp, bool Closed);
bool AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed);
virtual void Clear();
IntRect GetBounds();
bool PreserveCollinear() { return m_PreserveCollinear; };
void PreserveCollinear(bool value) { m_PreserveCollinear = value; };
protected:
void DisposeLocalMinimaList();
TEdge *AddBoundsToLML(TEdge *e, bool IsClosed);
virtual void Reset();
TEdge *ProcessBound(TEdge *E, bool IsClockwise);
void InsertScanbeam(const cInt Y);
bool PopScanbeam(cInt &Y);
bool LocalMinimaPending();
bool PopLocalMinima(cInt Y, const LocalMinimum *&locMin);
OutRec *CreateOutRec();
void DisposeAllOutRecs();
void DisposeOutRec(PolyOutList::size_type index);
void SwapPositionsInAEL(TEdge *edge1, TEdge *edge2);
void DeleteFromAEL(TEdge *e);
void UpdateEdgeIntoAEL(TEdge *&e);
typedef std::vector<LocalMinimum> MinimaList;
MinimaList::iterator m_CurrentLM;
MinimaList m_MinimaList;
bool m_UseFullRange;
EdgeList m_edges;
bool m_PreserveCollinear;
bool m_HasOpenPaths;
PolyOutList m_PolyOuts;
TEdge *m_ActiveEdges;
typedef std::priority_queue<cInt> ScanbeamList;
ScanbeamList m_Scanbeam;
};
//------------------------------------------------------------------------------
class Clipper : public virtual ClipperBase {
public:
Clipper(int initOptions = 0);
bool Execute(ClipType clipType, Paths &solution,
PolyFillType fillType = pftEvenOdd);
bool Execute(ClipType clipType, Paths &solution, PolyFillType subjFillType,
PolyFillType clipFillType);
bool Execute(ClipType clipType, PolyTree &polytree,
PolyFillType fillType = pftEvenOdd);
bool Execute(ClipType clipType, PolyTree &polytree, PolyFillType subjFillType,
PolyFillType clipFillType);
bool ReverseSolution() { return m_ReverseOutput; };
void ReverseSolution(bool value) { m_ReverseOutput = value; };
bool StrictlySimple() { return m_StrictSimple; };
void StrictlySimple(bool value) { m_StrictSimple = value; };
// set the callback function for z value filling on intersections (otherwise Z
// is 0)
#ifdef use_xyz
void ZFillFunction(ZFillCallback zFillFunc);
#endif
protected:
virtual bool ExecuteInternal();
private:
JoinList m_Joins;
JoinList m_GhostJoins;
IntersectList m_IntersectList;
ClipType m_ClipType;
typedef std::list<cInt> MaximaList;
MaximaList m_Maxima;
TEdge *m_SortedEdges;
bool m_ExecuteLocked;
PolyFillType m_ClipFillType;
PolyFillType m_SubjFillType;
bool m_ReverseOutput;
bool m_UsingPolyTree;
bool m_StrictSimple;
#ifdef use_xyz
ZFillCallback m_ZFill; // custom callback
#endif
void SetWindingCount(TEdge &edge);
bool IsEvenOddFillType(const TEdge &edge) const;
bool IsEvenOddAltFillType(const TEdge &edge) const;
void InsertLocalMinimaIntoAEL(const cInt botY);
void InsertEdgeIntoAEL(TEdge *edge, TEdge *startEdge);
void AddEdgeToSEL(TEdge *edge);
bool PopEdgeFromSEL(TEdge *&edge);
void CopyAELToSEL();
void DeleteFromSEL(TEdge *e);
void SwapPositionsInSEL(TEdge *edge1, TEdge *edge2);
bool IsContributing(const TEdge &edge) const;
bool IsTopHorz(const cInt XPos);
void DoMaxima(TEdge *e);
void ProcessHorizontals();
void ProcessHorizontal(TEdge *horzEdge);
void AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
OutPt *AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
OutRec *GetOutRec(int idx);
void AppendPolygon(TEdge *e1, TEdge *e2);
void IntersectEdges(TEdge *e1, TEdge *e2, IntPoint &pt);
OutPt *AddOutPt(TEdge *e, const IntPoint &pt);
OutPt *GetLastOutPt(TEdge *e);
bool ProcessIntersections(const cInt topY);
void BuildIntersectList(const cInt topY);
void ProcessIntersectList();
void ProcessEdgesAtTopOfScanbeam(const cInt topY);
void BuildResult(Paths &polys);
void BuildResult2(PolyTree &polytree);
void SetHoleState(TEdge *e, OutRec *outrec);
void DisposeIntersectNodes();
bool FixupIntersectionOrder();
void FixupOutPolygon(OutRec &outrec);
void FixupOutPolyline(OutRec &outrec);
bool IsHole(TEdge *e);
bool FindOwnerFromSplitRecs(OutRec &outRec, OutRec *&currOrfl);
void FixHoleLinkage(OutRec &outrec);
void AddJoin(OutPt *op1, OutPt *op2, const IntPoint offPt);
void ClearJoins();
void ClearGhostJoins();
void AddGhostJoin(OutPt *op, const IntPoint offPt);
bool JoinPoints(Join *j, OutRec *outRec1, OutRec *outRec2);
void JoinCommonEdges();
void DoSimplePolygons();
void FixupFirstLefts1(OutRec *OldOutRec, OutRec *NewOutRec);
void FixupFirstLefts2(OutRec *InnerOutRec, OutRec *OuterOutRec);
void FixupFirstLefts3(OutRec *OldOutRec, OutRec *NewOutRec);
#ifdef use_xyz
void SetZ(IntPoint &pt, TEdge &e1, TEdge &e2);
#endif
};
//------------------------------------------------------------------------------
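// ClipperOffset inflates or deflates polygons by a given delta (polygon
// offsetting). In this repository it is used by PaddleOCR's
// PostProcessor::UnClip (below) to expand detected text boxes.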
class ClipperOffset {
public:
ClipperOffset(double miterLimit = 2.0, double roundPrecision = 0.25);
~ClipperOffset();
void AddPath(const Path &path, JoinType joinType, EndType endType);
void AddPaths(const Paths &paths, JoinType joinType, EndType endType);
void Execute(Paths &solution, double delta);
void Execute(PolyTree &solution, double delta);
void Clear();
double MiterLimit;
double ArcTolerance;
private:
Paths m_destPolys;
Path m_srcPoly;
Path m_destPoly;
std::vector<DoublePoint> m_normals;
double m_delta, m_sinA, m_sin, m_cos;
double m_miterLim, m_StepsPerRad;
IntPoint m_lowest;
PolyNode m_polyNodes;
void FixOrientations();
void DoOffset(double delta);
void OffsetPoint(int j, int &k, JoinType jointype);
void DoSquare(int j, int k);
void DoMiter(int j, int k, double r);
void DoRound(int j, int k);
};
//------------------------------------------------------------------------------
class clipperException : public std::exception {
public:
clipperException(const char *description) : m_descr(description) {}
virtual ~clipperException() throw() {}
virtual const char *what() const throw() { return m_descr.c_str(); }
private:
std::string m_descr;
};
//------------------------------------------------------------------------------
} // ClipperLib namespace
#endif // clipper_hpp
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "postprocess_op.h"
namespace PaddleOCR {
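// GetContourArea computes the area of the 4-point box via the shoelace formula
// and its perimeter, then derives the offset distance used by UnClip:
// distance = area * unclip_ratio / perimeter.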
void PostProcessor::GetContourArea(const std::vector<std::vector<float>> &box,
float unclip_ratio, float &distance) {
int pts_num = 4;
float area = 0.0f;
float dist = 0.0f;
for (int i = 0; i < pts_num; i++) {
area += box[i][0] * box[(i + 1) % pts_num][1] -
box[i][1] * box[(i + 1) % pts_num][0];
dist += sqrtf((box[i][0] - box[(i + 1) % pts_num][0]) *
(box[i][0] - box[(i + 1) % pts_num][0]) +
(box[i][1] - box[(i + 1) % pts_num][1]) *
(box[i][1] - box[(i + 1) % pts_num][1]));
}
area = fabs(float(area / 2.0));
distance = area * unclip_ratio / dist;
}
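// UnClip expands the detected quadrilateral outward by the distance computed in
// GetContourArea, using a ClipperLib offset with round joins, and returns the
// minimum-area rotated rectangle enclosing the expanded polygon.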
cv::RotatedRect PostProcessor::UnClip(std::vector<std::vector<float>> box,
const float &unclip_ratio) {
float distance = 1.0;
GetContourArea(box, unclip_ratio, distance);
ClipperLib::ClipperOffset offset;
ClipperLib::Path p;
p << ClipperLib::IntPoint(int(box[0][0]), int(box[0][1]))
<< ClipperLib::IntPoint(int(box[1][0]), int(box[1][1]))
<< ClipperLib::IntPoint(int(box[2][0]), int(box[2][1]))
<< ClipperLib::IntPoint(int(box[3][0]), int(box[3][1]));
offset.AddPath(p, ClipperLib::jtRound, ClipperLib::etClosedPolygon);
ClipperLib::Paths soln;
offset.Execute(soln, distance);
std::vector<cv::Point2f> points;
for (int j = 0; j < soln.size(); j++) {
for (int i = 0; i < soln[j].size(); i++) {
points.emplace_back(soln[j][i].X, soln[j][i].Y);
}
}
cv::RotatedRect res;
if (points.size() <= 0) {
res = cv::RotatedRect(cv::Point2f(0, 0), cv::Size2f(1, 1), 0);
} else {
res = cv::minAreaRect(points);
}
return res;
}
float **PostProcessor::Mat2Vec(cv::Mat mat) {
auto **array = new float *[mat.rows];
for (int i = 0; i < mat.rows; ++i)
array[i] = new float[mat.cols];
for (int i = 0; i < mat.rows; ++i) {
for (int j = 0; j < mat.cols; ++j) {
array[i][j] = mat.at<float>(i, j);
}
}
return array;
}
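// OrderPointsClockwise sorts the 4 box points by x, then orders each left/right
// pair by y, returning top-left, top-right, bottom-right, bottom-left.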
std::vector<std::vector<int>>
PostProcessor::OrderPointsClockwise(std::vector<std::vector<int>> pts) {
std::vector<std::vector<int>> box = pts;
std::sort(box.begin(), box.end(), XsortInt);
std::vector<std::vector<int>> leftmost = {box[0], box[1]};
std::vector<std::vector<int>> rightmost = {box[2], box[3]};
if (leftmost[0][1] > leftmost[1][1])
std::swap(leftmost[0], leftmost[1]);
if (rightmost[0][1] > rightmost[1][1])
std::swap(rightmost[0], rightmost[1]);
std::vector<std::vector<int>> rect = {leftmost[0], rightmost[0], rightmost[1],
leftmost[1]};
return rect;
}
std::vector<std::vector<float>> PostProcessor::Mat2Vector(cv::Mat mat) {
std::vector<std::vector<float>> img_vec;
std::vector<float> tmp;
for (int i = 0; i < mat.rows; ++i) {
tmp.clear();
for (int j = 0; j < mat.cols; ++j) {
tmp.push_back(mat.at<float>(i, j));
}
img_vec.push_back(tmp);
}
return img_vec;
}
bool PostProcessor::XsortFp32(std::vector<float> a, std::vector<float> b) {
if (a[0] != b[0])
return a[0] < b[0];
return false;
}
bool PostProcessor::XsortInt(std::vector<int> a, std::vector<int> b) {
if (a[0] != b[0])
return a[0] < b[0];
return false;
}
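// GetMiniBoxes returns the four corners of the rotated rectangle ordered as
// top-left, top-right, bottom-right, bottom-left and sets ssid to the longer
// side of the rectangle (used by the caller as a minimum-size filter).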
std::vector<std::vector<float>> PostProcessor::GetMiniBoxes(cv::RotatedRect box,
float &ssid) {
ssid = std::max(box.size.width, box.size.height);
cv::Mat points;
cv::boxPoints(box, points);
auto array = Mat2Vector(points);
std::sort(array.begin(), array.end(), XsortFp32);
std::vector<float> idx1 = array[0], idx2 = array[1], idx3 = array[2],
idx4 = array[3];
if (array[3][1] <= array[2][1]) {
idx2 = array[3];
idx3 = array[2];
} else {
idx2 = array[2];
idx3 = array[3];
}
if (array[1][1] <= array[0][1]) {
idx1 = array[1];
idx4 = array[0];
} else {
idx1 = array[0];
idx4 = array[1];
}
array[0] = idx1;
array[1] = idx2;
array[2] = idx3;
array[3] = idx4;
return array;
}
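// BoxScoreFast rasterizes the candidate box into a mask over its bounding
// rectangle and returns the mean of the prediction map inside the mask, i.e.
// the average text confidence of the box.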
float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array,
cv::Mat pred) {
auto array = box_array;
int width = pred.cols;
int height = pred.rows;
float box_x[4] = {array[0][0], array[1][0], array[2][0], array[3][0]};
float box_y[4] = {array[0][1], array[1][1], array[2][1], array[3][1]};
int xmin = clamp(int(std::floor(*(std::min_element(box_x, box_x + 4)))), 0,
width - 1);
int xmax = clamp(int(std::ceil(*(std::max_element(box_x, box_x + 4)))), 0,
width - 1);
int ymin = clamp(int(std::floor(*(std::min_element(box_y, box_y + 4)))), 0,
height - 1);
int ymax = clamp(int(std::ceil(*(std::max_element(box_y, box_y + 4)))), 0,
height - 1);
cv::Mat mask;
mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);
cv::Point root_point[4];
root_point[0] = cv::Point(int(array[0][0]) - xmin, int(array[0][1]) - ymin);
root_point[1] = cv::Point(int(array[1][0]) - xmin, int(array[1][1]) - ymin);
root_point[2] = cv::Point(int(array[2][0]) - xmin, int(array[2][1]) - ymin);
root_point[3] = cv::Point(int(array[3][0]) - xmin, int(array[3][1]) - ymin);
const cv::Point *ppt[1] = {root_point};
int npt[] = {4};
cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1));
cv::Mat croppedImg;
pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1))
.copyTo(croppedImg);
auto score = cv::mean(croppedImg, mask)[0];
return score;
}
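// BoxesFromBitmap extracts text boxes from the binarized segmentation map:
// find contours, fit a minimum-area rectangle, drop boxes that are too small or
// whose score is below box_thresh, expand the survivors with UnClip, and scale
// the corners back to the prediction-map resolution.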
std::vector<std::vector<std::vector<int>>>
PostProcessor::BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
const float &box_thresh,
const float &det_db_unclip_ratio) {
const int min_size = 3;
const int max_candidates = 1000;
int width = bitmap.cols;
int height = bitmap.rows;
std::vector<std::vector<cv::Point>> contours;
std::vector<cv::Vec4i> hierarchy;
cv::findContours(bitmap, contours, hierarchy, cv::RETR_LIST,
cv::CHAIN_APPROX_SIMPLE);
int num_contours =
contours.size() >= max_candidates ? max_candidates : contours.size();
std::vector<std::vector<std::vector<int>>> boxes;
for (int _i = 0; _i < num_contours; _i++) {
if (contours[_i].size() <= 2) {
continue;
}
float ssid;
cv::RotatedRect box = cv::minAreaRect(contours[_i]);
auto array = GetMiniBoxes(box, ssid);
auto box_for_unclip = array;
// end get_mini_box
if (ssid < min_size) {
continue;
}
float score;
score = BoxScoreFast(array, pred);
if (score < box_thresh)
continue;
// start for unclip
cv::RotatedRect points = UnClip(box_for_unclip, det_db_unclip_ratio);
if (points.size.height < 1.001 && points.size.width < 1.001) {
continue;
}
// end for unclip
cv::RotatedRect clipbox = points;
auto cliparray = GetMiniBoxes(clipbox, ssid);
if (ssid < min_size + 2)
continue;
int dest_width = pred.cols;
int dest_height = pred.rows;
std::vector<std::vector<int>> intcliparray;
for (int num_pt = 0; num_pt < 4; num_pt++) {
std::vector<int> a{int(clampf(roundf(cliparray[num_pt][0] / float(width) *
float(dest_width)),
0, float(dest_width))),
int(clampf(roundf(cliparray[num_pt][1] /
float(height) * float(dest_height)),
0, float(dest_height)))};
intcliparray.push_back(a);
}
boxes.push_back(intcliparray);
} // end for
return boxes;
}
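// FilterTagDetRes maps the boxes back to the original image scale (dividing by
// ratio_w / ratio_h), clamps the coordinates to the image bounds, and drops
// boxes whose sides are 4 pixels or smaller.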
std::vector<std::vector<std::vector<int>>>
PostProcessor::FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes,
float ratio_h, float ratio_w, cv::Mat srcimg) {
int oriimg_h = srcimg.rows;
int oriimg_w = srcimg.cols;
std::vector<std::vector<std::vector<int>>> root_points;
for (int n = 0; n < boxes.size(); n++) {
boxes[n] = OrderPointsClockwise(boxes[n]);
for (int m = 0; m < boxes[0].size(); m++) {
boxes[n][m][0] /= ratio_w;
boxes[n][m][1] /= ratio_h;
boxes[n][m][0] = int(_min(_max(boxes[n][m][0], 0), oriimg_w - 1));
boxes[n][m][1] = int(_min(_max(boxes[n][m][1], 0), oriimg_h - 1));
}
}
for (int n = 0; n < boxes.size(); n++) {
int rect_width, rect_height;
rect_width = int(sqrt(pow(boxes[n][0][0] - boxes[n][1][0], 2) +
pow(boxes[n][0][1] - boxes[n][1][1], 2)));
rect_height = int(sqrt(pow(boxes[n][0][0] - boxes[n][3][0], 2) +
pow(boxes[n][0][1] - boxes[n][3][1], 2)));
if (rect_width <= 4 || rect_height <= 4)
continue;
root_points.push_back(boxes[n]);
}
return root_points;
}
} // namespace PaddleOCR
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include "clipper.h"
#include "utility.h"
using namespace std;
namespace PaddleOCR {
class PostProcessor {
public:
void GetContourArea(const std::vector<std::vector<float>> &box,
float unclip_ratio, float &distance);
cv::RotatedRect UnClip(std::vector<std::vector<float>> box,
const float &unclip_ratio);
float **Mat2Vec(cv::Mat mat);
std::vector<std::vector<int>>
OrderPointsClockwise(std::vector<std::vector<int>> pts);
std::vector<std::vector<float>> GetMiniBoxes(cv::RotatedRect box,
float &ssid);
float BoxScoreFast(std::vector<std::vector<float>> box_array, cv::Mat pred);
std::vector<std::vector<std::vector<int>>>
BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
const float &box_thresh, const float &det_db_unclip_ratio);
std::vector<std::vector<std::vector<int>>>
FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes,
float ratio_h, float ratio_w, cv::Mat srcimg);
private:
static bool XsortInt(std::vector<int> a, std::vector<int> b);
static bool XsortFp32(std::vector<float> a, std::vector<float> b);
std::vector<std::vector<float>> Mat2Vector(cv::Mat mat);
inline int _max(int a, int b) { return a >= b ? a : b; }
inline int _min(int a, int b) { return a >= b ? b : a; }
template <class T> inline T clamp(T x, T min, T max) {
if (x > max)
return max;
if (x < min)
return min;
return x;
}
inline float clampf(float x, float min, float max) {
if (x > max)
return max;
if (x < min)
return min;
return x;
}
};
} // namespace PaddleOCR
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
//#include "paddle_api.h"
//#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include "preprocess_op.h"
namespace PaddleOCR {
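// Permute converts an HWC float image to CHW layout by copying each channel
// into a contiguous plane of the output buffer.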
void Permute::Run(const cv::Mat *im, float *data) {
int rh = im->rows;
int rw = im->cols;
int rc = im->channels();
for (int i = 0; i < rc; ++i) {
cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, data + i * rh * rw), i);
}
}
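// Normalize optionally scales pixel values by 1/255 and then applies the
// per-channel transform (x - mean[c]) * scale[c] in place.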
void Normalize::Run(cv::Mat *im, const std::vector<float> &mean,
const std::vector<float> &scale, const bool is_scale) {
double e = 1.0;
if (is_scale) {
e /= 255.0;
}
(*im).convertTo(*im, CV_32FC3, e);
for (int h = 0; h < im->rows; h++) {
for (int w = 0; w < im->cols; w++) {
im->at<cv::Vec3f>(h, w)[0] =
(im->at<cv::Vec3f>(h, w)[0] - mean[0]) * scale[0];
im->at<cv::Vec3f>(h, w)[1] =
(im->at<cv::Vec3f>(h, w)[1] - mean[1]) * scale[1];
im->at<cv::Vec3f>(h, w)[2] =
(im->at<cv::Vec3f>(h, w)[2] - mean[2]) * scale[2];
}
}
}
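// ResizeImgType0 (detection preprocessing) shrinks the image so that its longer
// side does not exceed max_size_len, rounds both sides down to multiples of 32
// (minimum 32), and reports the resulting height/width ratios. With TensorRT a
// fixed 640x640 input is used instead.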
void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
int max_size_len, float &ratio_h, float &ratio_w,
bool use_tensorrt) {
int w = img.cols;
int h = img.rows;
float ratio = 1.f;
int max_wh = w >= h ? w : h;
if (max_wh > max_size_len) {
if (h > w) {
ratio = float(max_size_len) / float(h);
} else {
ratio = float(max_size_len) / float(w);
}
}
int resize_h = int(float(h) * ratio);
int resize_w = int(float(w) * ratio);
// round height and width down to multiples of 32, with a minimum of 32
if (resize_h % 32 != 0)
resize_h = (resize_h / 32 < 1 + 1e-5) ? 32 : (resize_h / 32) * 32;
if (resize_w % 32 != 0)
resize_w = (resize_w / 32 < 1 + 1e-5) ? 32 : (resize_w / 32) * 32;
if (!use_tensorrt) {
cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
ratio_h = float(resize_h) / float(h);
ratio_w = float(resize_w) / float(w);
} else {
cv::resize(img, resize_img, cv::Size(640, 640));
ratio_h = float(640) / float(h);
ratio_w = float(640) / float(w);
}
}
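// CrnnResizeImg (recognition preprocessing) resizes the crop to height imgH
// while keeping its aspect ratio; the target width imgW is derived from
// wh_ratio (overriding rec_image_shape[2]) and the image is right-padded with
// gray (127) up to imgW. The TensorRT branch produces a fixed 100x32 input.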
void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
bool use_tensorrt,
const std::vector<int> &rec_image_shape) {
int imgC, imgH, imgW;
imgC = rec_image_shape[0];
imgH = rec_image_shape[1];
imgW = rec_image_shape[2];
imgW = int(32 * wh_ratio);
float ratio = float(img.cols) / float(img.rows);
int resize_w, resize_h;
if (ceilf(imgH * ratio) > imgW)
resize_w = imgW;
else
resize_w = int(ceilf(imgH * ratio));
if (!use_tensorrt) {
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
cv::INTER_LINEAR);
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0,
int(imgW - resize_img.cols), cv::BORDER_CONSTANT,
{127, 127, 127});
} else {
int k = int(img.cols * 32 / img.rows);
if (k >= 100) {
cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f,
cv::INTER_LINEAR);
} else {
cv::resize(img, resize_img, cv::Size(k, 32), 0.f, 0.f, cv::INTER_LINEAR);
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, int(100 - k),
cv::BORDER_CONSTANT, {127, 127, 127});
}
}
}
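// ClsResizeImg (direction-classifier preprocessing) resizes to height imgH with
// the aspect ratio preserved and zero-pads the width up to imgW.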
void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
bool use_tensorrt,
const std::vector<int> &rec_image_shape) {
int imgC, imgH, imgW;
imgC = rec_image_shape[0];
imgH = rec_image_shape[1];
imgW = rec_image_shape[2];
float ratio = float(img.cols) / float(img.rows);
int resize_w, resize_h;
if (ceilf(imgH * ratio) > imgW)
resize_w = imgW;
else
resize_w = int(ceilf(imgH * ratio));
if (!use_tensorrt) {
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
cv::INTER_LINEAR);
if (resize_w < imgW) {
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w,
cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
}
} else {
cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f, cv::INTER_LINEAR);
}
}
} // namespace PaddleOCR
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
using namespace std;
//using namespace paddle;
namespace PaddleOCR {
class Normalize {
public:
virtual void Run(cv::Mat *im, const std::vector<float> &mean,
const std::vector<float> &scale, const bool is_scale = true);
};
// RGB -> CHW
class Permute {
public:
virtual void Run(const cv::Mat *im, float *data);
};
class ResizeImgType0 {
public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img, int max_size_len,
float &ratio_h, float &ratio_w, bool use_tensorrt);
};
class CrnnResizeImg {
public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
bool use_tensorrt = false,
const std::vector<int> &rec_image_shape = {3, 32, 320});
};
class ClsResizeImg {
public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
bool use_tensorrt = false,
const std::vector<int> &rec_image_shape = {3, 48, 192});
};
} // namespace PaddleOCR
\ No newline at end of file
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <ostream>
#include <vector>
#include "utility.h"
namespace PaddleOCR {
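// ReadDict loads the recognition label dictionary, one label per line, and
// exits the program if the file cannot be opened.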
std::vector<std::string> Utility::ReadDict(const std::string &path) {
std::ifstream in(path);
std::string line;
std::vector<std::string> m_vec;
if (in) {
while (getline(in, line)) {
m_vec.push_back(line);
}
} else {
std::cout << "no such label file: " << path << ", exit the program..."
<< std::endl;
exit(1);
}
return m_vec;
}
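// VisualizeBboxes draws the detected boxes as green polygons on a copy of the
// source image and writes the result to ./ocr_vis.png.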
void Utility::VisualizeBboxes(
const cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes) {
cv::Mat img_vis;
srcimg.copyTo(img_vis);
for (int n = 0; n < boxes.size(); n++) {
cv::Point rook_points[4];
for (int m = 0; m < boxes[n].size(); m++) {
rook_points[m] = cv::Point(int(boxes[n][m][0]), int(boxes[n][m][1]));
}
const cv::Point *ppt[1] = {rook_points};
int npt[] = {4};
cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
}
cv::imwrite("./ocr_vis.png", img_vis);
std::cout << "The detection visualized image saved in ./ocr_vis.png"
<< std::endl;
}
} // namespace PaddleOCR
\ No newline at end of file
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <stdlib.h>
#include <vector>
#include <algorithm>
#include <cstring>
#include <fstream>
#include <numeric>
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
namespace PaddleOCR {
class Utility {
public:
static std::vector<std::string> ReadDict(const std::string &path);
static void
VisualizeBboxes(const cv::Mat &srcimg,
const std::vector<std::vector<std::vector<int>>> &boxes);
template <class ForwardIterator>
inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
return std::distance(first, std::max_element(first, last));
}
};
} // namespace PaddleOCR
\ No newline at end of file
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
This document will use an example of a text classification task based on the IMDB dataset to show how to build an A/B Test framework using Paddle Serving. The structure relationship between the client and servers in the example is shown in the figure below. This document will use an example of a text classification task based on the IMDB dataset to show how to build an A/B Test framework using Paddle Serving. The structure relationship between the client and servers in the example is shown in the figure below.
<img src="abtest.png" style="zoom:33%;" /> <img src="abtest.png" style="zoom:25%;" />
Note that: A/B Test is only applicable to RPC mode, not web mode. Note that: A/B Test is only applicable to RPC mode, not web mode.
...@@ -88,7 +88,7 @@ with open('processed.data') as f: ...@@ -88,7 +88,7 @@ with open('processed.data') as f:
cnt[tag]['total'] += 1 cnt[tag]['total'] += 1
for tag, data in cnt.items(): for tag, data in cnt.items():
print('[{}](total: {}) acc: {}'.format(tag, data['total'], float(data['acc']) / float(data['total']))) print('[{}]<total: {}> acc: {}'.format(tag, data['total'], float(data['acc']) / float(data['total'])))
``` ```
In the code, the function `client.add_variant(tag, clusters, variant_weight)` is to add a variant with label `tag` and flow weight `variant_weight`. In this example, a BOW variant with label of `bow` and flow weight of `10`, and an LSTM variant with label of `lstm` and a flow weight of `90` are added. The flow on the client side will be distributed to two variants according to the ratio of `10:90`. In the code, the function `client.add_variant(tag, clusters, variant_weight)` is to add a variant with label `tag` and flow weight `variant_weight`. In this example, a BOW variant with label of `bow` and flow weight of `10`, and an LSTM variant with label of `lstm` and a flow weight of `90` are added. The flow on the client side will be distributed to two variants according to the ratio of `10:90`.
...@@ -98,8 +98,8 @@ When making prediction on the client side, if the parameter `need_variant_tag=Tr ...@@ -98,8 +98,8 @@ When making prediction on the client side, if the parameter `need_variant_tag=Tr
### Expected Results ### Expected Results
Due to different network conditions, the results of each prediction may be slightly different. Due to different network conditions, the results of each prediction may be slightly different.
``` python ``` python
[lstm](total: 1867) acc: 0.490091055169 [lstm]<total: 1867> acc: 0.490091055169
[bow](total: 217) acc: 0.73732718894 [bow]<total: 217> acc: 0.73732718894
``` ```
<!-- <!--
......
...@@ -92,7 +92,7 @@ with open('processed.data') as f: ...@@ -92,7 +92,7 @@ with open('processed.data') as f:
cnt[tag]['total'] += 1 cnt[tag]['total'] += 1
for tag, data in cnt.items(): for tag, data in cnt.items():
print('[{}](total: {}) acc: {}'.format(tag, data['total'], float(data['acc'])/float(data['total']) )) print('[{}]<total: {}> acc: {}'.format(tag, data['total'], float(data['acc'])/float(data['total']) ))
``` ```
代码中,`client.add_variant(tag, clusters, variant_weight)`是为了添加一个标签为`tag`、流量权重为`variant_weight`的variant。在这个样例中,添加了一个标签为`bow`、流量权重为`10`的BOW variant,以及一个标签为`lstm`、流量权重为`90`的LSTM variant。Client端的流量会根据`10:90`的比例分发到两个variant。 代码中,`client.add_variant(tag, clusters, variant_weight)`是为了添加一个标签为`tag`、流量权重为`variant_weight`的variant。在这个样例中,添加了一个标签为`bow`、流量权重为`10`的BOW variant,以及一个标签为`lstm`、流量权重为`90`的LSTM variant。Client端的流量会根据`10:90`的比例分发到两个variant。
...@@ -101,6 +101,6 @@ Client端做预测时,若指定参数`need_variant_tag=True`,返回值则包 ...@@ -101,6 +101,6 @@ Client端做预测时,若指定参数`need_variant_tag=True`,返回值则包
### 预期结果 ### 预期结果
由于网络情况的不同,可能每次预测的结果略有差异。 由于网络情况的不同,可能每次预测的结果略有差异。
``` bash ``` bash
[lstm](total: 1867) acc: 0.490091055169 [lstm]<total: 1867> acc: 0.490091055169
[bow](total: 217) acc: 0.73732718894 [bow]<total: 217> acc: 0.73732718894
``` ```
...@@ -152,6 +152,24 @@ make -j10 ...@@ -152,6 +152,24 @@ make -j10
Execute `make install` to put the target output in the `./output` directory. Execute `make install` to put the target output in the `./output` directory.
### Compile C++ Server with WITH_OPENCV=ON
**Note:** This is only required when you do secondary development on the Paddle Serving C++ code and the new code depends on the OpenCV library.
First, the OpenCV library must be installed. If it is not, please refer to the `Compile and install OpenCV` section later in this document.
Add `-DOPENCV_DIR=${OPENCV_DIR}` and `-DWITH_OPENCV=ON` to the compile command, for example:
``` shell
OPENCV_DIR=your_opencv_dir  # your_opencv_dir is the installation path of the OpenCV library
mkdir server-build-cpu && cd server-build-cpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR/ \
-DPYTHON_LIBRARIES=$PYTHON_LIBRARIES \
-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
-DOPENCV_DIR=${OPENCV_DIR} \
-DWITH_OPENCV=ON \
-DSERVER=ON ..
make -j10
```
**Note:** After the compilation is successful, you need to set the `SERVING_BIN` path, see the following [Notes](COMPILE.md#Notes) ). **Note:** After the compilation is successful, you need to set the `SERVING_BIN` path, see the following [Notes](COMPILE.md#Notes) ).
## Compile Client ## Compile Client
...@@ -209,6 +227,7 @@ Please use the example under `python/examples` to verify. ...@@ -209,6 +227,7 @@ Please use the example under `python/examples` to verify.
| WITH_AVX | Compile Paddle Serving with AVX intrinsics | OFF | | WITH_AVX | Compile Paddle Serving with AVX intrinsics | OFF |
| WITH_MKL | Compile Paddle Serving with MKL support | OFF | | WITH_MKL | Compile Paddle Serving with MKL support | OFF |
| WITH_GPU | Compile Paddle Serving with NVIDIA GPU | OFF | | WITH_GPU | Compile Paddle Serving with NVIDIA GPU | OFF |
| WITH_OPENCV | Compile Paddle Serving with OPENCV | OFF |
| CUDNN_LIBRARY | Define CuDNN library and header path | | | CUDNN_LIBRARY | Define CuDNN library and header path | |
| CUDA_TOOLKIT_ROOT_DIR | Define CUDA PATH | | | CUDA_TOOLKIT_ROOT_DIR | Define CUDA PATH | |
| TENSORRT_ROOT | Define TensorRT PATH | | | TENSORRT_ROOT | Define TensorRT PATH | |
...@@ -247,3 +266,63 @@ The following is the base library version matching relationship used by the Padd ...@@ -247,3 +266,63 @@ The following is the base library version matching relationship used by the Padd
### How to make the compiler detect the CuDNN library ### How to make the compiler detect the CuDNN library
Download the corresponding CUDNN version from NVIDIA developer official website and decompressing it, add `-DCUDNN_ROOT` to cmake command, to specify the path of CUDNN. Download the corresponding CUDNN version from NVIDIA developer official website and decompressing it, add `-DCUDNN_ROOT` to cmake command, to specify the path of CUDNN.
## Compile and install OpenCV
**Note:** You only need to do this if you need to use the OpenCV library in your C++ code.
* First, download the OpenCV source package for Linux from the OpenCV official website. Taking OpenCV 3.4.7 as an example, the download commands are as follows.
```
wget https://github.com/opencv/opencv/archive/3.4.7.tar.gz
tar -xf 3.4.7.tar.gz
```
After extraction, you will see an `opencv-3.4.7/` folder in the current directory.
* Compile OpenCV. Set the OpenCV source path (`root_path`) and the installation path (`install_path`) yourself, then enter the OpenCV source directory and build it as follows.
```shell
root_path=your_opencv_root_path
install_path=${root_path}/opencv3
rm -rf build
mkdir build
cd build
cmake .. \
-DCMAKE_INSTALL_PREFIX=${install_path} \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF \
-DWITH_IPP=OFF \
-DBUILD_IPP_IW=OFF \
-DWITH_LAPACK=OFF \
-DWITH_EIGEN=OFF \
-DCMAKE_INSTALL_LIBDIR=lib64 \
-DWITH_ZLIB=ON \
-DBUILD_ZLIB=ON \
-DWITH_JPEG=ON \
-DBUILD_JPEG=ON \
-DWITH_PNG=ON \
-DBUILD_PNG=ON \
-DWITH_TIFF=ON \
-DBUILD_TIFF=ON
make -j
make install
```
Here, `root_path` is the path of the downloaded OpenCV source code and `install_path` is the OpenCV installation path. After `make install` completes, the OpenCV header files and library files are generated under this directory and are used when compiling code that depends on OpenCV.
The final file structure under the OpenCV installation path is as follows.
```
opencv3/
|-- bin
|-- include
|-- lib
|-- lib64
|-- share
```
...@@ -151,8 +151,27 @@ make -j10 ...@@ -151,8 +151,27 @@ make -j10
执行`make install`可以把目标产出放在`./output`目录下。 执行`make install`可以把目标产出放在`./output`目录下。
### 开启WITH_OPENCV选项编译C++ Server
**注意:** 只有当您需要对Paddle Serving C++部分进行二次开发,且新增的代码依赖于OpenCV库时,您才需要这样做。
编译Serving C++ Server部分,开启WITH_OPENCV选项时,需要已安装的OpenCV库,若尚未安装,可参考本文档后面的说明编译安装OpenCV库。
以开启WITH_OPENCV选项、编译CPU版本C++ Server为例,在上述编译命令基础上,加入`-DOPENCV_DIR=${OPENCV_DIR}`和`-DWITH_OPENCV=ON`选项。
``` shell
OPENCV_DIR=your_opencv_dir #`your_opencv_dir`为opencv库的安装路径。
mkdir server-build-cpu && cd server-build-cpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR/ \
-DPYTHON_LIBRARIES=$PYTHON_LIBRARIES \
-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
-DOPENCV_DIR=${OPENCV_DIR} \
-DWITH_OPENCV=ON \
-DSERVER=ON ..
make -j10
```
**注意:** 编译成功后,需要设置`SERVING_BIN`路径,详见后面的[注意事项](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE_CN.md#注意事项) **注意:** 编译成功后,需要设置`SERVING_BIN`路径,详见后面的[注意事项](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE_CN.md#注意事项)
## 编译Client部分 ## 编译Client部分
``` shell ``` shell
...@@ -174,7 +193,7 @@ make -j10 ...@@ -174,7 +193,7 @@ make -j10
mkdir app-build && cd app-build mkdir app-build && cd app-build
cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR \ cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR \
-DPYTHON_LIBRARIES=$PYTHON_LIBRARIES \ -DPYTHON_LIBRARIES=$PYTHON_LIBRARIES \
-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \ -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
-DAPP=ON .. -DAPP=ON ..
make make
``` ```
...@@ -211,6 +230,7 @@ make ...@@ -211,6 +230,7 @@ make
| WITH_MKL | Compile Paddle Serving with MKL support | OFF | | WITH_MKL | Compile Paddle Serving with MKL support | OFF |
| WITH_GPU | Compile Paddle Serving with NVIDIA GPU | OFF | | WITH_GPU | Compile Paddle Serving with NVIDIA GPU | OFF |
| WITH_TRT | Compile Paddle Serving with TensorRT | OFF | | WITH_TRT | Compile Paddle Serving with TensorRT | OFF |
| WITH_OPENCV | Compile Paddle Serving with OPENCV | OFF |
| CUDNN_LIBRARY | Define CuDNN library and header path | | | CUDNN_LIBRARY | Define CuDNN library and header path | |
| CUDA_TOOLKIT_ROOT_DIR | Define CUDA PATH | | | CUDA_TOOLKIT_ROOT_DIR | Define CUDA PATH | |
| TENSORRT_ROOT | Define TensorRT PATH | | | TENSORRT_ROOT | Define TensorRT PATH | |
...@@ -248,3 +268,61 @@ Paddle Serving通过PaddlePaddle预测库支持在GPU上做预测。WITH_GPU选 ...@@ -248,3 +268,61 @@ Paddle Serving通过PaddlePaddle预测库支持在GPU上做预测。WITH_GPU选
### 如何让Paddle Serving编译系统探测到CuDNN库 ### 如何让Paddle Serving编译系统探测到CuDNN库
从NVIDIA developer官网下载对应版本CuDNN并在本地解压后,在cmake编译命令中增加`-DCUDNN_LIBRARY`参数,指定CuDNN库所在路径。 从NVIDIA developer官网下载对应版本CuDNN并在本地解压后,在cmake编译命令中增加`-DCUDNN_LIBRARY`参数,指定CuDNN库所在路径。
## 编译安装OpenCV库
**注意:** 只有当您需要在C++代码中引入OpenCV库时,您才需要这样做。
* 首先需要从OpenCV官网上下载在Linux环境下源码编译的包,以OpenCV3.4.7为例,下载命令如下。
```
wget https://github.com/opencv/opencv/archive/3.4.7.tar.gz
tar -xf 3.4.7.tar.gz
```
最终可以在当前目录下看到`opencv-3.4.7/`的文件夹。
* 编译OpenCV,设置OpenCV源码路径(`root_path`)以及安装路径(`install_path`)。进入OpenCV源码路径下,按照下面的方式进行编译。
```shell
root_path=your_opencv_root_path
install_path=${root_path}/opencv3
rm -rf build
mkdir build
cd build
cmake .. \
-DCMAKE_INSTALL_PREFIX=${install_path} \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF \
-DWITH_IPP=OFF \
-DBUILD_IPP_IW=OFF \
-DWITH_LAPACK=OFF \
-DWITH_EIGEN=OFF \
-DCMAKE_INSTALL_LIBDIR=lib64 \
-DWITH_ZLIB=ON \
-DBUILD_ZLIB=ON \
-DWITH_JPEG=ON \
-DBUILD_JPEG=ON \
-DWITH_PNG=ON \
-DBUILD_PNG=ON \
-DWITH_TIFF=ON \
-DBUILD_TIFF=ON
make -j
make install
```
其中`root_path`为下载的OpenCV源码路径,`install_path`为OpenCV的安装路径,`make install`完成之后,会在该文件夹下生成OpenCV头文件和库文件,用于引用OpenCV库的代码的编译。
最终在安装路径下的文件结构如下所示。
```
opencv3/
|-- bin
|-- include
|-- lib
|-- lib64
|-- share
```
...@@ -132,7 +132,7 @@ Please install pre-commit, which automatically reformat the changes to C/C++ and ...@@ -132,7 +132,7 @@ Please install pre-commit, which automatically reformat the changes to C/C++ and
Please remember to add related unit tests. Please remember to add related unit tests.
- For C/C++ code, please follow [`google-test` Primer](https://github.com/google/googletest/blob/master/googletest/docs/primer.md) . - For C/C++ code, please follow [`google-test` Primer](https://github.com/google/googletest/blob/master/docs/primer.md) .
- For Python code, please use [Python's standard `unittest` package](http://pythontesting.net/framework/unittest/unittest-introduction/). - For Python code, please use [Python's standard `unittest` package](http://pythontesting.net/framework/unittest/unittest-introduction/).
......
...@@ -7,11 +7,10 @@ ...@@ -7,11 +7,10 @@
There are two examples on CTR under python / examples, they are criteo_ctr, criteo_ctr_with_cube. The former is to save the entire model during training, including sparse parameters. The latter is to cut out the sparse parameters and save them into two parts, one is the sparse parameter and the other is the dense parameter. Because the scale of sparse parameters is very large in industrial cases, reaching the order of 10 ^ 9. Therefore, it is not practical to start large-scale sparse parameter prediction on one machine. Therefore, we introduced Baidu's industrial-grade product Cube to provide the sparse parameter service for many years to provide distributed sparse parameter services. There are two examples on CTR under python / examples, they are criteo_ctr, criteo_ctr_with_cube. The former is to save the entire model during training, including sparse parameters. The latter is to cut out the sparse parameters and save them into two parts, one is the sparse parameter and the other is the dense parameter. Because the scale of sparse parameters is very large in industrial cases, reaching the order of 10 ^ 9. Therefore, it is not practical to start large-scale sparse parameter prediction on one machine. Therefore, we introduced Baidu's industrial-grade product Cube to provide the sparse parameter service for many years to provide distributed sparse parameter services.
The local mode of Cube is different from distributed Cube, which is designed to be convenient for developers to use in experiments and demos. The local mode of Cube is different from distributed Cube, which is designed to be convenient for developers to use in experiments and demos.
<!--If there is a demand for distributed sparse parameter service, please continue reading [Distributed Cube User Guide](./Distributed_Cube) after reading this document (still developing).--> <!--If there is a demand for distributed sparse parameter service, please continue reading [Quantization Storage on Cube Sparse Parameter Indexing](./CUBE_QUANT.md) after reading this document (still developing).-->
This document uses the original model without any compression algorithm. If there is a need for a quantitative model to go online, please read the [Quantization Storage on Cube Sparse Parameter Indexing](./CUBE_QUANT.md) This document uses the original model without any compression algorithm. If there is a need for a quantitative model to go online, please read the [Quantization Storage on Cube Sparse Parameter Indexing](./CUBE_QUANT.md)
## Example ## Example
in directory python/example/criteo_ctr_with_cube, run in directory python/example/criteo_ctr_with_cube, run
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
在python/examples下有两个关于CTR的示例,他们分别是criteo_ctr, criteo_ctr_with_cube。前者是在训练时保存整个模型,包括稀疏参数。后者是将稀疏参数裁剪出来,保存成两个部分,一个是稀疏参数,另一个是稠密参数。由于在工业级的场景中,稀疏参数的规模非常大,达到10^9数量级。因此在一台机器上启动大规模稀疏参数预测是不实际的,因此我们引入百度多年来在稀疏参数索引领域的工业级产品Cube,提供分布式的稀疏参数服务。 在python/examples下有两个关于CTR的示例,他们分别是criteo_ctr, criteo_ctr_with_cube。前者是在训练时保存整个模型,包括稀疏参数。后者是将稀疏参数裁剪出来,保存成两个部分,一个是稀疏参数,另一个是稠密参数。由于在工业级的场景中,稀疏参数的规模非常大,达到10^9数量级。因此在一台机器上启动大规模稀疏参数预测是不实际的,因此我们引入百度多年来在稀疏参数索引领域的工业级产品Cube,提供分布式的稀疏参数服务。
<!--单机版Cube是分布式Cube的弱化版本,旨在方便开发者做实验和Demo时使用。如果有分布式稀疏参数服务的需求,请在读完此文档之后,继续阅读 [稀疏参数索引服务Cube使用指南](分布式Cube)(正在建设中)。--> <!--单机版Cube是分布式Cube的弱化版本,旨在方便开发者做实验和Demo时使用。如果有分布式稀疏参数服务的需求,请在读完此文档之后,继续阅读 [稀疏参数索引服务Cube使用指南](CUBE_LOCAL_CN.md)(正在建设中)。-->
本文档使用的都是未经过任何压缩算法处理的原始模型,如果有量化模型上线需求,请阅读[Cube稀疏参数索引量化存储使用指南](./CUBE_QUANT_CN.md) 本文档使用的都是未经过任何压缩算法处理的原始模型,如果有量化模型上线需求,请阅读[Cube稀疏参数索引量化存储使用指南](./CUBE_QUANT_CN.md)
......
...@@ -70,7 +70,7 @@ The inference framework of the well-known deep learning platform only supports C ...@@ -70,7 +70,7 @@ The inference framework of the well-known deep learning platform only supports C
> Model conversion across deep learning platforms > Model conversion across deep learning platforms
Models trained on other deep learning platforms can be passed《[PaddlePaddle/X2Paddle工具](https://github.com/PaddlePaddle/X2Paddle)》.We convert multiple mainstream CV models to Paddle models. TensorFlow, Caffe, ONNX, PyTorch model conversion is tested.《[An End-to-end Tutorial from Training to Inference Service Deployment](TRAIN_TO_SERVICE.md) Models trained on other deep learning platforms can be passed《[PaddlePaddle/X2Paddle工具](https://github.com/PaddlePaddle/X2Paddle)》.We convert multiple mainstream CV models to Paddle models. TensorFlow, Caffe, ONNX, PyTorch model conversion is tested.《[AIStudio教程-Paddle Serving服务化部署框架](https://www.paddlepaddle.org.cn/tutorials/projectdetail/1555945)
Because it is impossible to directly view the feed and fetch parameter information in the model file, it is not convenient for users to assemble the parameters. Therefore, Paddle Serving developed a tool to convert the Paddle model into Serving format and generate a prototxt file containing feed and fetch parameter information. The following figure is the generated prototxt file of the uci_housing example. For more conversion methods, refer to the document《[How to save a servable model of Paddle Serving?](SAVE.md)》. Because it is impossible to directly view the feed and fetch parameter information in the model file, it is not convenient for users to assemble the parameters. Therefore, Paddle Serving developed a tool to convert the Paddle model into Serving format and generate a prototxt file containing feed and fetch parameter information. The following figure is the generated prototxt file of the uci_housing example. For more conversion methods, refer to the document《[How to save a servable model of Paddle Serving?](SAVE.md)》.
``` ```
......
...@@ -74,7 +74,7 @@ Paddle Serving提供了4种开发语言SDK,包括Python、C++、Java、Golang ...@@ -74,7 +74,7 @@ Paddle Serving提供了4种开发语言SDK,包括Python、C++、Java、Golang
其他深度学习平台训练的模型,可以通过《[PaddlePaddle/X2Paddle工具](https://github.com/PaddlePaddle/X2Paddle)》将多个主流的CV模型转为Paddle模型,测试过TensorFlow、Caffe、ONNX、PyTorch模型转换。 其他深度学习平台训练的模型,可以通过《[PaddlePaddle/X2Paddle工具](https://github.com/PaddlePaddle/X2Paddle)》将多个主流的CV模型转为Paddle模型,测试过TensorFlow、Caffe、ONNX、PyTorch模型转换。
以IMDB评论情感分析任务为例通过9步展示,Paddle Serving从模型的训练到部署预测服务的全流程《[端到端完成从训练到部署全流程](TRAIN_TO_SERVICE_CN.md) 以IMDB评论情感分析任务为例通过9步展示,Paddle Serving从模型的训练到部署预测服务的全流程《[AIStudio教程-Paddle Serving服务化部署框架](https://www.paddlepaddle.org.cn/tutorials/projectdetail/1555945)
由于无法直接查看模型文件中feed和fetch参数信息,不方便用户拼装参数。因此,Paddle Serving开发一个工具将Paddle模型转成Serving的格式,生成包含feed和fetch参数信息的prototxt文件。下图是uci_housing示例的生成的prototxt文件,更多转换方法参考文档《[怎样保存用于Paddle Serving的模型](SAVE_CN.md)》。 由于无法直接查看模型文件中feed和fetch参数信息,不方便用户拼装参数。因此,Paddle Serving开发一个工具将Paddle模型转成Serving的格式,生成包含feed和fetch参数信息的prototxt文件。下图是uci_housing示例的生成的prototxt文件,更多转换方法参考文档《[怎样保存用于Paddle Serving的模型](SAVE_CN.md)》。
``` ```
......
...@@ -14,9 +14,9 @@ ...@@ -14,9 +14,9 @@
0-int64 0-int64
1-float32 1-float32
2-int32 2-int32
#### Q: paddle-serving是否支持windows和Linux环境下的多线程调用 #### Q: paddle-serving是否支持windows和Linux环境下的多线程调用
...@@ -222,9 +222,7 @@ InvalidArgumentError: Device id must be less than GPU count, but received id is: ...@@ -222,9 +222,7 @@ InvalidArgumentError: Device id must be less than GPU count, but received id is:
#### Q: python编译的GCC版本与serving的版本不匹配 #### Q: python编译的GCC版本与serving的版本不匹配
**A:**:1)使用[GPU docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/RUN_IN_DOCKER.md#gpunvidia-docker)解决环境问题 **A:**:1)使用[GPU docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/RUN_IN_DOCKER.md#gpunvidia-docker)解决环境问题;2)修改anaconda的虚拟环境下安装的python的gcc版本[改变python的GCC编译环境](https://www.jianshu.com/p/c498b3d86f77)
2)修改anaconda的虚拟环境下安装的python的gcc版本[参考](https://www.jianshu.com/p/c498b3d86f77)
#### Q: paddle-serving是否支持本地离线安装 #### Q: paddle-serving是否支持本地离线安装
......
...@@ -78,7 +78,7 @@ https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.0.0-py2-none-any.w ...@@ -78,7 +78,7 @@ https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.0.0-py2-none-any.w
``` ```
## ARM user ## ARM user
for ARM user who uses [PaddleLite](https://github.com/PaddlePaddle/PaddleLite) can download the wheel packages as follows. And ARM user should use the xpu-beta docker [DOCKER IMAGES](./DOCKER_IMAGES.md) for ARM user who uses [Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite) can download the wheel packages as follows. And ARM user should use the xpu-beta docker [DOCKER IMAGES](./DOCKER_IMAGES.md)
**We only support Python 3.6 for Arm Users.** **We only support Python 3.6 for Arm Users.**
### Wheel Package Links ### Wheel Package Links
......
...@@ -48,7 +48,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po ...@@ -48,7 +48,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
### Nodes with multiple inputs ### Nodes with multiple inputs
An example containing multiple input nodes is given in the [MODEL_ENSEMBLE_IN_PADDLE_SERVING](MODEL_ENSEMBLE_IN_PADDLE_SERVING.md). A example graph and the corresponding DAG definition code is as follows. An example containing multiple input nodes is given in the [MODEL_ENSEMBLE_IN_PADDLE_SERVING](./deprecated/MODEL_ENSEMBLE_IN_PADDLE_SERVING.md). A example graph and the corresponding DAG definition code is as follows.
<center> <center>
<img src='complex_dag.png' width = "480" height = "400" align="middle"/> <img src='complex_dag.png' width = "480" height = "400" align="middle"/>
......
...@@ -47,7 +47,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po ...@@ -47,7 +47,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
### 包含多个输入的节点 ### 包含多个输入的节点
[Paddle Serving中的集成预测](MODEL_ENSEMBLE_IN_PADDLE_SERVING_CN.md)文档中给出了一个包含多个输入节点的样例,示意图和代码如下。 [Paddle Serving中的集成预测](./deprecated/MODEL_ENSEMBLE_IN_PADDLE_SERVING_CN.md)文档中给出了一个包含多个输入节点的样例,示意图和代码如下。
<center> <center>
<img src='complex_dag.png' width = "480" height = "400" align="middle"/> <img src='complex_dag.png' width = "480" height = "400" align="middle"/>
......
## Paddle Serving uses TensorRT ## Paddle Serving uses TensorRT
(English|[简体中文]((./TENSOR_RT_CN.md))) (English|[简体中文](./TENSOR_RT_CN.md))
### Background ### Background
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
**安装Git工具**: 详情参见[Git官网](https://git-scm.com/downloads) **安装Git工具**: 详情参见[Git官网](https://git-scm.com/downloads)
**安装必要的C++库(可选)**:部分用户可能会在`import paddle`阶段遇见dll无法链接的问题,建议可以[安装Visual Studio社区版本](`https://visualstudio.microsoft.com/`) ,并且安装C++的相关组件。 **安装必要的C++库(可选)**:部分用户可能会在`import paddle`阶段遇见dll无法链接的问题,建议[安装Visual Studio社区版本](https://visualstudio.microsoft.com/) ,并且安装C++的相关组件。
**安装Paddle和Serving**:在Powershell,执行 **安装Paddle和Serving**:在Powershell,执行
......
...@@ -77,7 +77,7 @@ service ImageClassifyService { ...@@ -77,7 +77,7 @@ service ImageClassifyService {
关于Serving端的配置的详细信息,可以参考[Serving端配置](SERVING_CONFIGURE.md) 关于Serving端的配置的详细信息,可以参考[Serving端配置](SERVING_CONFIGURE.md)
以下配置文件将ReaderOP, ClassifyOP和WriteJsonOP串联成一个workflow (关于OP/workflow等概念,可参考[设计文档](../DESIGN.md)) 以下配置文件将ReaderOP, ClassifyOP和WriteJsonOP串联成一个workflow (关于OP/workflow等概念,可参考[设计文档](DESIGN.md))
- 配置文件示例: - 配置文件示例:
......
...@@ -45,11 +45,11 @@ Models that can be predicted using the Paddle Inference Library, models saved du ...@@ -45,11 +45,11 @@ Models that can be predicted using the Paddle Inference Library, models saved du
### 3.4 Server Inferface ### 3.4 Server Inferface
![Server Interface](server_interface.png) ![Server Interface](../server_interface.png)
### 3.5 Client Interface ### 3.5 Client Interface
<img src='client_inferface.png' width = "600" height = "200"> <img src='../client_inferface.png' width = "600" height = "200">
### 3.6 Client io used during Training ### 3.6 Client io used during Training
...@@ -66,7 +66,7 @@ def save_model(server_model_folder, ...@@ -66,7 +66,7 @@ def save_model(server_model_folder,
## 4. Paddle Serving Underlying Framework ## 4. Paddle Serving Underlying Framework
![Paddle-Serging Overall Architecture](framework.png) ![Paddle-Serging Overall Architecture](../framework.png)
**Model Management Framework**: Connects model files of multiple machine learning platforms and provides a unified inference interface **Model Management Framework**: Connects model files of multiple machine learning platforms and provides a unified inference interface
**Business Scheduling Framework**: Abstracts the calculation logic of various different inference models, provides a general DAG scheduling framework, and connects different operators through DAG diagrams to complete a prediction service together. This abstract model allows users to conveniently implement their own calculation logic, and at the same time facilitates operator sharing. (Users build their own forecasting services. A large part of their work is to build DAGs and provide operators.) **Business Scheduling Framework**: Abstracts the calculation logic of various different inference models, provides a general DAG scheduling framework, and connects different operators through DAG diagrams to complete a prediction service together. This abstract model allows users to conveniently implement their own calculation logic, and at the same time facilitates operator sharing. (Users build their own forecasting services. A large part of their work is to build DAGs and provide operators.)
...@@ -102,31 +102,31 @@ class FluidFamilyCore { ...@@ -102,31 +102,31 @@ class FluidFamilyCore {
With reference to the abstract idea of model calculation of the TensorFlow framework, the business logic is abstracted into a DAG diagram, driven by configuration, generating a workflow, and skipping C ++ code compilation. Each specific step of the service corresponds to a specific OP. The OP can configure the upstream OP that it depends on. Unified message passing between OPs is achieved by the thread-level bus and channel mechanisms. For example, the service process of a simple prediction service can be abstracted into 3 steps including reading request data-> calling the prediction interface-> writing back the prediction result, and correspondingly implemented to 3 OP: ReaderOp-> ClassifyOp-> WriteOp With reference to the abstract idea of model calculation of the TensorFlow framework, the business logic is abstracted into a DAG diagram, driven by configuration, generating a workflow, and skipping C ++ code compilation. Each specific step of the service corresponds to a specific OP. The OP can configure the upstream OP that it depends on. Unified message passing between OPs is achieved by the thread-level bus and channel mechanisms. For example, the service process of a simple prediction service can be abstracted into 3 steps including reading request data-> calling the prediction interface-> writing back the prediction result, and correspondingly implemented to 3 OP: ReaderOp-> ClassifyOp-> WriteOp
![Infer Service](predict-service.png) ![Infer Service](../predict-service.png)
Regarding the dependencies between OPs, and the establishment of workflows through OPs, you can refer to [从零开始写一个预测服务](./deprecated/CREATING.md) (simplified Chinese Version) Regarding the dependencies between OPs, and the establishment of workflows through OPs, you can refer to [从零开始写一个预测服务](CREATING.md) (simplified Chinese Version)
Server instance perspective Server instance perspective
![Server instance perspective](server-side.png) ![Server instance perspective](../server-side.png)
#### 4.2.2 Paddle Serving Multi-Service Mechanism #### 4.2.2 Paddle Serving Multi-Service Mechanism
![Paddle Serving multi-service](multi-service.png) ![Paddle Serving multi-service](../multi-service.png)
Paddle Serving instances can load multiple models at the same time, and each model uses a Service (and its configured workflow) to undertake services. You can refer to [service configuration file in Demo example](../tools/cpp_examples/demo-serving/conf/service.prototxt) to learn how to configure multiple services for the serving instance Paddle Serving instances can load multiple models at the same time, and each model uses a Service (and its configured workflow) to undertake services. You can refer to [service configuration file in Demo example](../../tools/cpp_examples/demo-serving/conf/service.prototxt) to learn how to configure multiple services for the serving instance
#### 4.2.3 Hierarchical relationship of business scheduling
From the client's perspective, a Paddle Serving service can be divided into three levels, from top to bottom: Service, Endpoint, and Variant.
![Call hierarchy relationship](../multi-variants.png)
One Service corresponds to one inference model, and there is one Endpoint under the model. Different versions of the model are implemented through multiple Variants under the Endpoint:
The same model prediction service can be configured with multiple variants, each with its own downstream IP list. The client code can configure relative weights for each variant to adjust the traffic ratio (refer to the description of variant_weight_list in [Client Configuration](../CLIENT_CONFIGURE.md) section 3.2).
![Client-side proxy function](../client-side-proxy.png)
## 5. User Interface
...@@ -141,7 +141,7 @@ No matter how the communication protocol changes, the framework only needs to en ...@@ -141,7 +141,7 @@ No matter how the communication protocol changes, the framework only needs to en
### 5.1 Data Compression Method
Baidu-rpc has built-in data compression methods such as snappy, gzip, and zlib, which can be configured in the configuration file (refer to [Client Configuration](../CLIENT_CONFIGURE.md) Section 3.1 for an introduction to compress_type).
### 5.2 C++ SDK API Interface
......
...@@ -47,11 +47,11 @@ PaddlePaddle是百度开源的机器学习框架,广泛支持各种深度学 ...@@ -47,11 +47,11 @@ PaddlePaddle是百度开源的机器学习框架,广泛支持各种深度学
### 3.4 Server Interface
![Server Interface](../server_interface.png)
### 3.5 Client Interface
<img src='../client_inferface.png' width = "600" height = "200">
### 3.6 Client io used during training
...@@ -68,7 +68,7 @@ def save_model(server_model_folder, ...@@ -68,7 +68,7 @@ def save_model(server_model_folder,
## 4. The underlying framework of Paddle Serving
![Overall framework of Paddle Serving](../framework.png)
**Model management framework**: connects model files produced by different machine learning platforms and exposes a unified inference interface to the upper layers.
**Business scheduling framework**: abstracts the computation logic of various inference models and provides a general DAG scheduling framework; operators are chained through the DAG graph to complete an inference request together. This abstraction lets users implement their own computation logic conveniently and makes operators easy to reuse. (When users build their own prediction service, a large part of the work is building the DAG and providing the operator implementations.)
...@@ -104,31 +104,31 @@ class FluidFamilyCore { ...@@ -104,31 +104,31 @@ class FluidFamilyCore {
Following the abstract idea of model computation in the TensorFlow framework, the business logic is abstracted into a DAG graph, driven by configuration, generating a workflow and skipping C++ code compilation. Each specific step of the business corresponds to a specific OP, and each OP can be configured with the upstream OPs it depends on. Unified message passing between OPs is implemented by thread-level Bus and channel mechanisms. For example, the service process of a simple prediction service can be abstracted into 3 steps: reading request data -> calling the prediction interface -> writing back the prediction result, implemented as 3 OPs: ReaderOp -> ClassifyOp -> WriteOp.
![Prediction service](../predict-service.png)
For the dependencies between OPs and how workflows are built from OPs, refer to the relevant chapters of [从零开始写一个预测服务](https://github.com/PaddlePaddle/Serving/blob/develop/doc/deprecated/CREATING.md).
Server instance perspective
![Server instance perspective](../server-side.png)
#### 4.2.2 Paddle Serving multi-service mechanism
![Paddle Serving multi-service mechanism](../multi-service.png)
A Paddle Serving instance can load multiple models at the same time, and each model uses a Service (and its configured workflow) to serve requests. Refer to the [service configuration file in the demo example](../../tools/cpp_examples/demo-serving/conf/service.prototxt) to learn how to configure multiple services for a serving instance.
#### 4.2.3 Hierarchical relationship of business scheduling
From the client's perspective, a Paddle Serving service can be divided into three levels, from top to bottom: Service, Endpoint, and Variant.
![Call hierarchy relationship](../multi-variants.png)
One Service corresponds to one inference model, and there is one endpoint under the model. Different versions of the model are implemented through multiple variants under the endpoint:
The same model prediction service can be configured with multiple variants, each with its own downstream IP list. The client code can configure relative weights for each variant to adjust the traffic ratio (refer to the description of variant_weight_list in section 3.2 of [客户端配置](CLIENT_CONFIGURE.md)).
![Client-side proxy function](../client-side-proxy.png)
## 5. User Interface
...@@ -143,7 +143,7 @@ Paddle Serving实例可以同时加载多个模型,每个模型用一个Servic ...@@ -143,7 +143,7 @@ Paddle Serving实例可以同时加载多个模型,每个模型用一个Servic
### 5.1 Data compression methods
Baidu-rpc has built-in data compression methods such as snappy, gzip, and zlib, which can be configured in the configuration file (refer to section 3.1 of [客户端配置](CLIENT_CONFIGURE.md) for an introduction to compress_type).
### 5.2 C++ SDK API interface
......
...@@ -10,7 +10,7 @@ Next, we will take the text classification task as an example to show model ense ...@@ -10,7 +10,7 @@ Next, we will take the text classification task as an example to show model ense
In this example (see the figure below), the server side predicts the BOW and CNN models in parallel with the same input within one service; the client side fetches the prediction results of the two models and post-processes them to obtain the final prediction result.
![simple example](../model_ensemble_example.png)
It should be noted that, at present, only multiple models with the same input and output format are supported within the same service. In this example, the input and output formats of the CNN and BOW models are the same.
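For illustration, the sketch below shows what the client-side post-processing of such an ensemble can look like; the fetch names `bow_prediction`/`cnn_prediction`, the config path, the port, and the toy input are assumptions, not the demo's exact values.
``` python
# Hypothetical client-side averaging of two models served in one service.
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config("imdb_client_conf/serving_client_conf.prototxt")  # placeholder
client.connect(["127.0.0.1:9292"])

words = np.array([8, 233, 52, 601], dtype="int64").reshape(1, 4)  # toy word ids
fetch_map = client.predict(
    feed={"words": words},
    fetch=["bow_prediction", "cnn_prediction"],  # one fetch var per model (assumed names)
    batch=True)

# Post-process: average the two models' class probabilities.
final_prob = (fetch_map["bow_prediction"] + fetch_map["cnn_prediction"]) / 2.0
print(final_prob)
```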
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
In this example (see the figure below), the server side predicts the BOW and CNN models in parallel with the same input within one service, and the client side obtains the prediction results of both models and post-processes them to obtain the final prediction result.
![simple example](../model_ensemble_example.png)
Note that, at present, only multiple models with the same input and output format are supported within the same service. In this example, the input and output formats of the CNN and BOW models are the same.
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
([简体中文](NEW_WEB_SERVICE_CN.md)|English)
This document takes the image classification service based on the ImageNet dataset as an example to introduce how to develop a new web service. The complete code can be found [here](../../python/examples/imagenet/resnet50_web_service.py).
## WebService base class
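Before the details, a minimal sketch of a custom web service may help orient the reader; the preprocess/postprocess signatures shown here are assumptions based on a typical WebService subclass and may differ between Serving releases, so treat the full resnet50 example as the authoritative reference.
``` python
# Minimal sketch of a custom WebService; method signatures are assumed,
# see resnet50_web_service.py for the real implementation.
import numpy as np
from paddle_serving_server.web_service import WebService


class ImageClassifyService(WebService):
    def preprocess(self, feed=[], fetch=[]):
        # Turn the raw request into model input, e.g. decode and resize images.
        feed_batch = [{"image": np.zeros((3, 224, 224), dtype="float32")}
                      for _ in feed]  # placeholder preprocessing
        is_batch = False
        return feed_batch, fetch, is_batch

    def postprocess(self, feed=[], fetch=[], fetch_map=None):
        # Turn raw model output into the response, e.g. map scores to labels.
        fetch_map = {k: v.tolist() for k, v in fetch_map.items()}
        return fetch_map


service = ImageClassifyService(name="image")
service.load_model_config("serving_server")   # placeholder model directory
service.prepare_server(workdir="workdir", port=9393, device="cpu")
service.run_rpc_service()
service.run_web_service()
```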
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
(简体中文|[English](NEW_WEB_SERVICE.md))
This document takes the ImageNet image classification service as an example to introduce how to develop a new Web Service. The complete code can be found [here](../../python/examples/imagenet/resnet50_web_service.py).
## WebService base class
......
...@@ -32,7 +32,7 @@ public class PipelineClientExample { ...@@ -32,7 +32,7 @@ public class PipelineClientExample {
System.out.println(fetch); System.out.println(fetch);
if (StaticPipelineClient.succ != true) { if (StaticPipelineClient.succ != true) {
if(!StaticPipelineClient.initClient("127.0.0.1","18070")){ if (!StaticPipelineClient.initClient("127.0.0.1","18070")) {
System.out.println("connect failed."); System.out.println("connect failed.");
return false; return false;
} }
...@@ -57,7 +57,7 @@ public class PipelineClientExample { ...@@ -57,7 +57,7 @@ public class PipelineClientExample {
List<String> fetch = Arrays.asList("prediction"); List<String> fetch = Arrays.asList("prediction");
System.out.println(fetch); System.out.println(fetch);
if (StaticPipelineClient.succ != true) { if (StaticPipelineClient.succ != true) {
if(!StaticPipelineClient.initClient("127.0.0.1","18070")){ if (!StaticPipelineClient.initClient("127.0.0.1","18070")) {
System.out.println("connect failed."); System.out.println("connect failed.");
return false; return false;
} }
...@@ -86,7 +86,7 @@ public class PipelineClientExample { ...@@ -86,7 +86,7 @@ public class PipelineClientExample {
}}; }};
List<String> fetch = Arrays.asList("prediction"); List<String> fetch = Arrays.asList("prediction");
if (StaticPipelineClient.succ != true) { if (StaticPipelineClient.succ != true) {
if(!StaticPipelineClient.initClient("127.0.0.1","9998")){ if (!StaticPipelineClient.initClient("127.0.0.1","9998")) {
System.out.println("connect failed."); System.out.println("connect failed.");
return false; return false;
} }
...@@ -105,7 +105,7 @@ public class PipelineClientExample { ...@@ -105,7 +105,7 @@ public class PipelineClientExample {
* @param npdata INDArray type(The input data). * @param npdata INDArray type(The input data).
* @return String (specified String type for python Numpy eval method). * @return String (specified String type for python Numpy eval method).
*/ */
String convertINDArrayToString(INDArray npdata){ String convertINDArrayToString(INDArray npdata) {
return "array("+npdata.toString()+")"; return "array("+npdata.toString()+")";
} }
......
...@@ -30,10 +30,10 @@ public class StaticPipelineClient { ...@@ -30,10 +30,10 @@ public class StaticPipelineClient {
* @param strPort String type(The server port) such as "8891". * @param strPort String type(The server port) such as "8891".
* @return boolean (the sign of connect status). * @return boolean (the sign of connect status).
*/ */
public static boolean initClient(String strIp,String strPort){ public static boolean initClient(String strIp,String strPort) {
String target = strIp+ ":"+ strPort;//"172.17.0.2:18070"; String target = strIp+ ":"+ strPort;//"172.17.0.2:18070";
System.out.println("initial connect."); System.out.println("initial connect.");
if(succ){ if (succ) {
System.out.println("already connect."); System.out.println("already connect.");
return true; return true;
} }
......
...@@ -88,7 +88,7 @@ public class PipelineClient { ...@@ -88,7 +88,7 @@ public class PipelineClient {
keys.add(entry.getKey()); keys.add(entry.getKey());
values.add(entry.getValue()); values.add(entry.getValue());
} }
if(profile){ if (profile) {
keys.add(_profile_key); keys.add(_profile_key);
values.add(_profile_value); values.add(_profile_value);
} }
......
...@@ -37,9 +37,24 @@ using paddle_infer::Tensor; ...@@ -37,9 +37,24 @@ using paddle_infer::Tensor;
using paddle_infer::CreatePredictor; using paddle_infer::CreatePredictor;
DECLARE_int32(gpuid); DECLARE_int32(gpuid);
DECLARE_string(precision);
DECLARE_bool(use_calib);
static const int max_batch = 32; static const int max_batch = 32;
static const int min_subgraph_size = 3; static const int min_subgraph_size = 3;
static PrecisionType precision_type;
PrecisionType GetPrecision(const std::string& precision_data) {
std::string precision_type = predictor::ToLower(precision_data);
if (precision_type == "fp32") {
return PrecisionType::kFloat32;
} else if (precision_type == "int8") {
return PrecisionType::kInt8;
} else if (precision_type == "fp16") {
return PrecisionType::kHalf;
}
return PrecisionType::kFloat32;
}
// Engine Base // Engine Base
class PaddleEngineBase { class PaddleEngineBase {
...@@ -107,9 +122,9 @@ class PaddleInferenceEngine : public PaddleEngineBase { ...@@ -107,9 +122,9 @@ class PaddleInferenceEngine : public PaddleEngineBase {
if (engine_conf.has_encrypted_model() && engine_conf.encrypted_model()) { if (engine_conf.has_encrypted_model() && engine_conf.encrypted_model()) {
// decrypt model // decrypt model
std::string model_buffer, params_buffer, key_buffer; std::string model_buffer, params_buffer, key_buffer;
predictor::ReadBinaryFile(model_path + "encrypt_model", &model_buffer); predictor::ReadBinaryFile(model_path + "/encrypt_model", &model_buffer);
predictor::ReadBinaryFile(model_path + "encrypt_params", &params_buffer); predictor::ReadBinaryFile(model_path + "/encrypt_params", &params_buffer);
predictor::ReadBinaryFile(model_path + "key", &key_buffer); predictor::ReadBinaryFile(model_path + "/key", &key_buffer);
auto cipher = paddle::MakeCipher(""); auto cipher = paddle::MakeCipher("");
std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer); std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
...@@ -137,6 +152,7 @@ class PaddleInferenceEngine : public PaddleEngineBase { ...@@ -137,6 +152,7 @@ class PaddleInferenceEngine : public PaddleEngineBase {
// 2000MB GPU memory // 2000MB GPU memory
config.EnableUseGpu(2000, FLAGS_gpuid); config.EnableUseGpu(2000, FLAGS_gpuid);
} }
precision_type = GetPrecision(FLAGS_precision);
if (engine_conf.has_use_trt() && engine_conf.use_trt()) { if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) { if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
...@@ -145,14 +161,24 @@ class PaddleInferenceEngine : public PaddleEngineBase { ...@@ -145,14 +161,24 @@ class PaddleInferenceEngine : public PaddleEngineBase {
config.EnableTensorRtEngine(1 << 20, config.EnableTensorRtEngine(1 << 20,
max_batch, max_batch,
min_subgraph_size, min_subgraph_size,
Config::Precision::kFloat32, precision_type,
false, false,
false); FLAGS_use_calib);
LOG(INFO) << "create TensorRT predictor"; LOG(INFO) << "create TensorRT predictor";
} }
if (engine_conf.has_use_lite() && engine_conf.use_lite()) { if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
config.EnableLiteEngine(PrecisionType::kFloat32, true); config.EnableLiteEngine(precision_type, true);
}
if ((!engine_conf.has_use_lite() && !engine_conf.has_use_gpu()) ||
(engine_conf.has_use_lite() && !engine_conf.use_lite() &&
engine_conf.has_use_gpu() && !engine_conf.use_gpu())) {
if (precision_type == PrecisionType::kInt8) {
config.EnableMkldnnQuantizer();
} else if (precision_type == PrecisionType::kHalf) {
config.EnableMkldnnBfloat16();
}
} }
if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) { if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
...@@ -171,7 +197,6 @@ class PaddleInferenceEngine : public PaddleEngineBase { ...@@ -171,7 +197,6 @@ class PaddleInferenceEngine : public PaddleEngineBase {
config.EnableMemoryOptim(); config.EnableMemoryOptim();
} }
predictor::AutoLock lock(predictor::GlobalCreateMutex::instance()); predictor::AutoLock lock(predictor::GlobalCreateMutex::instance());
_predictor = CreatePredictor(config); _predictor = CreatePredictor(config);
if (NULL == _predictor.get()) { if (NULL == _predictor.get()) {
......
...@@ -20,6 +20,8 @@ namespace paddle_serving { ...@@ -20,6 +20,8 @@ namespace paddle_serving {
namespace inference { namespace inference {
DEFINE_int32(gpuid, 0, "GPU device id to use"); DEFINE_int32(gpuid, 0, "GPU device id to use");
DEFINE_string(precision, "fp32", "precision to deploy, default is fp32");
DEFINE_bool(use_calib, false, "calibration mode, default is false");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<PaddleInferenceEngine>, ::baidu::paddle_serving::predictor::FluidInferEngine<PaddleInferenceEngine>,
......
...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service ### Start the service
``` ```
tar xf faster_rcnn_hrnetv2p_w18_1x.tar tar xf faster_rcnn_hrnetv2p_w18_1x.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT; for faster inference, please add the `--use_trt` option.
......
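As a rough picture of the client side of this demo, the sketch below reads an image with OpenCV and sends it to the service started above; the resize size, the feed names (`image`, `im_shape`, `scale_factor`), and the fetch name are assumptions, so check `serving_client_conf.prototxt` and the demo's `test_client.py` for the actual values.
``` python
# Illustrative detection client; feed/fetch names and preprocessing are
# assumptions, not the demo's exact test_client.py.
import sys
import cv2
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config("serving_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9494"])

img = cv2.imread(sys.argv[1]).astype("float32") / 255.0  # e.g. 000000570688.jpg
img = cv2.resize(img, (608, 608)).transpose((2, 0, 1))   # assumed size, HWC -> CHW

fetch_map = client.predict(
    feed={
        "image": img,
        "im_shape": np.array([608.0, 608.0], dtype="float32"),
        "scale_factor": np.array([1.0, 1.0], dtype="float32"),
    },
    fetch=["multiclass_nms"],  # placeholder fetch variable
    batch=False)
print(fetch_map)
```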
...@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service
``` ```
tar xf faster_rcnn_hrnetv2p_w18_1x.tar tar xf faster_rcnn_hrnetv2p_w18_1x.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT; if you want faster inference, you can enable the `--use_trt` option.
......
...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service ### Start the service
``` ```
tar xf fcos_dcn_r50_fpn_1x_coco.tar tar xf fcos_dcn_r50_fpn_1x_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT; for faster inference, please add the `--use_trt` option.
...@@ -18,4 +18,3 @@ This model support TensorRT, if you want a faster inference, please use `--use_t ...@@ -18,4 +18,3 @@ This model support TensorRT, if you want a faster inference, please use `--use_t
``` ```
python test_client.py 000000570688.jpg python test_client.py 000000570688.jpg
``` ```
...@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service
``` ```
tar xf fcos_dcn_r50_fpn_1x_coco.tar tar xf fcos_dcn_r50_fpn_1x_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT; if you want faster inference, you can enable the `--use_trt` option.
...@@ -20,4 +20,3 @@ python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --g ...@@ -20,4 +20,3 @@ python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --g
``` ```
python test_client.py 000000570688.jpg python test_client.py 000000570688.jpg
``` ```
...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service ### Start the service
``` ```
tar xf ssd_vgg16_300_240e_voc.tar tar xf ssd_vgg16_300_240e_voc.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT; for faster inference, please add the `--use_trt` option.
...@@ -18,4 +18,3 @@ This model support TensorRT, if you want a faster inference, please use `--use_t ...@@ -18,4 +18,3 @@ This model support TensorRT, if you want a faster inference, please use `--use_t
``` ```
python test_client.py 000000570688.jpg python test_client.py 000000570688.jpg
``` ```
...@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service
``` ```
tar xf ssd_vgg16_300_240e_voc.tar tar xf ssd_vgg16_300_240e_voc.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT; if you want faster inference, you can enable the `--use_trt` option.
...@@ -20,4 +20,3 @@ python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --g ...@@ -20,4 +20,3 @@ python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --g
``` ```
python test_client.py 000000570688.jpg python test_client.py 000000570688.jpg
``` ```
...@@ -13,12 +13,25 @@ sh get_data.sh ...@@ -13,12 +13,25 @@ sh get_data.sh
## Encrypt Model
The `paddlepaddle` package is used in this example; you may need to install it first (`pip install paddlepaddle`).
[python encrypt.py](./encrypt.py)
[//file]:#encrypt.py
``` python
def serving_encryption():
inference_model_to_serving(
dirname="./uci_housing_model",
params_filename=None,
serving_server="encrypt_server",
serving_client="encrypt_client",
encryption=True)
```
dirname is the folder path where the model is located. If the parameters are stored as separate files, there is no need to specify params_filename; if they are saved in a single `__params__` file, you need to set `params_filename="__params__"`.
The key is stored in the `key` file; the encrypted model files and the server-side configuration files are stored in the `encrypt_server` directory, and the client-side configuration files are stored in the `encrypt_client` directory.
**Notice:** When encrypted prediction is used, the model configuration and parameter folders loaded by the server and the client should be `encrypt_server/` and `encrypt_client/` respectively.
## Start Encryption Service
CPU Service
``` ```
...@@ -31,5 +44,5 @@ python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_ ...@@ -31,5 +44,5 @@ python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_
## Prediction
```
python test_client.py encrypt_client/serving_client_conf.prototxt
```
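For reference, a sketch of what the encrypted prediction client does: it loads the client config from `encrypt_client/`, registers the local `key` file, and connects with encryption enabled. The `use_key`/`encryption` arguments reflect the encryption demo's client API as understood here, so verify them against the demo's `test_client.py`.
``` python
# Sketch of an encrypted-prediction client (assumed API, see test_client.py).
import sys
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config(sys.argv[1])       # e.g. encrypt_client/serving_client_conf.prototxt
client.use_key("./key")                      # key produced by encrypt.py
client.connect(["127.0.0.1:9300"], encryption=True)

x = np.random.rand(1, 13).astype("float32")  # placeholder UCI housing features
fetch_map = client.predict(feed={"x": x}, fetch=["price"], batch=True)
print(fetch_map)
```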
...@@ -12,11 +12,27 @@ sh get_data.sh ...@@ -12,11 +12,27 @@ sh get_data.sh
## Encrypt Model
This example uses modules from the `paddlepaddle` package, which needs to be installed first (`pip install paddlepaddle`).
Run [python encrypt.py](./encrypt.py) to encrypt the model.
[//file]:#encrypt.py
``` python
def serving_encryption():
inference_model_to_serving(
dirname="./uci_housing_model",
params_filename=None,
serving_server="encrypt_server",
serving_client="encrypt_client",
encryption=True)
```
Here dirname is the folder path where the model is located.
If the parameters are stored as separate files, there is no need to specify params_filename; if they are saved in a single __params__ file, set `params_filename="__params__"`.
The key is stored in the `key` file; the encrypted model files and the server-side configuration files are stored in the `encrypt_server` directory, and the client-side configuration files are stored in the `encrypt_client` directory.
**Notice:** When encrypted prediction is used, the model configuration and parameter folders loaded by the server and the client are `encrypt_server/` and `encrypt_client/`.
## Start the encrypted prediction service
CPU prediction service
``` ```
...@@ -29,5 +45,5 @@ python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_ ...@@ -29,5 +45,5 @@ python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_
## Prediction
```
python test_client.py encrypt_client/
```
...@@ -18,6 +18,7 @@ from paddle_serving_client.io import inference_model_to_serving ...@@ -18,6 +18,7 @@ from paddle_serving_client.io import inference_model_to_serving
def serving_encryption(): def serving_encryption():
inference_model_to_serving( inference_model_to_serving(
dirname="./uci_housing_model", dirname="./uci_housing_model",
params_filename=None,
serving_server="encrypt_server", serving_server="encrypt_server",
serving_client="encrypt_client", serving_client="encrypt_client",
encryption=True) encryption=True)
......
...@@ -28,7 +28,7 @@ test_reader = paddle.batch( ...@@ -28,7 +28,7 @@ test_reader = paddle.batch(
batch_size=1) batch_size=1)
for data in test_reader(): for data in test_reader():
new_data = np.zeros((1, 1, 13)).astype("float32") new_data = np.zeros((1, 13)).astype("float32")
new_data[0] = data[0][0] new_data[0] = data[0][0]
fetch_map = client.predict( fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=True) feed={"x": new_data}, fetch=["price"], batch=True)
......
...@@ -98,3 +98,30 @@ python rec_debugger_server.py gpu #for gpu user ...@@ -98,3 +98,30 @@ python rec_debugger_server.py gpu #for gpu user
``` ```
python rec_web_client.py python rec_web_client.py
``` ```
## C++ OCR Service
**Notice:** If you need to chain the det model and the rec model and do pre-processing and post-processing in the Paddle Serving C++ framework, you need to use a C++ server compiled with the WITH_OPENCV option; see [COMPILE.md](../../../doc/COMPILE.md).
### Start Service
Select a startup mode according to your CPU/GPU device.
Pass the folder paths of multiple models after the `--model` parameter to start a prediction service that chains multiple models.
```
#for cpu user
python -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293
#for gpu user
python -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 --gpu_id 0
```
### Client Prediction
Pre-processing and post-processing are done in the C++ server, so only the image's Base64-encoded string is passed to the C++ server.
Therefore the value of the `feed_var` parameter in the file `ocr_det_client/serving_client_conf.prototxt` should be changed:
for this case, `feed_type` should be 3 (which means the data type is string) and `shape` should be 1.
By passing in multiple client config folder paths, the client can be started for multi-model prediction.
```
python ocr_cpp_client.py ocr_det_client ocr_rec_client
```
...@@ -98,3 +98,29 @@ python rec_debugger_server.py gpu #for gpu user ...@@ -98,3 +98,29 @@ python rec_debugger_server.py gpu #for gpu user
``` ```
python rec_web_client.py python rec_web_client.py
``` ```
## C++ OCR Service
**Notice:** If you need to use the Paddle Serving C++ framework to chain the det and rec models and do pre- and post-processing, you need a C++ Server compiled with the WITH_OPENCV option; see [COMPILE.md](../../../doc/COMPILE.md) for details.
### Start the service
Select a startup mode according to your CPU/GPU device.
Pass the folder paths of multiple models after `--model` to start a prediction service that chains multiple models.
```
#for cpu user
python -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293
#for gpu user
python -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 --gpu_id 0
```
### Start the client
Since pre-processing and post-processing are done in the C++ Server, only the image's base64-encoded string is passed to the C++ Server, so the client configuration of the first model needs to be modified:
the `feed_var` field in `ocr_det_client/serving_client_conf.prototxt`.
For this example, `feed_type` should be changed to 3 (the data type is string) and `shape` to 1.
Start the client for prediction by passing the client config folder paths of multiple models on the command line.
```
python ocr_cpp_client.py ocr_det_client ocr_rec_client
```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import Client
import sys
import numpy as np
import base64
import os
import cv2
from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
client = Client()
# Multiple client configs (det + rec) are passed on the command line;
# load_client_config accepts a list of config paths.
client.load_client_config(sys.argv[1:])
client.connect(["127.0.0.1:9293"])

test_img_dir = "imgs/"


def cv2_to_base64(image):
    # Encode raw image bytes as a base64 string (feed_type 3) for the C++ server.
    return base64.b64encode(image).decode('utf8')


for img_file in os.listdir(test_img_dir):
    with open(os.path.join(test_img_dir, img_file), 'rb') as file:
        image_data = file.read()
    image = cv2_to_base64(image_data)
    fetch_map = client.predict(
        feed={"image": image},
        fetch=["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"],
        batch=True)
    print(fetch_map)
...@@ -106,8 +106,8 @@ ocr_service.load_model_config("ocr_rec_model") ...@@ -106,8 +106,8 @@ ocr_service.load_model_config("ocr_rec_model")
ocr_service.prepare_server(workdir="workdir", port=9292) ocr_service.prepare_server(workdir="workdir", port=9292)
ocr_service.init_det_debugger(det_model_config="ocr_det_model") ocr_service.init_det_debugger(det_model_config="ocr_det_model")
if sys.argv[1] == 'gpu': if sys.argv[1] == 'gpu':
ocr_service.set_gpus("2") ocr_service.set_gpus("0")
ocr_service.run_debugger_service(gpu = True) ocr_service.run_debugger_service(gpu=True)
elif sys.argv[1] == 'cpu': elif sys.argv[1] == 'cpu':
ocr_service.run_debugger_service() ocr_service.run_debugger_service()
ocr_service.run_web_service() ocr_service.run_web_service()
...@@ -71,7 +71,8 @@ ocr_service.load_model_config("ocr_rec_model") ...@@ -71,7 +71,8 @@ ocr_service.load_model_config("ocr_rec_model")
if sys.argv[1] == 'gpu': if sys.argv[1] == 'gpu':
ocr_service.set_gpus("0") ocr_service.set_gpus("0")
ocr_service.init_rec() ocr_service.init_rec()
ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0) ocr_service.prepare_server(
workdir="workdir", port=9292, device="gpu", gpuid=0)
elif sys.argv[1] == 'cpu': elif sys.argv[1] == 'cpu':
ocr_service.init_rec() ocr_service.init_rec()
ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu") ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import yaml import yaml
import requests import requests
import time import time
import json import json
try: from paddle_serving_server.pipeline import PipelineClient
from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
client = PipelineClient() client = PipelineClient()
client.connect(['127.0.0.1:9998']) client.connect(['127.0.0.1:9998'])
batch_size = 101 batch_size = 101
with open("data-c.txt", 'r') as fin: with open("data-c.txt", 'r') as fin:
lines = fin.readlines() lines = fin.readlines()
start_idx = 0 start_idx = 0
while start_idx < len(lines): while start_idx < len(lines):
end_idx = min(len(lines), start_idx + batch_size) end_idx = min(len(lines), start_idx + batch_size)
feed = {} feed = {}
for i in range(start_idx, end_idx): for i in range(start_idx, end_idx):
feed[str(i - start_idx)] = lines[i] feed[str(i - start_idx)] = lines[i]
ret = client.predict(feed_dict=feed, fetch=["res"]) ret = client.predict(feed_dict=feed, fetch=["res"])
print(ret) print(ret)
start_idx += batch_size start_idx += batch_size
...@@ -11,10 +11,7 @@ ...@@ -11,10 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try: from paddle_serving_server.web_service import WebService, Op
from paddle_serving_server_gpu.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
import sys import sys
...@@ -37,7 +34,8 @@ class BertOp(Op): ...@@ -37,7 +34,8 @@ class BertOp(Op):
for i in range(batch_size): for i in range(batch_size):
feed_dict = self.reader.process(input_dict[str(i)].encode("utf-8")) feed_dict = self.reader.process(input_dict[str(i)].encode("utf-8"))
for key in feed_dict.keys(): for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape((1, len(feed_dict[key]), 1)) feed_dict[key] = np.array(feed_dict[key]).reshape(
(1, len(feed_dict[key]), 1))
feed_res.append(feed_dict) feed_res.append(feed_dict)
feed_dict = {} feed_dict = {}
for key in feed_res[0].keys(): for key in feed_res[0].keys():
...@@ -57,5 +55,5 @@ class BertService(WebService): ...@@ -57,5 +55,5 @@ class BertService(WebService):
bert_service = BertService(name="bert") bert_service = BertService(name="bert")
bert_service.prepare_pipeline_config("config2.yml") bert_service.prepare_pipeline_config("config.yml")
bert_service.run_service() bert_service.run_service()
...@@ -13,10 +13,7 @@ ...@@ -13,10 +13,7 @@
# limitations under the License. # limitations under the License.
import sys import sys
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
try: from paddle_serving_server.web_service import WebService, Op
from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
import base64, cv2 import base64, cv2
......
...@@ -12,17 +12,14 @@ ...@@ -12,17 +12,14 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
import numpy as np
from paddle_serving_app.reader.imdb_reader import IMDBDataset
import logging
from paddle_serving_server.web_service import WebService
from paddle_serving_server.pipeline import Op, RequestOp, ResponseOp from paddle_serving_server.pipeline import Op, RequestOp, ResponseOp
from paddle_serving_server.pipeline import PipelineServer from paddle_serving_server.pipeline import PipelineServer
from paddle_serving_server.pipeline.proto import pipeline_service_pb2 from paddle_serving_server.pipeline.proto import pipeline_service_pb2
from paddle_serving_server.pipeline.channel import ChannelDataErrcode from paddle_serving_server.pipeline.channel import ChannelDataErrcode
import numpy as np
from paddle_serving_app.reader.imdb_reader import IMDBDataset
import logging
try:
from paddle_serving_server.web_service import WebService
except ImportError:
from paddle_serving_server.web_service import WebService
_LOGGER = logging.getLogger() _LOGGER = logging.getLogger()
user_handler = logging.StreamHandler() user_handler = logging.StreamHandler()
......
...@@ -40,7 +40,7 @@ op: ...@@ -40,7 +40,7 @@ op:
fetch_list: ["concat_1.tmp_0"] fetch_list: ["concat_1.tmp_0"]
#计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡 #计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡
devices: "2" devices: "0"
rec: rec:
#并发数,is_thread_op=True时,为线程并发;否则为进程并发 #并发数,is_thread_op=True时,为线程并发;否则为进程并发
concurrency: 2 concurrency: 2
...@@ -64,4 +64,4 @@ op: ...@@ -64,4 +64,4 @@ op:
fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
#计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡 #计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡
devices: "2" devices: "0"
...@@ -11,10 +11,7 @@ ...@@ -11,10 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try: from paddle_serving_server.web_service import WebService, Op
from paddle_serving_server_gpu.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
import cv2 import cv2
...@@ -48,7 +45,7 @@ class DetOp(Op): ...@@ -48,7 +45,7 @@ class DetOp(Op):
imgs = [] imgs = []
for key in input_dict.keys(): for key in input_dict.keys():
data = base64.b64decode(input_dict[key].encode('utf8')) data = base64.b64decode(input_dict[key].encode('utf8'))
data = np.fromstring(data, np.uint8) data = np.frombuffer(data, np.uint8)
self.im = cv2.imdecode(data, cv2.IMREAD_COLOR) self.im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = self.im.shape self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im) det_img = self.det_preprocess(self.im)
...@@ -57,7 +54,7 @@ class DetOp(Op): ...@@ -57,7 +54,7 @@ class DetOp(Op):
return {"image": np.concatenate(imgs, axis=0)}, False, None, "" return {"image": np.concatenate(imgs, axis=0)}, False, None, ""
def postprocess(self, input_dicts, fetch_dict, log_id): def postprocess(self, input_dicts, fetch_dict, log_id):
# print(fetch_dict) # print(fetch_dict)
det_out = fetch_dict["concat_1.tmp_0"] det_out = fetch_dict["concat_1.tmp_0"]
ratio_list = [ ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
...@@ -114,5 +111,5 @@ class OcrService(WebService): ...@@ -114,5 +111,5 @@ class OcrService(WebService):
uci_service = OcrService(name="ocr") uci_service = OcrService(name="ocr")
uci_service.prepare_pipeline_config("config2.yml") uci_service.prepare_pipeline_config("config.yml")
uci_service.run_service() uci_service.run_service()
...@@ -11,10 +11,8 @@ ...@@ -11,10 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try:
from paddle_serving_server.web_service import WebService, Op from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
import sys import sys
...@@ -34,8 +32,11 @@ class UciOp(Op): ...@@ -34,8 +32,11 @@ class UciOp(Op):
x_value = input_dict["x"].split(self.batch_separator) x_value = input_dict["x"].split(self.batch_separator)
x_lst = [] x_lst = []
for x_val in x_value: for x_val in x_value:
x_lst.append(np.array([float(x.strip()) for x in x_val.split(self.separator)]).reshape(1, 13)) x_lst.append(
input_dict["x"] = np.concatenate(x_lst, axis=0) np.array([
float(x.strip()) for x in x_val.split(self.separator)
]).reshape(1, 13))
input_dict["x"] = np.concatenate(x_lst, axis=0)
proc_dict = {} proc_dict = {}
return input_dict, False, None, "" return input_dict, False, None, ""
...@@ -53,5 +54,5 @@ class UciService(WebService): ...@@ -53,5 +54,5 @@ class UciService(WebService):
uci_service = UciService(name="uci") uci_service = UciService(name="uci")
uci_service.prepare_pipeline_config("config2.yml") uci_service.prepare_pipeline_config("config.yml")
uci_service.run_service() uci_service.run_service()
...@@ -11,10 +11,7 @@ ...@@ -11,10 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try: from paddle_serving_server.web_service import WebService, Op
from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
from numpy import array from numpy import array
......
...@@ -13,13 +13,9 @@ ...@@ -13,13 +13,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle_serving_server.web_service import WebService
from paddle_serving_client import Client
from paddle_serving_app.reader import LACReader, SentaReader
import os import os
import sys import sys
import numpy as np import numpy as np
#senta_web_service.py
from paddle_serving_server.web_service import WebService from paddle_serving_server.web_service import WebService
from paddle_serving_client import Client from paddle_serving_client import Client
from paddle_serving_app.reader import LACReader, SentaReader from paddle_serving_app.reader import LACReader, SentaReader
......
...@@ -31,6 +31,7 @@ class UciService(WebService): ...@@ -31,6 +31,7 @@ class UciService(WebService):
uci_service = UciService(name="uci") uci_service = UciService(name="uci")
uci_service.load_model_config("uci_housing_model") uci_service.load_model_config("uci_housing_model")
uci_service.prepare_server(workdir="workdir", port=9393, use_lite=True, use_xpu=True, ir_optim=True) uci_service.prepare_server(
workdir="workdir", port=9393, use_lite=True, use_xpu=True, ir_optim=True)
uci_service.run_rpc_service() uci_service.run_rpc_service()
uci_service.run_web_service() uci_service.run_web_service()
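As a usage note for the web service started by this script, an HTTP call could look like the sketch below; the URL path follows the `name="uci"` given to the service, and the 13 input values are placeholders.
``` python
# Hypothetical HTTP request to the UCI housing web service above.
import json
import requests

payload = {
    "feed": [{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
                    -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}],
    "fetch": ["price"],
}
resp = requests.post(
    "http://127.0.0.1:9393/uci/prediction",
    data=json.dumps(payload),
    headers={"Content-Type": "application/json"})
print(resp.json())
```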
...@@ -19,18 +19,20 @@ import os ...@@ -19,18 +19,20 @@ import os
import google.protobuf.text_format import google.protobuf.text_format
import numpy as np import numpy as np
import argparse import argparse
import paddle.fluid as fluid
import paddle.inference as inference
from .proto import general_model_config_pb2 as m_config from .proto import general_model_config_pb2 as m_config
from paddle.fluid.core import PaddleTensor import paddle.inference as paddle_infer
from paddle.fluid.core import AnalysisConfig
from paddle.fluid.core import create_paddle_predictor
import logging import logging
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s") logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("fluid") logger = logging.getLogger("LocalPredictor")
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
precision_map = {
'int8': paddle_infer.PrecisionType.Int8,
'fp32': paddle_infer.PrecisionType.Float32,
'fp16': paddle_infer.PrecisionType.Half,
}
class LocalPredictor(object): class LocalPredictor(object):
""" """
...@@ -60,9 +62,11 @@ class LocalPredictor(object): ...@@ -60,9 +62,11 @@ class LocalPredictor(object):
use_trt=False, use_trt=False,
use_lite=False, use_lite=False,
use_xpu=False, use_xpu=False,
precision="fp32",
use_calib=False,
use_feed_fetch_ops=False): use_feed_fetch_ops=False):
""" """
Load model config and set the engine config for the paddle predictor Load model configs and create the paddle predictor by Paddle Inference API.
Args: Args:
model_path: model config path. model_path: model config path.
...@@ -75,6 +79,8 @@ class LocalPredictor(object): ...@@ -75,6 +79,8 @@ class LocalPredictor(object):
use_trt: use nvidia TensorRT optimization, False default use_trt: use nvidia TensorRT optimization, False default
use_lite: use Paddle-Lite Engint, False default use_lite: use Paddle-Lite Engint, False default
use_xpu: run predict on Baidu Kunlun, False default use_xpu: run predict on Baidu Kunlun, False default
precision: precision mode, "fp32" default
use_calib: use TensorRT calibration, False default
use_feed_fetch_ops: use feed/fetch ops, False default. use_feed_fetch_ops: use feed/fetch ops, False default.
""" """
client_config = "{}/serving_server_conf.prototxt".format(model_path) client_config = "{}/serving_server_conf.prototxt".format(model_path)
...@@ -83,14 +89,20 @@ class LocalPredictor(object): ...@@ -83,14 +89,20 @@ class LocalPredictor(object):
model_conf = google.protobuf.text_format.Merge( model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf) str(f.read()), model_conf)
if os.path.exists(os.path.join(model_path, "__params__")): if os.path.exists(os.path.join(model_path, "__params__")):
config = AnalysisConfig(os.path.join(model_path, "__model__"), os.path.join(model_path, "__params__")) config = paddle_infer.Config(
os.path.join(model_path, "__model__"),
os.path.join(model_path, "__params__"))
else: else:
config = AnalysisConfig(model_path) config = paddle_infer.Config(model_path)
logger.info("load_model_config params: model_path:{}, use_gpu:{},\
logger.info(
"LocalPredictor load_model_config params: model_path:{}, use_gpu:{},\
gpu_id:{}, use_profile:{}, thread_num:{}, mem_optim:{}, ir_optim:{},\ gpu_id:{}, use_profile:{}, thread_num:{}, mem_optim:{}, ir_optim:{},\
use_trt:{}, use_lite:{}, use_xpu: {}, use_feed_fetch_ops:{}".format( use_trt:{}, use_lite:{}, use_xpu: {}, precision: {}, use_calib: {},\
model_path, use_gpu, gpu_id, use_profile, thread_num, mem_optim, use_feed_fetch_ops:{}"
ir_optim, use_trt, use_lite, use_xpu, use_feed_fetch_ops)) .format(model_path, use_gpu, gpu_id, use_profile, thread_num,
mem_optim, ir_optim, use_trt, use_lite, use_xpu, precision,
use_calib, use_feed_fetch_ops))
self.feed_names_ = [var.alias_name for var in model_conf.feed_var] self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var] self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
...@@ -106,6 +118,9 @@ class LocalPredictor(object): ...@@ -106,6 +118,9 @@ class LocalPredictor(object):
self.fetch_names_to_idx_[var.alias_name] = i self.fetch_names_to_idx_[var.alias_name] = i
self.fetch_names_to_type_[var.alias_name] = var.fetch_type self.fetch_names_to_type_[var.alias_name] = var.fetch_type
precision_type = paddle_infer.PrecisionType.Float32
if precision.lower() in precision_map:
precision_type = precision_map[precision.lower()]
if use_profile: if use_profile:
config.enable_profile() config.enable_profile()
if mem_optim: if mem_optim:
...@@ -121,6 +136,7 @@ class LocalPredictor(object): ...@@ -121,6 +136,7 @@ class LocalPredictor(object):
config.enable_use_gpu(100, gpu_id) config.enable_use_gpu(100, gpu_id)
if use_trt: if use_trt:
config.enable_tensorrt_engine( config.enable_tensorrt_engine(
precision_mode=precision_type,
workspace_size=1 << 20, workspace_size=1 << 20,
max_batch_size=32, max_batch_size=32,
min_subgraph_size=3, min_subgraph_size=3,
...@@ -129,7 +145,7 @@ class LocalPredictor(object): ...@@ -129,7 +145,7 @@ class LocalPredictor(object):
if use_lite: if use_lite:
config.enable_lite_engine( config.enable_lite_engine(
precision_mode=inference.PrecisionType.Float32, precision_mode=precision_type,
zero_copy=True, zero_copy=True,
passes_filter=[], passes_filter=[],
ops_filter=[]) ops_filter=[])
...@@ -138,11 +154,16 @@ class LocalPredictor(object): ...@@ -138,11 +154,16 @@ class LocalPredictor(object):
# 2MB l3 cache # 2MB l3 cache
config.enable_xpu(8 * 1024 * 1024) config.enable_xpu(8 * 1024 * 1024)
self.predictor = create_paddle_predictor(config) if not use_gpu and not use_lite:
if precision_type == paddle_infer.PrecisionType.Int8:
config.enable_quantizer()
if precision.lower() == "bf16":
config.enable_mkldnn_bfloat16()
self.predictor = paddle_infer.create_predictor(config)
def predict(self, feed=None, fetch=None, batch=False, log_id=0): def predict(self, feed=None, fetch=None, batch=False, log_id=0):
""" """
Predict locally Run model inference by Paddle Inference API.
Args: Args:
feed: feed var feed: feed var
...@@ -155,14 +176,16 @@ class LocalPredictor(object): ...@@ -155,14 +176,16 @@ class LocalPredictor(object):
fetch_map: dict fetch_map: dict
""" """
if feed is None or fetch is None: if feed is None or fetch is None:
raise ValueError("You should specify feed and fetch for prediction") raise ValueError("You should specify feed and fetch for prediction.\
log_id:{}".format(log_id))
fetch_list = [] fetch_list = []
if isinstance(fetch, str): if isinstance(fetch, str):
fetch_list = [fetch] fetch_list = [fetch]
elif isinstance(fetch, list): elif isinstance(fetch, list):
fetch_list = fetch fetch_list = fetch
else: else:
raise ValueError("Fetch only accepts string and list of string") raise ValueError("Fetch only accepts string and list of string.\
log_id:{}".format(log_id))
feed_batch = [] feed_batch = []
if isinstance(feed, dict): if isinstance(feed, dict):
...@@ -170,27 +193,21 @@ class LocalPredictor(object): ...@@ -170,27 +193,21 @@ class LocalPredictor(object):
elif isinstance(feed, list): elif isinstance(feed, list):
feed_batch = feed feed_batch = feed
else: else:
raise ValueError("Feed only accepts dict and list of dict") raise ValueError("Feed only accepts dict and list of dict.\
log_id:{}".format(log_id))
int_slot_batch = []
float_slot_batch = []
int_feed_names = []
float_feed_names = []
int_shape = []
float_shape = []
fetch_names = []
counter = 0
batch_size = len(feed_batch)
fetch_names = []
# Filter invalid fetch names
for key in fetch_list: for key in fetch_list:
if key in self.fetch_names_: if key in self.fetch_names_:
fetch_names.append(key) fetch_names.append(key)
if len(fetch_names) == 0: if len(fetch_names) == 0:
raise ValueError( raise ValueError(
"Fetch names should not be empty or out of saved fetch list.") "Fetch names should not be empty or out of saved fetch list.\
return {} log_id:{}".format(log_id))
# Assemble the input data of paddle predictor
input_names = self.predictor.get_input_names() input_names = self.predictor.get_input_names()
for name in input_names: for name in input_names:
if isinstance(feed[name], list): if isinstance(feed[name], list):
...@@ -204,27 +221,31 @@ class LocalPredictor(object): ...@@ -204,27 +221,31 @@ class LocalPredictor(object):
feed[name] = feed[name].astype("int32") feed[name] = feed[name].astype("int32")
else: else:
raise ValueError("local predictor receives wrong data type") raise ValueError("local predictor receives wrong data type")
input_tensor = self.predictor.get_input_tensor(name) input_tensor_handle = self.predictor.get_input_handle(name)
if "{}.lod".format(name) in feed: if "{}.lod".format(name) in feed:
input_tensor.set_lod([feed["{}.lod".format(name)]]) input_tensor_handle.set_lod([feed["{}.lod".format(name)]])
if batch == False: if batch == False:
input_tensor.copy_from_cpu(feed[name][np.newaxis, :]) input_tensor_handle.copy_from_cpu(feed[name][np.newaxis, :])
else: else:
input_tensor.copy_from_cpu(feed[name]) input_tensor_handle.copy_from_cpu(feed[name])
output_tensors = [] output_tensor_handles = []
output_names = self.predictor.get_output_names() output_names = self.predictor.get_output_names()
for output_name in output_names: for output_name in output_names:
output_tensor = self.predictor.get_output_tensor(output_name) output_tensor_handle = self.predictor.get_output_handle(output_name)
output_tensors.append(output_tensor) output_tensor_handles.append(output_tensor_handle)
# Run inference
self.predictor.run()
# Assemble output data of predict results
outputs = [] outputs = []
self.predictor.zero_copy_run() for output_tensor_handle in output_tensor_handles:
for output_tensor in output_tensors: output = output_tensor_handle.copy_to_cpu()
output = output_tensor.copy_to_cpu()
outputs.append(output) outputs.append(output)
fetch_map = {} fetch_map = {}
for i, name in enumerate(fetch): for i, name in enumerate(fetch):
fetch_map[name] = outputs[i] fetch_map[name] = outputs[i]
if len(output_tensors[i].lod()) > 0: if len(output_tensor_handles[i].lod()) > 0:
fetch_map[name + ".lod"] = np.array(output_tensors[i].lod()[ fetch_map[name + ".lod"] = np.array(output_tensor_handles[i]
0]).astype('int32') .lod()[0]).astype('int32')
return fetch_map return fetch_map
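To make the new precision and calibration options concrete, here is a small usage sketch of LocalPredictor; the module path and the model directory follow Serving's examples, but treat the exact argument values as assumptions of this sketch rather than a definitive recipe.
``` python
# Minimal sketch: run a local prediction with the updated LocalPredictor API.
# The model directory "uci_housing_model" and feed/fetch names are example values.
import numpy as np
from paddle_serving_app.local_predict import LocalPredictor

predictor = LocalPredictor()
predictor.load_model_config(
    "uci_housing_model",   # directory containing serving_server_conf.prototxt
    use_gpu=False,
    precision="fp32",      # "fp32", "fp16", "int8" (or "bf16" on CPU)
    use_calib=False)

x = np.random.rand(1, 13).astype("float32")
fetch_map = predictor.predict(feed={"x": x}, fetch=["price"], batch=True)
print(fetch_map)
```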
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from . import version from . import version
from . import client from . import client
from .client import * from .client import *
......
...@@ -31,11 +31,18 @@ sys.path.append( ...@@ -31,11 +31,18 @@ sys.path.append(
os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto')) os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
from .proto import multi_lang_general_model_service_pb2_grpc from .proto import multi_lang_general_model_service_pb2_grpc
#param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64
#param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32
#param 'type'(which is in feed_var or fetch_var) = 2 means dataType is int32
#param 'type'(which is in feed_var or fetch_var) = 3 means dataType is string(also called bytes in proto)
int64_type = 0 int64_type = 0
float32_type = 1 float32_type = 1
int32_type = 2 int32_type = 2
bytes_type = 3
#int_type,float_type,string_type are the set of each subdivision classes.
int_type = set([int64_type, int32_type]) int_type = set([int64_type, int32_type])
float_type = set([float32_type]) float_type = set([float32_type])
string_type = set([bytes_type])
class _NOPProfiler(object): class _NOPProfiler(object):
...@@ -139,10 +146,22 @@ class Client(object): ...@@ -139,10 +146,22 @@ class Client(object):
from .serving_client import PredictorRes from .serving_client import PredictorRes
self.predictorres_constructor = PredictorRes self.predictorres_constructor = PredictorRes
def load_client_config(self, path): def load_client_config(self, model_config_path_list):
if isinstance(model_config_path_list, str):
model_config_path_list = [model_config_path_list]
elif isinstance(model_config_path_list, list):
pass
file_path_list = []
for single_model_config in model_config_path_list:
if os.path.isdir(single_model_config):
file_path_list.append("{}/serving_server_conf.prototxt".format(
single_model_config))
elif os.path.isfile(single_model_config):
file_path_list.append(single_model_config)
from .serving_client import PredictorClient from .serving_client import PredictorClient
model_conf = m_config.GeneralModelConfig() model_conf = m_config.GeneralModelConfig()
f = open(path, 'r') f = open(file_path_list[0], 'r')
model_conf = google.protobuf.text_format.Merge( model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf) str(f.read()), model_conf)
...@@ -151,19 +170,16 @@ class Client(object): ...@@ -151,19 +170,16 @@ class Client(object):
# get feed shapes, feed types # get feed shapes, feed types
# map feed names to index # map feed names to index
self.client_handle_ = PredictorClient() self.client_handle_ = PredictorClient()
self.client_handle_.init(path) self.client_handle_.init(file_path_list)
if "FLAGS_max_body_size" not in os.environ: if "FLAGS_max_body_size" not in os.environ:
os.environ["FLAGS_max_body_size"] = str(512 * 1024 * 1024) os.environ["FLAGS_max_body_size"] = str(512 * 1024 * 1024)
read_env_flags = ["profile_client", "profile_server", "max_body_size"] read_env_flags = ["profile_client", "profile_server", "max_body_size"]
self.client_handle_.init_gflags([sys.argv[ self.client_handle_.init_gflags([sys.argv[
0]] + ["--tryfromenv=" + ",".join(read_env_flags)]) 0]] + ["--tryfromenv=" + ",".join(read_env_flags)])
self.feed_names_ = [var.alias_name for var in model_conf.feed_var] self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var] self.feed_names_to_idx_ = {} #this is not useful
self.feed_names_to_idx_ = {}
self.fetch_names_to_type_ = {}
self.fetch_names_to_idx_ = {}
self.lod_tensor_set = set() self.lod_tensor_set = set()
self.feed_tensor_len = {} self.feed_tensor_len = {} #this is only used for shape check
self.key = None self.key = None
for i, var in enumerate(model_conf.feed_var): for i, var in enumerate(model_conf.feed_var):
...@@ -178,6 +194,14 @@ class Client(object): ...@@ -178,6 +194,14 @@ class Client(object):
for dim in self.feed_shapes_[var.alias_name]: for dim in self.feed_shapes_[var.alias_name]:
counter *= dim counter *= dim
self.feed_tensor_len[var.alias_name] = counter self.feed_tensor_len[var.alias_name] = counter
if len(file_path_list) > 1:
model_conf = m_config.GeneralModelConfig()
f = open(file_path_list[-1], 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_names_to_type_ = {}
self.fetch_names_to_idx_ = {}
for i, var in enumerate(model_conf.fetch_var): for i, var in enumerate(model_conf.fetch_var):
self.fetch_names_to_idx_[var.alias_name] = i self.fetch_names_to_idx_[var.alias_name] = i
self.fetch_names_to_type_[var.alias_name] = var.fetch_type self.fetch_names_to_type_[var.alias_name] = var.fetch_type
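Under this change, load_client_config accepts either a single path or a list of model config directories/files: feed information is read from the first entry and fetch information from the last. A hedged usage sketch (directory names are hypothetical):
from paddle_serving_client import Client

client = Client()
# single model: feed and fetch info both come from this config
client.load_client_config("uci_housing_client")
# multi-model pipeline (e.g. OCR det + rec): feed info from the first config,
# fetch info from the last one
client.load_client_config(["ocr_det_client", "ocr_rec_client"])
client.connect(["127.0.0.1:9393"])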
...@@ -288,13 +312,17 @@ class Client(object): ...@@ -288,13 +312,17 @@ class Client(object):
raise ValueError("Feed only accepts dict and list of dict") raise ValueError("Feed only accepts dict and list of dict")
int_slot_batch = [] int_slot_batch = []
float_slot_batch = []
int_feed_names = [] int_feed_names = []
float_feed_names = []
int_shape = [] int_shape = []
int_lod_slot_batch = [] int_lod_slot_batch = []
float_slot_batch = []
float_feed_names = []
float_lod_slot_batch = [] float_lod_slot_batch = []
float_shape = [] float_shape = []
string_slot_batch = []
string_feed_names = []
string_lod_slot_batch = []
string_shape = []
fetch_names = [] fetch_names = []
counter = 0 counter = 0
...@@ -311,9 +339,11 @@ class Client(object): ...@@ -311,9 +339,11 @@ class Client(object):
for i, feed_i in enumerate(feed_batch): for i, feed_i in enumerate(feed_batch):
int_slot = [] int_slot = []
float_slot = []
int_lod_slot = [] int_lod_slot = []
float_slot = []
float_lod_slot = [] float_lod_slot = []
string_slot = []
string_lod_slot = []
for key in feed_i: for key in feed_i:
if ".lod" not in key and key not in self.feed_names_: if ".lod" not in key and key not in self.feed_names_:
raise ValueError("Wrong feed name: {}.".format(key)) raise ValueError("Wrong feed name: {}.".format(key))
...@@ -368,10 +398,24 @@ class Client(object): ...@@ -368,10 +398,24 @@ class Client(object):
else: else:
float_slot.append(feed_i[key]) float_slot.append(feed_i[key])
self.all_numpy_input = False self.all_numpy_input = False
# if the input is a string, the feed is not a numpy array.
elif self.feed_types_[key] in string_type:
if i == 0:
string_feed_names.append(key)
string_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
string_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
string_lod_slot_batch.append([])
string_slot.append(feed_i[key])
self.has_numpy_input = True
int_slot_batch.append(int_slot) int_slot_batch.append(int_slot)
float_slot_batch.append(float_slot)
int_lod_slot_batch.append(int_lod_slot) int_lod_slot_batch.append(int_lod_slot)
float_slot_batch.append(float_slot)
float_lod_slot_batch.append(float_lod_slot) float_lod_slot_batch.append(float_lod_slot)
string_slot_batch.append(string_slot)
string_lod_slot_batch.append(string_lod_slot)
self.profile_.record('py_prepro_1') self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0') self.profile_.record('py_client_infer_0')
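The string branch above follows the same "{key}.lod" convention as the numeric feeds. A hedged example of a feed dict for a lod string input (variable and fetch names are hypothetical):
feed = {
    "words": "hello world",   # string feed, sent as bytes (type code 3)
    "words.lod": [0, 2],      # level-of-detail offsets for this batch
}
fetch = ["prediction"]
# result = client.predict(feed=feed, fetch=fetch, batch=True)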
...@@ -381,8 +425,9 @@ class Client(object): ...@@ -381,8 +425,9 @@ class Client(object):
res = self.client_handle_.numpy_predict( res = self.client_handle_.numpy_predict(
float_slot_batch, float_feed_names, float_shape, float_slot_batch, float_feed_names, float_shape,
float_lod_slot_batch, int_slot_batch, int_feed_names, int_shape, float_lod_slot_batch, int_slot_batch, int_feed_names, int_shape,
int_lod_slot_batch, fetch_names, result_batch_handle, self.pid, int_lod_slot_batch, string_slot_batch, string_feed_names,
log_id) string_shape, string_lod_slot_batch, fetch_names,
result_batch_handle, self.pid, log_id)
elif self.has_numpy_input == False: elif self.has_numpy_input == False:
raise ValueError( raise ValueError(
"Please make sure all of your inputs are numpy array") "Please make sure all of your inputs are numpy array")
...@@ -509,8 +554,8 @@ class MultiLangClient(object): ...@@ -509,8 +554,8 @@ class MultiLangClient(object):
get_client_config_req = multi_lang_general_model_service_pb2.GetClientConfigRequest( get_client_config_req = multi_lang_general_model_service_pb2.GetClientConfigRequest(
) )
resp = self.stub_.GetClientConfig(get_client_config_req) resp = self.stub_.GetClientConfig(get_client_config_req)
model_config_str = resp.client_config_str model_config_path_list = resp.client_config_str_list
self._parse_model_config(model_config_str) self._parse_model_config(model_config_path_list)
def _flatten_list(self, nested_list): def _flatten_list(self, nested_list):
for item in nested_list: for item in nested_list:
...@@ -520,25 +565,39 @@ class MultiLangClient(object): ...@@ -520,25 +565,39 @@ class MultiLangClient(object):
else: else:
yield item yield item
def _parse_model_config(self, model_config_str): def _parse_model_config(self, model_config_path_list):
if isinstance(model_config_path_list, str):
model_config_path_list = [model_config_path_list]
elif isinstance(model_config_path_list, list):
pass
file_path_list = []
for single_model_config in model_config_path_list:
if os.path.isdir(single_model_config):
file_path_list.append("{}/serving_server_conf.prototxt".format(
single_model_config))
elif os.path.isfile(single_model_config):
file_path_list.append(single_model_config)
model_conf = m_config.GeneralModelConfig() model_conf = m_config.GeneralModelConfig()
model_conf = google.protobuf.text_format.Merge(model_config_str, f = open(file_path_list[0], 'r')
model_conf) model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
self.feed_names_ = [var.alias_name for var in model_conf.feed_var] self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_types_ = {} self.feed_types_ = {}
self.feed_shapes_ = {} self.feed_shapes_ = {}
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_types_ = {}
self.lod_tensor_set_ = set() self.lod_tensor_set_ = set()
for i, var in enumerate(model_conf.feed_var): for i, var in enumerate(model_conf.feed_var):
self.feed_types_[var.alias_name] = var.feed_type self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor: if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name) self.lod_tensor_set_.add(var.alias_name)
else: if len(file_path_list) > 1:
counter = 1 model_conf = m_config.GeneralModelConfig()
for dim in self.feed_shapes_[var.alias_name]: f = open(file_path_list[-1], 'r')
counter *= dim model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_types_ = {}
for i, var in enumerate(model_conf.fetch_var): for i, var in enumerate(model_conf.fetch_var):
self.fetch_types_[var.alias_name] = var.fetch_type self.fetch_types_[var.alias_name] = var.fetch_type
if var.is_lod_tensor: if var.is_lod_tensor:
......
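On the gRPC side, connect() asks the server for its client config (GetClientConfig now returns a list of paths, see below) and parses it with the logic above. A hedged usage sketch (endpoint and variable names are hypothetical):
import numpy as np
from paddle_serving_client import MultiLangClient

client = MultiLangClient()
client.connect(["127.0.0.1:9393"])
ret = client.predict(feed={"x": np.ones((1, 13), dtype="float32")},
                     fetch=["price"])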
File mode changed from 100644 to 100755
...@@ -13,7 +13,9 @@ class OpMaker(object): ...@@ -13,7 +13,9 @@ class OpMaker(object):
"general_text_response": "GeneralTextResponseOp", "general_text_response": "GeneralTextResponseOp",
"general_single_kv": "GeneralSingleKVOp", "general_single_kv": "GeneralSingleKVOp",
"general_dist_kv_infer": "GeneralDistKVInferOp", "general_dist_kv_infer": "GeneralDistKVInferOp",
"general_dist_kv": "GeneralDistKVOp" "general_dist_kv": "GeneralDistKVOp",
"general_copy": "GeneralCopyOp",
"general_detection":"GeneralDetectionOp",
} }
self.node_name_suffix_ = collections.defaultdict(int) self.node_name_suffix_ = collections.defaultdict(int)
...@@ -45,7 +47,6 @@ class OpMaker(object): ...@@ -45,7 +47,6 @@ class OpMaker(object):
# overall efficiency. # overall efficiency.
return google.protobuf.text_format.MessageToString(node) return google.protobuf.text_format.MessageToString(node)
class OpSeqMaker(object): class OpSeqMaker(object):
def __init__(self): def __init__(self):
self.workflow = server_sdk.Workflow() self.workflow = server_sdk.Workflow()
...@@ -78,7 +79,8 @@ class OpSeqMaker(object): ...@@ -78,7 +79,8 @@ class OpSeqMaker(object):
workflow_conf.workflows.extend([self.workflow]) workflow_conf.workflows.extend([self.workflow])
return workflow_conf return workflow_conf
# TODO: currently the SDK only supports "Sequence", so OpGraphMaker is not used.
# The config should be extended so the command line can accept list[dict] or list[list[]].
class OpGraphMaker(object): class OpGraphMaker(object):
def __init__(self): def __init__(self):
self.workflow = server_sdk.Workflow() self.workflow = server_sdk.Workflow()
......
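The two newly registered op types can be created by name like any other op; a brief hedged sketch (assuming OpMaker is exported by the paddle_serving_server package, as the serve helper below uses it):
import paddle_serving_server as serving

op_maker = serving.OpMaker()
copy_op = op_maker.create('general_copy')            # newly registered
detection_op = op_maker.create('general_detection')  # newly registered (OCR det)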
...@@ -11,38 +11,55 @@ from .proto import multi_lang_general_model_service_pb2_grpc ...@@ -11,38 +11,55 @@ from .proto import multi_lang_general_model_service_pb2_grpc
class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
MultiLangGeneralModelServiceServicer): MultiLangGeneralModelServiceServicer):
def __init__(self, model_config_path, is_multi_model, endpoints): def __init__(self, model_config_path_list, is_multi_model, endpoints):
self.is_multi_model_ = is_multi_model self.is_multi_model_ = is_multi_model
self.model_config_path_ = model_config_path self.model_config_path_list = model_config_path_list
self.endpoints_ = endpoints self.endpoints_ = endpoints
with open(self.model_config_path_) as f: self._init_bclient(self.model_config_path_list, self.endpoints_)
self.model_config_str_ = str(f.read()) self._parse_model_config(self.model_config_path_list)
self._parse_model_config(self.model_config_str_)
self._init_bclient(self.model_config_path_, self.endpoints_)
def _init_bclient(self, model_config_path, endpoints, timeout_ms=None): def _init_bclient(self, model_config_path_list, endpoints, timeout_ms=None):
from paddle_serving_client import Client from paddle_serving_client import Client
self.bclient_ = Client() self.bclient_ = Client()
if timeout_ms is not None: if timeout_ms is not None:
self.bclient_.set_rpc_timeout_ms(timeout_ms) self.bclient_.set_rpc_timeout_ms(timeout_ms)
self.bclient_.load_client_config(model_config_path) self.bclient_.load_client_config(model_config_path_list)
self.bclient_.connect(endpoints) self.bclient_.connect(endpoints)
def _parse_model_config(self, model_config_str): def _parse_model_config(self, model_config_path_list):
if isinstance(model_config_path_list, str):
model_config_path_list = [model_config_path_list]
elif isinstance(model_config_path_list, list):
pass
file_path_list = []
for single_model_config in model_config_path_list:
if os.path.isdir(single_model_config):
file_path_list.append("{}/serving_server_conf.prototxt".format(
single_model_config))
elif os.path.isfile(single_model_config):
file_path_list.append(single_model_config)
model_conf = m_config.GeneralModelConfig() model_conf = m_config.GeneralModelConfig()
model_conf = google.protobuf.text_format.Merge(model_config_str, f = open(file_path_list[0], 'r')
model_conf) model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
self.feed_names_ = [var.alias_name for var in model_conf.feed_var] self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_types_ = {} self.feed_types_ = {}
self.feed_shapes_ = {} self.feed_shapes_ = {}
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_types_ = {}
self.lod_tensor_set_ = set() self.lod_tensor_set_ = set()
for i, var in enumerate(model_conf.feed_var): for i, var in enumerate(model_conf.feed_var):
self.feed_types_[var.alias_name] = var.feed_type self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor: if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name) self.lod_tensor_set_.add(var.alias_name)
if len(file_path_list) > 1:
model_conf = m_config.GeneralModelConfig()
f = open(file_path_list[-1], 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_types_ = {}
for i, var in enumerate(model_conf.fetch_var): for i, var in enumerate(model_conf.fetch_var):
self.fetch_types_[var.alias_name] = var.fetch_type self.fetch_types_[var.alias_name] = var.fetch_type
if var.is_lod_tensor: if var.is_lod_tensor:
...@@ -69,11 +86,11 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -69,11 +86,11 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
v_type = self.feed_types_[name] v_type = self.feed_types_[name]
data = None data = None
if is_python: if is_python:
if v_type == 0: if v_type == 0:  # int64
data = np.frombuffer(var.data, dtype="int64") data = np.frombuffer(var.data, dtype="int64")
elif v_type == 1: elif v_type == 1:  # float32
data = np.frombuffer(var.data, dtype="float32") data = np.frombuffer(var.data, dtype="float32")
elif v_type == 2: elif v_type == 2:  # int32
data = np.frombuffer(var.data, dtype="int32") data = np.frombuffer(var.data, dtype="int32")
else: else:
raise Exception("error type.") raise Exception("error type.")
...@@ -82,7 +99,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -82,7 +99,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
data = np.array(list(var.int64_data), dtype="int64") data = np.array(list(var.int64_data), dtype="int64")
elif v_type == 1: # float32 elif v_type == 1: # float32
data = np.array(list(var.float_data), dtype="float32") data = np.array(list(var.float_data), dtype="float32")
elif v_type == 2: elif v_type == 2:  # int32
data = np.array(list(var.int_data), dtype="int32") data = np.array(list(var.int_data), dtype="int32")
else: else:
raise Exception("error type.") raise Exception("error type.")
...@@ -138,7 +155,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -138,7 +155,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
# This process and the inference process cannot operate at the same time. # This process and the inference process cannot operate at the same time.
# For performance reasons, do not add thread lock temporarily. # For performance reasons, do not add thread lock temporarily.
timeout_ms = request.timeout_ms timeout_ms = request.timeout_ms
self._init_bclient(self.model_config_path_, self.endpoints_, timeout_ms) self._init_bclient(self.model_config_path_list, self.endpoints_, timeout_ms)
resp = multi_lang_general_model_service_pb2.SimpleResponse() resp = multi_lang_general_model_service_pb2.SimpleResponse()
resp.err_code = 0 resp.err_code = 0
return resp return resp
...@@ -155,6 +172,8 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -155,6 +172,8 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
return self._pack_inference_response(ret, fetch_names, is_python) return self._pack_inference_response(ret, fetch_names, is_python)
def GetClientConfig(self, request, context): def GetClientConfig(self, request, context):
# model_config_path_list is a list right now.
# dict support should be added when OpGraphMaker is used.
resp = multi_lang_general_model_service_pb2.GetClientConfigResponse() resp = multi_lang_general_model_service_pb2.GetClientConfigResponse()
resp.client_config_str = self.model_config_str_ resp.client_config_str_list[:] = self.model_config_path_list
return resp return resp
\ No newline at end of file
...@@ -22,7 +22,7 @@ import os ...@@ -22,7 +22,7 @@ import os
import json import json
import base64 import base64
import time import time
from multiprocessing import Pool, Process from multiprocessing import Process
from flask import Flask, request from flask import Flask, request
import sys import sys
if sys.version_info.major == 2: if sys.version_info.major == 2:
...@@ -41,7 +41,7 @@ def serve_args(): ...@@ -41,7 +41,7 @@ def serve_args():
"--device", type=str, default="gpu", help="Type of device") "--device", type=str, default="gpu", help="Type of device")
parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids") parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids")
parser.add_argument( parser.add_argument(
"--model", type=str, default="", help="Model for serving") "--model", type=str, default="", nargs="+", help="Model for serving")
parser.add_argument( parser.add_argument(
"--workdir", "--workdir",
type=str, type=str,
...@@ -51,6 +51,16 @@ def serve_args(): ...@@ -51,6 +51,16 @@ def serve_args():
"--name", type=str, default="None", help="Default service name") "--name", type=str, default="None", help="Default service name")
parser.add_argument( parser.add_argument(
"--use_mkl", default=False, action="store_true", help="Use MKL") "--use_mkl", default=False, action="store_true", help="Use MKL")
parser.add_argument(
"--precision",
type=str,
default="fp32",
help="precision mode(fp32, int8, fp16, bf16)")
parser.add_argument(
"--use_calib",
default=False,
action="store_true",
help="Use TensorRT Calibration")
parser.add_argument( parser.add_argument(
"--mem_optim_off", "--mem_optim_off",
default=False, default=False,
...@@ -110,15 +120,29 @@ def start_standard_model(serving_port): # pylint: disable=doc-string-missing ...@@ -110,15 +120,29 @@ def start_standard_model(serving_port): # pylint: disable=doc-string-missing
print("You must specify your serving model") print("You must specify your serving model")
exit(-1) exit(-1)
for single_model_config in args.model:
if os.path.isdir(single_model_config):
pass
elif os.path.isfile(single_model_config):
raise ValueError("The input of --model should be a dir not file.")
import paddle_serving_server as serving import paddle_serving_server as serving
op_maker = serving.OpMaker() op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = serving.OpSeqMaker() op_seq_maker = serving.OpSeqMaker()
read_op = op_maker.create('general_reader')
op_seq_maker.add_op(read_op) op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
for idx, single_model in enumerate(model):
infer_op_name = "general_infer"
# Temporary support for the OCR model; it will be completely revised later.
# To use this, the C++ server must be compiled with the WITH_OPENCV option.
if len(model) == 2 and idx == 0 and model[0] == 'ocr_det_model':
infer_op_name = "general_detection"
general_infer_op = op_maker.create(infer_op_name)
op_seq_maker.add_op(general_infer_op)
general_response_op = op_maker.create('general_response')
op_seq_maker.add_op(general_response_op) op_seq_maker.add_op(general_response_op)
server = None server = None
...@@ -133,6 +157,8 @@ def start_standard_model(serving_port): # pylint: disable=doc-string-missing ...@@ -133,6 +157,8 @@ def start_standard_model(serving_port): # pylint: disable=doc-string-missing
server.use_mkl(use_mkl) server.use_mkl(use_mkl)
server.set_max_body_size(max_body_size) server.set_max_body_size(max_body_size)
server.set_port(port) server.set_port(port)
server.set_precision(args.precision)
server.set_use_calib(args.use_calib)
server.use_encryption_model(use_encryption_model) server.use_encryption_model(use_encryption_model)
if args.product_name != None: if args.product_name != None:
server.set_product_name(args.product_name) server.set_product_name(args.product_name)
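With --model now accepting several directories (nargs="+") and the new --precision/--use_calib flags, the standard path above roughly corresponds to the following hedged, self-contained sketch; it assumes Server, OpMaker and OpSeqMaker are exported at the package level as the serve helper uses them, and the model directories, port and values are hypothetical.
import paddle_serving_server as serving

op_maker = serving.OpMaker()
op_seq_maker = serving.OpSeqMaker()
op_seq_maker.add_op(op_maker.create('general_reader'))
op_seq_maker.add_op(op_maker.create('general_detection'))  # ocr_det_model
op_seq_maker.add_op(op_maker.create('general_infer'))      # ocr_rec_model
op_seq_maker.add_op(op_maker.create('general_response'))

server = serving.Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_precision("fp16")    # fp32 / int8 / fp16 / bf16
server.set_use_calib(False)     # TensorRT calibration toggle
server.set_gpuid(0)
server.load_model_config(["ocr_det_model", "ocr_rec_model"])
server.prepare_server(workdir="workdir", port=9293, device="gpu")
# server.run_server()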
...@@ -166,15 +192,26 @@ def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-strin ...@@ -166,15 +192,26 @@ def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-strin
print("You must specify your serving model") print("You must specify your serving model")
exit(-1) exit(-1)
for single_model_config in args.model:
if os.path.isdir(single_model_config):
pass
elif os.path.isfile(single_model_config):
raise ValueError("The input of --model should be a dir not file.")
import paddle_serving_server as serving import paddle_serving_server as serving
op_maker = serving.OpMaker() op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = serving.OpSeqMaker() op_seq_maker = serving.OpSeqMaker()
read_op = op_maker.create('general_reader')
op_seq_maker.add_op(read_op) op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op) for idx, single_model in enumerate(model):
infer_op_name = "general_infer"
if len(model) == 2 and idx == 0:
infer_op_name = "general_detection"
else:
infer_op_name = "general_infer"
general_infer_op = op_maker.create(infer_op_name)
op_seq_maker.add_op(general_infer_op)
general_response_op = op_maker.create('general_response')
op_seq_maker.add_op(general_response_op) op_seq_maker.add_op(general_response_op)
if use_multilang: if use_multilang:
...@@ -184,6 +221,8 @@ def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-strin ...@@ -184,6 +221,8 @@ def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-strin
server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num) server.set_num_threads(thread_num)
server.use_mkl(use_mkl) server.use_mkl(use_mkl)
server.set_precision(args.precision)
server.set_use_calib(args.use_calib)
server.set_memory_optimize(mem_optim) server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim) server.set_ir_optimize(ir_optim)
server.set_max_body_size(max_body_size) server.set_max_body_size(max_body_size)
...@@ -269,8 +308,12 @@ class MainService(BaseHTTPRequestHandler): ...@@ -269,8 +308,12 @@ class MainService(BaseHTTPRequestHandler):
return False return False
else: else:
key = base64.b64decode(post_data["key"].encode()) key = base64.b64decode(post_data["key"].encode())
with open(args.model + "/key", "wb") as f: for single_model_config in args.model:
f.write(key) if os.path.isfile(single_model_config):
raise ValueError(
"The input of --model should be a dir not file.")
with open(single_model_config + "/key", "wb") as f:
f.write(key)
return True return True
def check_key(self, post_data): def check_key(self, post_data):
...@@ -278,12 +321,18 @@ class MainService(BaseHTTPRequestHandler): ...@@ -278,12 +321,18 @@ class MainService(BaseHTTPRequestHandler):
return False return False
else: else:
key = base64.b64decode(post_data["key"].encode()) key = base64.b64decode(post_data["key"].encode())
with open(args.model + "/key", "rb") as f: for single_model_config in args.model:
cur_key = f.read() if os.path.isfile(single_model_config):
return (key == cur_key) raise ValueError(
"The input of --model should be a dir not file.")
with open(single_model_config + "/key", "rb") as f:
cur_key = f.read()
if key != cur_key:
return False
return True
def start(self, post_data): def start(self, post_data):
post_data = json.loads(post_data) post_data = json.loads(post_data.decode('utf-8'))
global p_flag global p_flag
if not p_flag: if not p_flag:
if args.use_encryption_model: if args.use_encryption_model:
...@@ -323,7 +372,14 @@ class MainService(BaseHTTPRequestHandler): ...@@ -323,7 +372,14 @@ class MainService(BaseHTTPRequestHandler):
if __name__ == "__main__": if __name__ == "__main__":
args = serve_args() args = serve_args()
for single_model_config in args.model:
if os.path.isdir(single_model_config):
pass
elif os.path.isfile(single_model_config):
raise ValueError("The input of --model should be a dir not file.")
if args.name == "None": if args.name == "None":
from .web_service import port_is_available from .web_service import port_is_available
if args.use_encryption_model: if args.use_encryption_model:
...@@ -353,7 +409,9 @@ if __name__ == "__main__": ...@@ -353,7 +409,9 @@ if __name__ == "__main__":
device=args.device, device=args.device,
use_lite=args.use_lite, use_lite=args.use_lite,
use_xpu=args.use_xpu, use_xpu=args.use_xpu,
ir_optim=args.ir_optim) ir_optim=args.ir_optim,
precision=args.precision,
use_calib=args.use_calib)
web_service.run_rpc_service() web_service.run_rpc_service()
app_instance = Flask(__name__) app_instance = Flask(__name__)
......
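A hedged restatement of the per-model encryption key handling added above: every directory passed via --model must hold the same "key" file for a request to be accepted (the function and variable names below are illustrative, not the module's API).
import base64
import os

def check_key(model_dirs, key_b64):
    key = base64.b64decode(key_b64.encode())
    for d in model_dirs:
        with open(os.path.join(d, "key"), "rb") as f:
            if key != f.read():
                return False
    return True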
...@@ -16,8 +16,10 @@ import os ...@@ -16,8 +16,10 @@ import os
import tarfile import tarfile
import socket import socket
import paddle_serving_server as paddle_serving_server import paddle_serving_server as paddle_serving_server
from paddle_serving_server.rpc_service import MultiLangServerServiceServicer
from .proto import server_configure_pb2 as server_sdk from .proto import server_configure_pb2 as server_sdk
from .proto import general_model_config_pb2 as m_config from .proto import general_model_config_pb2 as m_config
from .proto import multi_lang_general_model_service_pb2_grpc
import google.protobuf.text_format import google.protobuf.text_format
import time import time
from .version import serving_server_version, version_suffix, device_type from .version import serving_server_version, version_suffix, device_type
...@@ -32,6 +34,7 @@ import platform ...@@ -32,6 +34,7 @@ import platform
import numpy as np import numpy as np
import grpc import grpc
import sys import sys
import collections
from multiprocessing import Pool, Process from multiprocessing import Pool, Process
from concurrent import futures from concurrent import futures
...@@ -39,23 +42,37 @@ from concurrent import futures ...@@ -39,23 +42,37 @@ from concurrent import futures
class Server(object): class Server(object):
def __init__(self): def __init__(self):
"""
self.model_toolkit_conf: list = []  # one entry per InferOp (one Engine per OP)
self.model_conf: collections.OrderedDict()  # serving_server_conf.prototxt contents (feed/fetch info); a map for multi-model workflows
self.workflow_fn: str = "workflow.prototxt"  # only one per Service/Workflow
self.resource_fn: str = "resource.prototxt"  # only one per Service; model_toolkit_fn and general_model_config_fn are recorded in this file
self.infer_service_fn: str = "infer_service.prototxt"  # only one per Service (Service -- Workflow)
self.model_toolkit_fn: list = []  # e.g. ["general_infer_0/model_toolkit.prototxt"]; one entry per InferOp (Engine -- OP)
self.general_model_config_fn: list = []  # e.g. ["general_infer_0/general_model.prototxt"]; one entry per InferOp (Feed/Fetch -- OP)
self.subdirectory: list = []  # one entry per InferOp, with name = node.name = engine.name
self.model_config_paths: collections.OrderedDict()  # serving_server_conf.prototxt paths (feed/fetch info); a map for multi-model workflows
"""
self.server_handle_ = None self.server_handle_ = None
self.infer_service_conf = None self.infer_service_conf = None
self.model_toolkit_conf = None self.model_toolkit_conf = []
self.resource_conf = None self.resource_conf = None
self.memory_optimization = False self.memory_optimization = False
self.ir_optimization = False self.ir_optimization = False
self.model_conf = None self.model_conf = collections.OrderedDict()
self.workflow_fn = "workflow.prototxt" self.workflow_fn = "workflow.prototxt"
self.resource_fn = "resource.prototxt" self.resource_fn = "resource.prototxt"
self.infer_service_fn = "infer_service.prototxt" self.infer_service_fn = "infer_service.prototxt"
self.model_toolkit_fn = "model_toolkit.prototxt" self.model_toolkit_fn = []
self.general_model_config_fn = "general_model.prototxt" self.general_model_config_fn = []
self.subdirectory = []
self.cube_config_fn = "cube.conf" self.cube_config_fn = "cube.conf"
self.workdir = "" self.workdir = ""
self.max_concurrency = 0 self.max_concurrency = 0
self.num_threads = 2 self.num_threads = 2
self.port = 8080 self.port = 8080
self.precision = "fp32"
self.use_calib = False
self.reload_interval_s = 10 self.reload_interval_s = 10
self.max_body_size = 64 * 1024 * 1024 self.max_body_size = 64 * 1024 * 1024
self.module_path = os.path.dirname(paddle_serving_server.__file__) self.module_path = os.path.dirname(paddle_serving_server.__file__)
...@@ -67,12 +84,15 @@ class Server(object): ...@@ -67,12 +84,15 @@ class Server(object):
self.use_trt = False self.use_trt = False
self.use_lite = False self.use_lite = False
self.use_xpu = False self.use_xpu = False
self.model_config_paths = None # for multi-model in a workflow self.model_config_paths = collections.OrderedDict()
self.product_name = None self.product_name = None
self.container_id = None self.container_id = None
def get_fetch_list(self): def get_fetch_list(self, infer_node_idx=-1):
fetch_names = [var.alias_name for var in self.model_conf.fetch_var] fetch_names = [
var.alias_name
for var in list(self.model_conf.values())[infer_node_idx].fetch_var
]
return fetch_names return fetch_names
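get_fetch_list now takes an infer_node_idx and defaults to the last infer node; a short hedged illustration (the server argument stands for a Server whose model configs were loaded as described below):
def print_fetch_lists(server):
    print(server.get_fetch_list())    # default -1: fetch vars of the last infer node
    print(server.get_fetch_list(0))   # fetch vars of the first infer node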
def set_max_concurrency(self, concurrency): def set_max_concurrency(self, concurrency):
...@@ -95,6 +115,12 @@ class Server(object): ...@@ -95,6 +115,12 @@ class Server(object):
def set_port(self, port): def set_port(self, port):
self.port = port self.port = port
def set_precision(self, precision="fp32"):
self.precision = precision
def set_use_calib(self, use_calib=False):
self.use_calib = use_calib
def set_reload_interval(self, interval): def set_reload_interval(self, interval):
self.reload_interval_s = interval self.reload_interval_s = interval
...@@ -150,13 +176,13 @@ class Server(object): ...@@ -150,13 +176,13 @@ class Server(object):
def _prepare_engine(self, model_config_paths, device, use_encryption_model): def _prepare_engine(self, model_config_paths, device, use_encryption_model):
if self.model_toolkit_conf == None: if self.model_toolkit_conf == None:
self.model_toolkit_conf = server_sdk.ModelToolkitConf() self.model_toolkit_conf = []
for engine_name, model_config_path in model_config_paths.items(): for engine_name, model_config_path in model_config_paths.items():
engine = server_sdk.EngineDesc() engine = server_sdk.EngineDesc()
engine.name = engine_name engine.name = engine_name
# engine.reloadable_meta = model_config_path + "/fluid_time_file" # engine.reloadable_meta = model_config_path + "/fluid_time_file"
engine.reloadable_meta = self.workdir + "/fluid_time_file" engine.reloadable_meta = model_config_path + "/fluid_time_file"
os.system("touch {}".format(engine.reloadable_meta)) os.system("touch {}".format(engine.reloadable_meta))
engine.reloadable_type = "timestamp_ne" engine.reloadable_type = "timestamp_ne"
engine.runtime_thread_num = 0 engine.runtime_thread_num = 0
...@@ -168,6 +194,10 @@ class Server(object): ...@@ -168,6 +194,10 @@ class Server(object):
engine.use_trt = self.use_trt engine.use_trt = self.use_trt
engine.use_lite = self.use_lite engine.use_lite = self.use_lite
engine.use_xpu = self.use_xpu engine.use_xpu = self.use_xpu
engine.use_gpu = False
if self.device == "gpu":
engine.use_gpu = True
if os.path.exists('{}/__params__'.format(model_config_path)): if os.path.exists('{}/__params__'.format(model_config_path)):
engine.combined_model = True engine.combined_model = True
else: else:
...@@ -175,8 +205,8 @@ class Server(object): ...@@ -175,8 +205,8 @@ class Server(object):
if use_encryption_model: if use_encryption_model:
engine.encrypted_model = True engine.encrypted_model = True
engine.type = "PADDLE_INFER" engine.type = "PADDLE_INFER"
self.model_toolkit_conf.append(server_sdk.ModelToolkitConf())
self.model_toolkit_conf.engines.extend([engine]) self.model_toolkit_conf[-1].engines.extend([engine])
def _prepare_infer_service(self, port): def _prepare_infer_service(self, port):
if self.infer_service_conf == None: if self.infer_service_conf == None:
...@@ -190,79 +220,110 @@ class Server(object): ...@@ -190,79 +220,110 @@ class Server(object):
def _prepare_resource(self, workdir, cube_conf): def _prepare_resource(self, workdir, cube_conf):
self.workdir = workdir self.workdir = workdir
if self.resource_conf == None: if self.resource_conf == None:
with open("{}/{}".format(workdir, self.general_model_config_fn),
"w") as fout:
fout.write(str(self.model_conf))
self.resource_conf = server_sdk.ResourceConf() self.resource_conf = server_sdk.ResourceConf()
for workflow in self.workflow_conf.workflows: for idx, op_general_model_config_fn in enumerate(
for node in workflow.nodes: self.general_model_config_fn):
if "dist_kv" in node.name: with open("{}/{}".format(workdir, op_general_model_config_fn),
self.resource_conf.cube_config_path = workdir "w") as fout:
self.resource_conf.cube_config_file = self.cube_config_fn fout.write(str(list(self.model_conf.values())[idx]))
if cube_conf == None: for workflow in self.workflow_conf.workflows:
raise ValueError( for node in workflow.nodes:
"Please set the path of cube.conf while use dist_kv op." if "dist_kv" in node.name:
) self.resource_conf.cube_config_path = workdir
shutil.copy(cube_conf, workdir) self.resource_conf.cube_config_file = self.cube_config_fn
self.resource_conf.model_toolkit_path = workdir if cube_conf == None:
self.resource_conf.model_toolkit_file = self.model_toolkit_fn raise ValueError(
self.resource_conf.general_model_path = workdir "Please set the path of cube.conf while use dist_kv op."
self.resource_conf.general_model_file = self.general_model_config_fn )
if self.product_name != None: shutil.copy(cube_conf, workdir)
self.resource_conf.auth_product_name = self.product_name if "quant" in node.name:
if self.container_id != None: self.resource_conf.cube_quant_bits = 8
self.resource_conf.auth_container_id = self.container_id self.resource_conf.model_toolkit_path.extend([workdir])
self.resource_conf.model_toolkit_file.extend(
[self.model_toolkit_fn[idx]])
self.resource_conf.general_model_path.extend([workdir])
self.resource_conf.general_model_file.extend(
[op_general_model_config_fn])
# TODO: figure out the meaning of product_name and container_id.
if self.product_name != None:
self.resource_conf.auth_product_name = self.product_name
if self.container_id != None:
self.resource_conf.auth_container_id = self.container_id
def _write_pb_str(self, filepath, pb_obj): def _write_pb_str(self, filepath, pb_obj):
with open(filepath, "w") as fout: with open(filepath, "w") as fout:
fout.write(str(pb_obj)) fout.write(str(pb_obj))
def load_model_config(self, model_config_paths): def load_model_config(self, model_config_paths_args):
# At present, Serving needs to configure the model path in # At present, Serving needs to configure the model path in
# the resource.prototxt file to determine the input and output # the resource.prototxt file to determine the input and output
# format of the workflow. To ensure that the input and output # format of the workflow. To ensure that the input and output
# of multiple models are the same. # of multiple models are the same.
workflow_oi_config_path = None if isinstance(model_config_paths_args, str):
if isinstance(model_config_paths, str): model_config_paths_args = [model_config_paths_args]
for single_model_config in model_config_paths_args:
if os.path.isdir(single_model_config):
pass
elif os.path.isfile(single_model_config):
raise ValueError(
"The input of --model should be a dir not file.")
if isinstance(model_config_paths_args, list):
# If there is only one model path, use the default infer_op. # If there is only one model path, use the default infer_op.
# Because there are several infer_op type, we need to find # Because there are several infer_op type, we need to find
# it from workflow_conf. # it from workflow_conf.
default_engine_names = [ default_engine_types = [
'general_infer_0', 'general_dist_kv_infer_0', 'GeneralInferOp',
'general_dist_kv_quant_infer_0' 'GeneralDistKVInferOp',
'GeneralDistKVQuantInferOp',
'GeneralDetectionOp',
] ]
engine_name = None # currently only a single workflow is supported.
# TODO: support multiple workflows
model_config_paths_list_idx = 0
for node in self.workflow_conf.workflows[0].nodes: for node in self.workflow_conf.workflows[0].nodes:
if node.name in default_engine_names: if node.type in default_engine_types:
engine_name = node.name if node.name is None:
break raise Exception(
if engine_name is None: "You have set the engine_name of Op. Please use the form {op: model_path} to configure model path"
raise Exception( )
"You have set the engine_name of Op. Please use the form {op: model_path} to configure model path"
) f = open("{}/serving_server_conf.prototxt".format(
self.model_config_paths = {engine_name: model_config_paths} model_config_paths_args[model_config_paths_list_idx]),
workflow_oi_config_path = self.model_config_paths[engine_name] 'r')
elif isinstance(model_config_paths, dict): self.model_conf[
self.model_config_paths = {} node.name] = google.protobuf.text_format.Merge(
for node_str, path in model_config_paths.items(): str(f.read()), m_config.GeneralModelConfig())
self.model_config_paths[
node.name] = model_config_paths_args[
model_config_paths_list_idx]
self.general_model_config_fn.append(
node.name + "/general_model.prototxt")
self.model_toolkit_fn.append(node.name +
"/model_toolkit.prototxt")
self.subdirectory.append(node.name)
model_config_paths_list_idx += 1
if model_config_paths_list_idx == len(
model_config_paths_args):
break
# Right now, this branch is not used.
elif isinstance(model_config_paths_args, dict):
self.model_config_paths = collections.OrderedDict()
for node_str, path in model_config_paths_args.items():
node = server_sdk.DAGNode() node = server_sdk.DAGNode()
google.protobuf.text_format.Parse(node_str, node) google.protobuf.text_format.Parse(node_str, node)
self.model_config_paths[node.name] = path self.model_config_paths[node.name] = path
print("You have specified multiple model paths, please ensure " print("You have specified multiple model paths, please ensure "
"that the input and output of multiple models are the same.") "that the input and output of multiple models are the same.")
workflow_oi_config_path = list(self.model_config_paths.items())[0][ f = open("{}/serving_server_conf.prototxt".format(path), 'r')
1] self.model_conf[node.name] = google.protobuf.text_format.Merge(
str(f.read()), m_config.GeneralModelConfig())
else: else:
raise Exception("The type of model_config_paths must be str or " raise Exception(
"dict({op: model_path}), not {}.".format( "The type of model_config_paths must be str or list or "
type(model_config_paths))) "dict({op: model_path}), not {}.".format(
type(model_config_paths_args)))
self.model_conf = m_config.GeneralModelConfig()
f = open(
"{}/serving_server_conf.prototxt".format(workflow_oi_config_path),
'r')
self.model_conf = google.protobuf.text_format.Merge(
str(f.read()), self.model_conf)
# check config here # check config here
# print config here # print config here
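To make the list branch above easier to follow, here is a hedged, simplified restatement of how each infer node is paired with a model directory and its per-node file names (node names such as "general_infer_0" are illustrative):
import collections

def map_models_to_nodes(node_names, model_dirs):
    model_config_paths = collections.OrderedDict()
    general_model_config_fn, model_toolkit_fn, subdirectory = [], [], []
    for name, model_dir in zip(node_names, model_dirs):
        model_config_paths[name] = model_dir
        general_model_config_fn.append(name + "/general_model.prototxt")
        model_toolkit_fn.append(name + "/model_toolkit.prototxt")
        subdirectory.append(name)
    return (model_config_paths, general_model_config_fn,
            model_toolkit_fn, subdirectory)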
...@@ -272,7 +333,6 @@ class Server(object): ...@@ -272,7 +333,6 @@ class Server(object):
def get_device_version(self): def get_device_version(self):
avx_flag = False avx_flag = False
mkl_flag = self.mkl_flag mkl_flag = self.mkl_flag
openblas_flag = False
r = os.system("cat /proc/cpuinfo | grep avx > /dev/null 2>&1") r = os.system("cat /proc/cpuinfo | grep avx > /dev/null 2>&1")
if r == 0: if r == 0:
avx_flag = True avx_flag = True
...@@ -367,7 +427,9 @@ class Server(object): ...@@ -367,7 +427,9 @@ class Server(object):
os.system("mkdir -p {}".format(workdir)) os.system("mkdir -p {}".format(workdir))
else: else:
os.system("mkdir -p {}".format(workdir)) os.system("mkdir -p {}".format(workdir))
os.system("touch {}/fluid_time_file".format(workdir)) for subdir in self.subdirectory:
os.system("mkdir -p {}/{}".format(workdir, subdir))
os.system("touch {}/{}/fluid_time_file".format(workdir, subdir))
if not self.port_is_available(port): if not self.port_is_available(port):
raise SystemExit("Port {} is already used".format(port)) raise SystemExit("Port {} is already used".format(port))
...@@ -380,14 +442,17 @@ class Server(object): ...@@ -380,14 +442,17 @@ class Server(object):
self.workdir = workdir self.workdir = workdir
infer_service_fn = "{}/{}".format(workdir, self.infer_service_fn) infer_service_fn = "{}/{}".format(workdir, self.infer_service_fn)
workflow_fn = "{}/{}".format(workdir, self.workflow_fn)
resource_fn = "{}/{}".format(workdir, self.resource_fn)
model_toolkit_fn = "{}/{}".format(workdir, self.model_toolkit_fn)
self._write_pb_str(infer_service_fn, self.infer_service_conf) self._write_pb_str(infer_service_fn, self.infer_service_conf)
workflow_fn = "{}/{}".format(workdir, self.workflow_fn)
self._write_pb_str(workflow_fn, self.workflow_conf) self._write_pb_str(workflow_fn, self.workflow_conf)
resource_fn = "{}/{}".format(workdir, self.resource_fn)
self._write_pb_str(resource_fn, self.resource_conf) self._write_pb_str(resource_fn, self.resource_conf)
self._write_pb_str(model_toolkit_fn, self.model_toolkit_conf)
for idx, single_model_toolkit_fn in enumerate(self.model_toolkit_fn):
model_toolkit_fn = "{}/{}".format(workdir, single_model_toolkit_fn)
self._write_pb_str(model_toolkit_fn, self.model_toolkit_conf[idx])
def port_is_available(self, port): def port_is_available(self, port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
...@@ -419,6 +484,8 @@ class Server(object): ...@@ -419,6 +484,8 @@ class Server(object):
"-max_concurrency {} " \ "-max_concurrency {} " \
"-num_threads {} " \ "-num_threads {} " \
"-port {} " \ "-port {} " \
"-precision {} " \
"-use_calib {} " \
"-reload_interval_s {} " \ "-reload_interval_s {} " \
"-resource_path {} " \ "-resource_path {} " \
"-resource_file {} " \ "-resource_file {} " \
...@@ -432,6 +499,8 @@ class Server(object): ...@@ -432,6 +499,8 @@ class Server(object):
self.max_concurrency, self.max_concurrency,
self.num_threads, self.num_threads,
self.port, self.port,
self.precision,
self.use_calib,
self.reload_interval_s, self.reload_interval_s,
self.workdir, self.workdir,
self.resource_fn, self.resource_fn,
...@@ -447,6 +516,8 @@ class Server(object): ...@@ -447,6 +516,8 @@ class Server(object):
"-max_concurrency {} " \ "-max_concurrency {} " \
"-num_threads {} " \ "-num_threads {} " \
"-port {} " \ "-port {} " \
"-precision {} " \
"-use_calib {} " \
"-reload_interval_s {} " \ "-reload_interval_s {} " \
"-resource_path {} " \ "-resource_path {} " \
"-resource_file {} " \ "-resource_file {} " \
...@@ -461,6 +532,8 @@ class Server(object): ...@@ -461,6 +532,8 @@ class Server(object):
self.max_concurrency, self.max_concurrency,
self.num_threads, self.num_threads,
self.port, self.port,
self.precision,
self.use_calib,
self.reload_interval_s, self.reload_interval_s,
self.workdir, self.workdir,
self.resource_fn, self.resource_fn,
...@@ -481,7 +554,7 @@ class MultiLangServer(object): ...@@ -481,7 +554,7 @@ class MultiLangServer(object):
self.worker_num_ = 4 self.worker_num_ = 4
self.body_size_ = 64 * 1024 * 1024 self.body_size_ = 64 * 1024 * 1024
self.concurrency_ = 100000 self.concurrency_ = 100000
self.is_multi_model_ = False # for model ensemble self.is_multi_model_ = False  # for model ensemble, which is not used right now.
def set_max_concurrency(self, concurrency): def set_max_concurrency(self, concurrency):
self.concurrency_ = concurrency self.concurrency_ = concurrency
...@@ -509,6 +582,12 @@ class MultiLangServer(object): ...@@ -509,6 +582,12 @@ class MultiLangServer(object):
def set_port(self, port): def set_port(self, port):
self.gport_ = port self.gport_ = port
def set_precision(self, precision="fp32"):
self.precision = precision
def set_use_calib(self, use_calib=False):
self.use_calib = use_calib
def set_reload_interval(self, interval): def set_reload_interval(self, interval):
self.bserver_.set_reload_interval(interval) self.bserver_.set_reload_interval(interval)
...@@ -530,17 +609,55 @@ class MultiLangServer(object): ...@@ -530,17 +609,55 @@ class MultiLangServer(object):
def set_gpuid(self, gpuid=0): def set_gpuid(self, gpuid=0):
self.bserver_.set_gpuid(gpuid) self.bserver_.set_gpuid(gpuid)
def load_model_config(self, server_config_paths, client_config_path=None): def load_model_config(self,
self.bserver_.load_model_config(server_config_paths) server_config_dir_paths,
client_config_path=None):
if isinstance(server_config_dir_paths, str):
server_config_dir_paths = [server_config_dir_paths]
elif isinstance(server_config_dir_paths, list):
pass
else:
raise Exception("The type of model_config_paths must be str or list"
", not {}.".format(type(server_config_dir_paths)))
for single_model_config in server_config_dir_paths:
if os.path.isdir(single_model_config):
pass
elif os.path.isfile(single_model_config):
raise ValueError(
"The input of --model should be a dir not file.")
self.bserver_.load_model_config(server_config_dir_paths)
if client_config_path is None: if client_config_path is None:
if isinstance(server_config_paths, dict): # the dict form is not used right now.
if isinstance(server_config_dir_paths, dict):
self.is_multi_model_ = True self.is_multi_model_ = True
client_config_path = '{}/serving_server_conf.prototxt'.format( client_config_path = []
list(server_config_paths.items())[0][1]) for server_config_path_items in list(
server_config_dir_paths.items()):
client_config_path.append(server_config_path_items[1])
elif isinstance(server_config_dir_paths, list):
self.is_multi_model_ = False
client_config_path = server_config_dir_paths
else: else:
client_config_path = '{}/serving_server_conf.prototxt'.format( raise Exception(
server_config_paths) "The type of model_config_paths must be str or list or "
self.bclient_config_path_ = client_config_path "dict({op: model_path}), not {}.".format(
type(server_config_dir_paths)))
if isinstance(client_config_path, str):
client_config_path = [client_config_path]
elif isinstance(client_config_path, list):
pass
else: # dict is not support right now.
raise Exception(
"The type of client_config_path must be str or list or "
"dict({op: model_path}), not {}.".format(
type(client_config_path)))
if len(client_config_path) != len(server_config_dir_paths):
raise Warning(
"The len(client_config_path) is {}, != len(server_config_dir_paths) {}."
.format(len(client_config_path), len(server_config_dir_paths)))
self.bclient_config_path_list = client_config_path
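A hedged sketch of driving the gRPC server with the list-style configs accepted above (assuming MultiLangServer, OpMaker and OpSeqMaker are exported at the package level as in earlier releases; directory names are hypothetical):
from paddle_serving_server import MultiLangServer, OpMaker, OpSeqMaker

op_maker = OpMaker()
op_seq_maker = OpSeqMaker()
for op_name in ('general_reader', 'general_infer', 'general_response'):
    op_seq_maker.add_op(op_maker.create(op_name))

server = MultiLangServer()
server.set_op_sequence(op_seq_maker.get_op_sequence())
# server config dirs and client config paths should have the same length
server.load_model_config(["uci_housing_model"],
                         client_config_path=["uci_housing_client"])
server.prepare_server(workdir="workdir", port=9393, device="cpu")
# server.run_server()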
def prepare_server(self, def prepare_server(self,
workdir=None, workdir=None,
...@@ -586,7 +703,7 @@ class MultiLangServer(object): ...@@ -586,7 +703,7 @@ class MultiLangServer(object):
maximum_concurrent_rpcs=self.concurrency_) maximum_concurrent_rpcs=self.concurrency_)
multi_lang_general_model_service_pb2_grpc.add_MultiLangGeneralModelServiceServicer_to_server( multi_lang_general_model_service_pb2_grpc.add_MultiLangGeneralModelServiceServicer_to_server(
MultiLangServerServiceServicer( MultiLangServerServiceServicer(
self.bclient_config_path_, self.is_multi_model_, self.bclient_config_path_list, self.is_multi_model_,
["0.0.0.0:{}".format(self.port_list_[0])]), server) ["0.0.0.0:{}".format(self.port_list_[0])]), server)
server.add_insecure_port('[::]:{}'.format(self.gport_)) server.add_insecure_port('[::]:{}'.format(self.gport_))
server.start() server.start()
......
...@@ -23,11 +23,11 @@ from paddle_serving_server.serve import start_multi_card ...@@ -23,11 +23,11 @@ from paddle_serving_server.serve import start_multi_card
import socket import socket
import sys import sys
import numpy as np import numpy as np
import paddle_serving_server as serving import os
from paddle_serving_server import pipeline from paddle_serving_server import pipeline
from paddle_serving_server.pipeline import Op from paddle_serving_server.pipeline import Op
def port_is_available(port): def port_is_available(port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2) sock.settimeout(2)
...@@ -37,6 +37,7 @@ def port_is_available(port): ...@@ -37,6 +37,7 @@ def port_is_available(port):
else: else:
return False return False
class WebService(object): class WebService(object):
def __init__(self, name="default_service"): def __init__(self, name="default_service"):
self.name = name self.name = name
...@@ -64,23 +65,43 @@ class WebService(object): ...@@ -64,23 +65,43 @@ class WebService(object):
def run_service(self): def run_service(self):
self._server.run_server() self._server.run_server()
def load_model_config(self, model_config): def load_model_config(self,
print("This API will be deprecated later. Please do not use it") server_config_dir_paths,
self.model_config = model_config client_config_path=None):
import os if isinstance(server_config_dir_paths, str):
server_config_dir_paths = [server_config_dir_paths]
elif isinstance(server_config_dir_paths, list):
pass
for single_model_config in server_config_dir_paths:
if os.path.isdir(single_model_config):
pass
elif os.path.isfile(single_model_config):
raise ValueError(
"The input of --model should be a dir not file.")
self.server_config_dir_paths = server_config_dir_paths
from .proto import general_model_config_pb2 as m_config from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format import google.protobuf.text_format
if os.path.isdir(model_config): file_path_list = []
client_config = "{}/serving_server_conf.prototxt".format( for single_model_config in self.server_config_dir_paths:
model_config) file_path_list.append("{}/serving_server_conf.prototxt".format(
elif os.path.isfile(model_config): single_model_config))
client_config = model_config
model_conf = m_config.GeneralModelConfig() model_conf = m_config.GeneralModelConfig()
f = open(client_config, 'r') f = open(file_path_list[0], 'r')
model_conf = google.protobuf.text_format.Merge( model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf) str(f.read()), model_conf)
self.feed_vars = {var.name: var for var in model_conf.feed_var} self.feed_vars = {var.name: var for var in model_conf.feed_var}
if len(file_path_list) > 1:
model_conf = m_config.GeneralModelConfig()
f = open(file_path_list[-1], 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
self.fetch_vars = {var.name: var for var in model_conf.fetch_var} self.fetch_vars = {var.name: var for var in model_conf.fetch_var}
if client_config_path == None:
self.client_config_path = self.server_config_dir_paths
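A hedged usage sketch of the updated WebService.load_model_config, which now takes one or more model directories (service name, directories and port are hypothetical):
from paddle_serving_server.web_service import WebService

service = WebService(name="ocr")
service.load_model_config(["ocr_det_model", "ocr_rec_model"])
service.prepare_server(workdir="workdir", port=9292, device="cpu",
                       precision="fp32", use_calib=False)
service.run_rpc_service()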
def set_gpus(self, gpus): def set_gpus(self, gpus):
print("This API will be deprecated later. Please do not use it") print("This API will be deprecated later. Please do not use it")
...@@ -94,21 +115,31 @@ class WebService(object): ...@@ -94,21 +115,31 @@ class WebService(object):
mem_optim=True, mem_optim=True,
use_lite=False, use_lite=False,
use_xpu=False, use_xpu=False,
ir_optim=False): ir_optim=False,
precision="fp32",
use_calib=False):
device = "gpu" device = "gpu"
if gpuid == -1: if gpuid == -1:
if use_lite: if use_lite:
device = "arm" device = "arm"
else: else:
device = "cpu" device = "cpu"
op_maker = serving.OpMaker() op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker() op_seq_maker = OpSeqMaker()
read_op = op_maker.create('general_reader')
op_seq_maker.add_op(read_op) op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
for idx, single_model in enumerate(self.server_config_dir_paths):
infer_op_name = "general_infer"
if len(self.server_config_dir_paths) == 2 and idx == 0:
infer_op_name = "general_detection"
else:
infer_op_name = "general_infer"
general_infer_op = op_maker.create(infer_op_name)
op_seq_maker.add_op(general_infer_op)
general_response_op = op_maker.create('general_response')
op_seq_maker.add_op(general_response_op) op_seq_maker.add_op(general_response_op)
server = Server() server = Server()
...@@ -117,13 +148,16 @@ class WebService(object): ...@@ -117,13 +148,16 @@ class WebService(object):
server.set_memory_optimize(mem_optim) server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim) server.set_ir_optimize(ir_optim)
server.set_device(device) server.set_device(device)
server.set_precision(precision)
server.set_use_calib(use_calib)
if use_lite: if use_lite:
server.set_lite() server.set_lite()
if use_xpu: if use_xpu:
server.set_xpu() server.set_xpu()
server.load_model_config(self.model_config) server.load_model_config(self.server_config_dir_paths
)  # the brpc Server accepts server_config_dir_paths (a list)
if gpuid >= 0: if gpuid >= 0:
server.set_gpuid(gpuid) server.set_gpuid(gpuid)
server.prepare_server(workdir=workdir, port=port, device=device) server.prepare_server(workdir=workdir, port=port, device=device)
...@@ -136,6 +170,8 @@ class WebService(object): ...@@ -136,6 +170,8 @@ class WebService(object):
workdir="", workdir="",
port=9393, port=9393,
device="gpu", device="gpu",
precision="fp32",
use_calib=False,
use_lite=False, use_lite=False,
use_xpu=False, use_xpu=False,
ir_optim=False, ir_optim=False,
...@@ -165,7 +201,9 @@ class WebService(object): ...@@ -165,7 +201,9 @@ class WebService(object):
mem_optim=mem_optim, mem_optim=mem_optim,
use_lite=use_lite, use_lite=use_lite,
use_xpu=use_xpu, use_xpu=use_xpu,
ir_optim=ir_optim)) ir_optim=ir_optim,
precision=precision,
use_calib=use_calib))
else: else:
for i, gpuid in enumerate(self.gpus): for i, gpuid in enumerate(self.gpus):
self.rpc_service_list.append( self.rpc_service_list.append(
...@@ -177,13 +215,14 @@ class WebService(object): ...@@ -177,13 +215,14 @@ class WebService(object):
mem_optim=mem_optim, mem_optim=mem_optim,
use_lite=use_lite, use_lite=use_lite,
use_xpu=use_xpu, use_xpu=use_xpu,
ir_optim=ir_optim)) ir_optim=ir_optim,
precision=precision,
use_calib=use_calib))
def _launch_web_service(self): def _launch_web_service(self):
gpu_num = len(self.gpus) gpu_num = len(self.gpus)
self.client = Client() self.client = Client()
self.client.load_client_config("{}/serving_server_conf.prototxt".format( self.client.load_client_config(self.client_config_path)
self.model_config))
endpoints = "" endpoints = ""
if gpu_num > 0: if gpu_num > 0:
for i in range(gpu_num): for i in range(gpu_num):
...@@ -264,14 +303,24 @@ class WebService(object): ...@@ -264,14 +303,24 @@ class WebService(object):
self.app_instance = app_instance self.app_instance = app_instance
def _launch_local_predictor(self, gpu): def _launch_local_predictor(self, gpu):
# LocalPredictor acts like a server, but from the WebService's point of view it
# is the request initiator, i.e. a client.
# local_predictor only supports a single model directory path (a str),
# so the input must be self.server_config_dir_paths[0].
from paddle_serving_app.local_predict import LocalPredictor from paddle_serving_app.local_predict import LocalPredictor
self.client = LocalPredictor() self.client = LocalPredictor()
if gpu: if gpu:
# if the user forgot to call `set_gpus` to set self.gpus,
# default to self.gpus = [0].
if len(self.gpus) == 0:
self.gpus.append(0)
self.client.load_model_config( self.client.load_model_config(
"{}".format(self.model_config), use_gpu=True, gpu_id=self.gpus[0]) self.server_config_dir_paths[0],
use_gpu=True,
gpu_id=self.gpus[0])
else: else:
self.client.load_model_config( self.client.load_model_config(
"{}".format(self.model_config), use_gpu=False) self.server_config_dir_paths[0], use_gpu=False)
def run_web_service(self): def run_web_service(self):
print("This API will be deprecated later. Please do not use it") print("This API will be deprecated later. Please do not use it")
......
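As noted in the comments above, LocalPredictor only takes a single model directory; a hedged sketch (the directory name is hypothetical):
from paddle_serving_app.local_predict import LocalPredictor

predictor = LocalPredictor()
predictor.load_model_config("ocr_rec_model", use_gpu=False)
# result = predictor.predict(feed={"x": ...}, fetch=["prediction"], batch=True)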
...@@ -238,6 +238,8 @@ class PipelineServer(object): ...@@ -238,6 +238,8 @@ class PipelineServer(object):
"devices": "", "devices": "",
"mem_optim": True, "mem_optim": True,
"ir_optim": False, "ir_optim": False,
"precision": "fp32",
"use_calib": False,
}, },
} }
for op in self._used_op: for op in self._used_op:
...@@ -394,6 +396,8 @@ class ServerYamlConfChecker(object): ...@@ -394,6 +396,8 @@ class ServerYamlConfChecker(object):
"devices": "", "devices": "",
"mem_optim": True, "mem_optim": True,
"ir_optim": False, "ir_optim": False,
"precision": "fp32",
"use_calib": False,
} }
conf_type = { conf_type = {
"model_config": str, "model_config": str,
...@@ -403,6 +407,8 @@ class ServerYamlConfChecker(object): ...@@ -403,6 +407,8 @@ class ServerYamlConfChecker(object):
"devices": str, "devices": str,
"mem_optim": bool, "mem_optim": bool,
"ir_optim": bool, "ir_optim": bool,
"precision": str,
"use_calib": bool,
} }
conf_qualification = {"thread_num": (">=", 1), } conf_qualification = {"thread_num": (">=", 1), }
ServerYamlConfChecker.check_conf(conf, default_conf, conf_type, ServerYamlConfChecker.check_conf(conf, default_conf, conf_type,
......
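A hedged example of a pipeline op's local_service_conf covering the two new keys checked above; the model path is hypothetical and the remaining values are the defaults shown:
local_service_conf = {
    "model_config": "ocr_rec_model",
    "devices": "",
    "mem_optim": True,
    "ir_optim": False,
    "precision": "fp32",
    "use_calib": False,
}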
...@@ -8,11 +8,26 @@ echo "# #" ...@@ -8,11 +8,26 @@ echo "# #"
echo "# #" echo "# #"
echo "# #" echo "# #"
echo "################################################################" echo "################################################################"
export GOPATH=$HOME/go export GOPATH=$HOME/go
export PATH=$PATH:$GOROOT/bin:$GOPATH/bin export PATH=$PATH:$GOROOT/bin:$GOPATH/bin
export CUDA_INCLUDE_DIRS=/usr/local/cuda-10.2/include export CUDA_INCLUDE_DIRS=/usr/local/cuda-10.2/include
export PYTHONROOT=/usr/local export PYTHONROOT=/usr/local
export PYTHONIOENCODING=utf-8
build_path=/workspace/Serving/
error_words="Fail|DENIED|UNKNOWN|None"
log_dir=${build_path}logs/
data=/root/.cache/serving_data/
dir=`pwd`
RED_COLOR='\E[1;31m'
GREEN_COLOR='\E[1;32m'
YELOW_COLOR='\E[1;33m'
RES='\E[0m'
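# Note: /usr/local/cuda/version.txt may be absent on newer CUDA images, hence the fallback to 11 below.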
cuda_version=`cat /usr/local/cuda/version.txt`
if [ $? -ne 0 ]; then
cuda_version=11
fi
go env -w GO111MODULE=on go env -w GO111MODULE=on
go env -w GOPROXY=https://goproxy.cn,direct go env -w GOPROXY=https://goproxy.cn,direct
@@ -21,90 +36,141 @@ go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3 go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
go get -u google.golang.org/grpc@v1.33.0 go get -u google.golang.org/grpc@v1.33.0
build_path=/workspace/Serving/ build_whl_list=(build_cpu_server build_gpu_server build_client build_app)
build_whl_list=(build_gpu_server build_client build_cpu_server build_app) rpc_model_list=(grpc_fit_a_line grpc_yolov4 pipeline_imagenet bert_rpc_gpu bert_rpc_cpu ResNet50_rpc \
rpc_model_list=(grpc_impl pipeline_imagenet bert_rpc_gpu bert_rpc_cpu ResNet50_rpc lac_rpc \ lac_rpc cnn_rpc bow_rpc lstm_rpc fit_a_line_rpc deeplabv3_rpc mobilenet_rpc unet_rpc resnetv2_rpc \
cnn_rpc bow_rpc lstm_rpc fit_a_line_rpc deeplabv3_rpc mobilenet_rpc unet_rpc resnetv2_rpc \ criteo_ctr_rpc_cpu criteo_ctr_rpc_gpu ocr_rpc yolov4_rpc_gpu faster_rcnn_hrnetv2p_w18_1x_encrypt)
criteo_ctr_rpc_cpu criteo_ctr_rpc_gpu ocr_rpc yolov4_rpc_gpu) http_model_list=(fit_a_line_http lac_http cnn_http bow_http lstm_http ResNet50_http bert_http\
http_model_list=(fit_a_line_http lac_http cnn_http bow_http lstm_http ResNet50_http bert_http) pipeline_ocr_cpu_http)
function setproxy(){ function setproxy() {
export http_proxy=${proxy} export http_proxy=${proxy}
export https_proxy=${proxy} export https_proxy=${proxy}
} }
function unsetproxy(){ function unsetproxy() {
unset http_proxy unset http_proxy
unset https_proxy unset https_proxy
} }
function kill_server_process(){ function kill_server_process() {
kill `ps -ef|grep $1 |awk '{print $2}'` kill `ps -ef | grep serving | awk '{print $2}'` > /dev/null 2>&1
kill `ps -ef|grep serving |awk '{print $2}'` kill `ps -ef | grep python | awk '{print $2}'` > /dev/null 2>&1
echo -e "${GREEN_COLOR}process killed...${RES}"
} }
function check() { function check() {
cd ${build_path} cd ${build_path}
if [ ! -f paddle_serving_app* ]; then if [ ! -f paddle_serving_app* ]; then
echo "paddle_serving_app is compiled failed, please check your pull request" echo "paddle_serving_app is compiled failed, please check your pull request"
exit 1 exit 1
elif [ ! -f paddle_serving_server-* ]; then elif [ ! -f paddle_serving_server-* ]; then
echo "paddle_serving_server-cpu is compiled failed, please check your pull request" echo "paddle_serving_server-cpu is compiled failed, please check your pull request"
exit 1 exit 1
elif [ ! -f paddle_serving_server_* ]; then elif [ ! -f paddle_serving_server_* ]; then
echo "paddle_serving_server_gpu is compiled failed, please check your pull request" echo "paddle_serving_server_gpu is compiled failed, please check your pull request"
exit 1 exit 1
elif [ ! -f paddle_serving_client* ]; then elif [ ! -f paddle_serving_client* ]; then
echo "paddle_serving_server_client is compiled failed, please check your pull request" echo "paddle_serving_server_client is compiled failed, please check your pull request"
exit 1 exit 1
else else
echo "paddle serving build passed" echo "paddle serving build passed"
fi fi
} }
function check_result() { function check_result() {
if [ $? -ne 0 ];then if [ $? == 0 ]; then
echo -e "\033[4;31;42m$1 model runs failed, please check your pull request or modify test case! \033[0m" echo -e "${GREEN_COLOR}$1 execute normally${RES}"
exit 1 if [ $1 == "server" ]; then
sleep $2
tail ${dir}server_log.txt | tee -a ${log_dir}server_total.txt
fi
if [ $1 == "client" ]; then
tail ${dir}client_log.txt | tee -a ${log_dir}client_total.txt
grep -E "${error_words}" ${dir}client_log.txt > /dev/null
if [ $? == 0 ]; then
echo -e "${RED_COLOR}$1 error command${RES}\n" | tee -a ${log_dir}server_total.txt ${log_dir}client_total.txt
error_log $2
else
echo -e "${GREEN_COLOR}$2${RES}\n" | tee -a ${log_dir}server_total.txt ${log_dir}client_total.txt
fi
fi
else else
echo -e "\033[4;37;42m$1 model runs successfully, congratulations! \033[0m" echo -e "${RED_COLOR}$1 error command${RES}\n" | tee -a ${log_dir}server_total.txt ${log_dir}client_total.txt
tail ${dir}client_log.txt | tee -a ${log_dir}client_total.txt
error_log $2
fi fi
} }
function before_hook(){ function error_log() {
setproxy arg=${1//\//_}
cd ${build_path}/python echo "-----------------------------" | tee -a ${log_dir}error_models.txt
pip3.6 install --upgrade pip arg=${arg%% *}
pip3.6 install requests arr=(${arg//_/ })
pip3.6 install -r requirements.txt if [ ${arr[@]: -1} == 1 -o ${arr[@]: -1} == 2 ]; then
pip3.6 install numpy==1.16.4 model=${arr[*]:0:${#arr[*]}-3}
echo "before hook configuration is successful.... " deployment=${arr[*]: -3}
else
model=${arr[*]:0:${#arr[*]}-2}
deployment=${arr[*]: -2}
fi
echo "model: ${model// /_}" | tee -a ${log_dir}error_models.txt
echo "deployment: ${deployment// /_}" | tee -a ${log_dir}error_models.txt
echo "py_version: python3.6" | tee -a ${log_dir}error_models.txt
echo "cuda_version: ${cuda_version}" | tee -a ${log_dir}error_models.txt
echo "status: Failed" | tee -a ${log_dir}error_models.txt
echo -e "-----------------------------\n\n" | tee -a ${log_dir}error_models.txt
prefix=${arg//\//_}
for file in ${dir}*
do
cp ${file} ${log_dir}error/${prefix}_${file##*/}
done
}
function check_dir() {
if [ ! -d "$1" ]
then
mkdir -p $1
fi
} }
function run_env(){ function link_data() {
setproxy for file in $1*
pip3.6 install --upgrade nltk==3.4 do
pip3.6 install --upgrade scipy==1.2.1 if [ ! -h ${file##*/} ]
pip3.6 install --upgrade setuptools==41.0.0 then
pip3.6 install paddlehub ujson paddlepaddle==2.0.0 ln -s ${file} ./${file##*/}
echo "run env configuration is successful.... " fi
done
} }
function run_gpu_env(){ function before_hook() {
cd ${build_path} setproxy
export LD_LIBRARY_PATH=/usr/local/lib64/python3.6/site-packages/paddle/libs/:$LD_LIBRARY_PATH unsetproxy
export LD_LIBRARY_PATH=/workspace/Serving/build_gpu/third_party/install/Paddle/lib/:/workspace/Serving/build_gpu/third_party/Paddle/src/extern_paddle/third_party/install/mklml/lib/:/workspace/Serving/build_gpu/third_party/Paddle/src/extern_paddle/third_party/install/mkldnn/lib/:$LD_LIBRARY_PATH cd ${build_path}/python
export SERVING_BIN=${build_path}/build_gpu/core/general-server/serving python3.6 -m pip install --upgrade pip
echo "run gpu env configuration is successful.... " python3.6 -m pip install requests
python3.6 -m pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
python3.6 -m pip install numpy==1.16.4
python3.6 -m pip install paddlehub -i https://mirror.baidu.com/pypi/simple
echo "before hook configuration is successful.... "
} }
function run_cpu_env(){ function run_env() {
cd ${build_path} setproxy
export LD_LIBRARY_PATH=/usr/local/lib64/python3.6/site-packages/paddle/libs/:$LD_LIBRARY_PATH python3.6 -m pip install --upgrade nltk==3.4
export LD_LIBRARY_PATH=/workspace/Serving/build_cpu/third_party/install/Paddle/lib/:$LD_LIBRARY_PATH python3.6 -m pip install --upgrade scipy==1.2.1
export SERVING_BIN=${build_path}/build_cpu/core/general-server/serving python3.6 -m pip install --upgrade setuptools==41.0.0
echo "run cpu env configuration is successful.... " python3.6 -m pip install paddlehub ujson paddlepaddle==2.0.0
echo "run env configuration is successful.... "
}
function run_gpu_env() {
cd ${build_path}
export LD_LIBRARY_PATH=/usr/local/lib64/python3.6/site-packages/paddle/libs/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/workspace/Serving/build_gpu/third_party/install/Paddle/lib/:/workspace/Serving/build_gpu/third_party/Paddle/src/extern_paddle/third_party/install/mklml/lib/:/workspace/Serving/build_gpu/third_party/Paddle/src/extern_paddle/third_party/install/mkldnn/lib/:$LD_LIBRARY_PATH
export SERVING_BIN=${build_path}/build_gpu/core/general-server/serving
echo "run gpu env configuration is successful.... "
} }
function build_gpu_server() { function build_gpu_server() {
@@ -124,573 +190,643 @@ function build_gpu_server() {
-DSERVER=ON \ -DSERVER=ON \
-DTENSORRT_ROOT=/usr \ -DTENSORRT_ROOT=/usr \
-DWITH_GPU=ON .. -DWITH_GPU=ON ..
make -j18 make -j32
make -j18 make -j32
make install -j18 make install -j32
pip3.6 uninstall paddle-serving-server-gpu -y python3.6 -m pip uninstall paddle-serving-server-gpu -y
pip3.6 install ${build_path}/build/python/dist/* python3.6 -m pip install ${build_path}/build/python/dist/*
cp ${build_path}/build/python/dist/* ../ cp ${build_path}/build/python/dist/* ../
cp -r ${build_path}/build/ ${build_path}/build_gpu cp -r ${build_path}/build/ ${build_path}/build_gpu
} }
function build_client() { function build_cpu_server(){
setproxy setproxy
cd ${build_path} cd ${build_path}
if [ -d build ];then if [ -d build_cpu ];then
rm -rf build rm -rf build_cpu
fi fi
mkdir build && cd build if [ -d build ];then
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python3.6m/ \ rm -rf build
-DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython3.6.so \ fi
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python3.6 \ mkdir build && cd build
-DCLIENT=ON .. cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python3.6m/ \
make -j18 -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython3.6.so \
make -j18 -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python3.6 \
cp ${build_path}/build/python/dist/* ../ -DWITH_GPU=OFF \
pip3.6 uninstall paddle-serving-client -y -DSERVER=ON ..
pip3.6 install ${build_path}/build/python/dist/* make -j32
make -j32
make install -j32
cp ${build_path}/build/python/dist/* ../
python3.6 -m pip uninstall paddle-serving-server -y
python3.6 -m pip install ${build_path}/build/python/dist/*
cp -r ${build_path}/build/ ${build_path}/build_cpu
} }
function build_cpu_server(){ function build_client() {
setproxy setproxy
cd ${build_path} cd ${build_path}
if [ -d build_cpu ];then if [ -d build ];then
rm -rf build_cpu rm -rf build
fi fi
if [ -d build ];then mkdir build && cd build
rm -rf build cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python3.6m/ \
fi -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython3.6.so \
mkdir build && cd build -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python3.6 \
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python3.6m/ \ -DCLIENT=ON ..
-DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython3.6.so \ make -j32
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python3.6 \ make -j32
-DWITH_GPU=OFF \ cp ${build_path}/build/python/dist/* ../
-DSERVER=ON .. python3.6 -m pip uninstall paddle-serving-client -y
make -j18 python3.6 -m pip install ${build_path}/build/python/dist/*
make -j18
make install -j18
cp ${build_path}/build/python/dist/* ../
pip3.6 uninstall paddle-serving-server -y
pip3.6 install ${build_path}/build/python/dist/*
cp -r ${build_path}/build/ ${build_path}/build_cpu
} }
function build_app() { function build_app() {
setproxy setproxy
pip3.6 install paddlehub ujson Pillow python3.6 -m pip install paddlehub ujson Pillow
pip3.6 install paddlepaddle==2.0.0 python3.6 -m pip install paddlepaddle==2.0.0
cd ${build_path} cd ${build_path}
if [ -d build ];then if [ -d build ];then
rm -rf build rm -rf build
fi fi
mkdir build && cd build mkdir build && cd build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python3.6m/ \ cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python3.6m/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython3.6.so \ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython3.6.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python3.6 \ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python3.6 \
-DCMAKE_INSTALL_PREFIX=./output -DAPP=ON .. -DCMAKE_INSTALL_PREFIX=./output -DAPP=ON ..
make make
cp ${build_path}/build/python/dist/* ../ cp ${build_path}/build/python/dist/* ../
pip3.6 uninstall paddle-serving-app -y python3.6 -m pip uninstall paddle-serving-app -y
pip3.6 install ${build_path}/build/python/dist/* python3.6 -m pip install ${build_path}/build/python/dist/*
} }
function bert_rpc_gpu(){ function faster_rcnn_hrnetv2p_w18_1x_encrypt() {
run_gpu_env dir=${log_dir}rpc_model/faster_rcnn_hrnetv2p_w18_1x/
unsetproxy cd ${build_path}/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x
cd ${build_path}/python/examples/bert check_dir ${dir}
sh get_data.sh >/dev/null 2>&1 data_dir=${data}detection/faster_rcnn_hrnetv2p_w18_1x/
sed -i 's/9292/8860/g' bert_client.py link_data ${data_dir}
sed -i '$aprint(result)' bert_client.py python3.6 encrypt.py
cp -r /root/.cache/dist_data/serving/bert/bert_seq128_* ./ unsetproxy
ls -hlst echo -e "${GREEN_COLOR}faster_rcnn_hrnetv2p_w18_1x_ENCRYPTION_GPU_RPC server started${RES}" | tee -a ${log_dir}server_total.txt
python3.6 -m paddle_serving_server_gpu.serve --model bert_seq128_model/ --port 8860 --gpu_ids 0 & python3.6 -m paddle_serving_server.serve --model encrypt_server/ --port 9494 --use_trt --gpu_ids 0 --use_encryption_model > ${dir}server_log.txt 2>&1 &
sleep 15 check_result server 3
nvidia-smi echo -e "${GREEN_COLOR}faster_rcnn_hrnetv2p_w18_1x_ENCRYPTION_GPU_RPC client started${RES}" | tee -a ${log_dir}client_total.txt
head data-c.txt | python3.6 bert_client.py --model bert_seq128_client/serving_client_conf.prototxt python3.6 test_encryption.py 000000570688.jpg > ${dir}client_log.txt 2>&1
nvidia-smi check_result client "faster_rcnn_hrnetv2p_w18_1x_ENCRYPTION_GPU_RPC server test completed"
check_result $FUNCNAME kill_server_process
kill_server_process serving }
}
function pipeline_ocr_cpu_http() {
function bert_rpc_cpu(){ dir=${log_dir}rpc_model/pipeline_ocr_cpu_http/
run_cpu_env check_dir ${dir}
unsetproxy cd ${build_path}/python/examples/pipeline/ocr
cd ${build_path}/python/examples/bert data_dir=${data}ocr/
sed -i 's/8860/8861/g' bert_client.py link_data ${data_dir}
python3.6 -m paddle_serving_server.serve --model bert_seq128_model/ --port 8861 & unsetproxy
sleep 3 echo -e "${GREEN_COLOR}pipeline_ocr_CPU_HTTP server started${RES}" | tee -a ${log_dir}server_total.txt
cp data-c.txt.1 data-c.txt $py_version web_service.py > ${dir}server_log.txt 2>&1 &
head data-c.txt | python3.6 bert_client.py --model bert_seq128_client/serving_client_conf.prototxt check_result server 5
check_result $FUNCNAME echo -e "${GREEN_COLOR}pipeline_ocr_CPU_HTTP client started${RES}" | tee -a ${log_dir}client_total.txt
kill_server_process serving timeout 15s $py_version pipeline_http_client.py > ${dir}client_log.txt 2>&1
} check_result client "pipeline_ocr_CPU_HTTP server test completed"
kill_server_process
function criteo_ctr_with_cube_rpc(){ }
unsetproxy
run_cpu_env function bert_rpc_gpu() {
cd ${build_path}/python/examples/criteo_ctr_with_cube dir=${log_dir}rpc_model/bert_rpc_gpu/
ln -s /root/.cache/dist_data/serving/criteo_ctr_with_cube/raw_data ./ check_dir ${dir}
sed -i "s/9292/8888/g" test_server.py run_gpu_env
sed -i "s/9292/8888/g" test_client.py unsetproxy
wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz >/dev/null 2>&1 cd ${build_path}/python/examples/bert
tar xf ctr_cube_unittest.tar.gz data_dir=${data}bert/
mv models/ctr_client_conf ./ link_data ${data_dir}
mv models/ctr_serving_model_kv ./ sh get_data.sh >/dev/null 2>&1
mv models/data ./cube/ sed -i 's/9292/8860/g' bert_client.py
wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz >/dev/null 2>&1 sed -i '$aprint(result)' bert_client.py
tar xf cube_app.tar.gz ls -hlst
mv cube_app/cube* ./cube/ python3.6 -m paddle_serving_server.serve --model bert_seq128_model/ --port 8860 --gpu_ids 0 > ${dir}server_log.txt 2>&1 &
sh cube_prepare.sh > haha 2>&1 & check_result server 15
sleep 5 nvidia-smi
python3.6 test_server.py ctr_serving_model_kv & head data-c.txt | python3.6 bert_client.py --model bert_seq128_client/serving_client_conf.prototxt > ${dir}client_log.txt 2>&1
sleep 5 check_result client "bert_GPU_RPC server test completed"
python3.6 test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data nvidia-smi
check_result $FUNCNAME kill_server_process
kill `ps -ef|grep cube|awk '{print $2}'` }
kill_server_process test_server
} function bert_rpc_cpu() {
dir=${log_dir}rpc_model/bert_rpc_cpu/
function pipeline_imagenet(){ check_dir ${dir}
run_gpu_env unsetproxy
unsetproxy cd ${build_path}/python/examples/bert
cd ${build_path}/python/examples/pipeline/imagenet data_dir=${data}bert/
cp -r /root/.cache/dist_data/serving/imagenet/* ./ link_data ${data_dir}
ls -a sed -i 's/8860/8861/g' bert_client.py
python3.6 resnet50_web_service.py & python3.6 -m paddle_serving_server.serve --model bert_seq128_model/ --port 8861 > ${dir}server_log.txt 2>&1 &
sleep 5 check_result server 3
nvidia-smi cp data-c.txt.1 data-c.txt
python3.6 pipeline_rpc_client.py head data-c.txt | python3.6 bert_client.py --model bert_seq128_client/serving_client_conf.prototxt > ${dir}client_log.txt 2>&1
nvidia-smi check_result client "bert_CPU_RPC server test completed"
# check_result $FUNCNAME kill_server_process
kill_server_process resnet50_web_service }
}
function pipeline_imagenet() {
function ResNet50_rpc(){ dir=${log_dir}rpc_model/pipeline_imagenet/
run_gpu_env check_dir ${dir}
unsetproxy unsetproxy
cd ${build_path}/python/examples/imagenet cd ${build_path}/python/examples/pipeline/imagenet
cp -r /root/.cache/dist_data/serving/imagenet/* ./ data_dir=${data}imagenet/
sed -i 's/9696/8863/g' resnet50_rpc_client.py link_data ${data_dir}
python3.6 -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 8863 --gpu_ids 0 & python3.6 resnet50_web_service.py > ${dir}server_log.txt 2>&1 &
sleep 5 check_result server 5
nvidia-smi nvidia-smi
python3.6 resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt python3.6 pipeline_rpc_client.py > ${dir}client_log.txt 2>&1
nvidia-smi check_result client "pipeline_imagenet_GPU_RPC server test completed"
check_result $FUNCNAME nvidia-smi
kill_server_process serving kill_server_process
} }
function ResNet101_rpc(){ function ResNet50_rpc() {
run_gpu_env dir=${log_dir}rpc_model/ResNet50_rpc/
unsetproxy check_dir ${dir}
cd ${build_path}/python/examples/imagenet unsetproxy
sed -i "22cclient.connect(['${host}:8864'])" image_rpc_client.py cd ${build_path}/python/examples/imagenet
python3.6 -m paddle_serving_server_gpu.serve --model ResNet101_vd_model --port 8864 --gpu_ids 0 & data_dir=${data}imagenet/
sleep 5 link_data ${data_dir}
nvidia-smi sed -i 's/9696/8863/g' resnet50_rpc_client.py
python3.6 image_rpc_client.py ResNet101_vd_client_config/serving_client_conf.prototxt python3.6 -m paddle_serving_server.serve --model ResNet50_vd_model --port 8863 --gpu_ids 0 > ${dir}server_log.txt 2>&1 &
nvidia-smi check_result server 5
check_result $FUNCNAME nvidia-smi
kill_server_process serving python3.6 resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt > ${dir}client_log.txt 2>&1
sleep 5 check_result client "ResNet50_GPU_RPC server test completed"
} nvidia-smi
kill_server_process
function cnn_rpc(){ }
unsetproxy
run_cpu_env function ResNet101_rpc() {
cd ${build_path}/python/examples/imdb dir=${log_dir}rpc_model/ResNet101_rpc/
cp -r /root/.cache/dist_data/serving/imdb/* ./ check_dir ${dir}
tar xf imdb_model.tar.gz && tar xf text_classification_data.tar.gz unsetproxy
sed -i 's/9292/8865/g' test_client.py cd ${build_path}/python/examples/imagenet
python3.6 -m paddle_serving_server.serve --model imdb_cnn_model/ --port 8865 & data_dir=${data}imagenet/
sleep 5 link_data ${data_dir}
head test_data/part-0 | python3.6 test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab sed -i "22cclient.connect(['127.0.0.1:8864'])" image_rpc_client.py
check_result $FUNCNAME python3.6 -m paddle_serving_server.serve --model ResNet101_vd_model --port 8864 --gpu_ids 0 > ${dir}server_log.txt 2>&1 &
kill_server_process serving check_result server 5
} nvidia-smi
python3.6 image_rpc_client.py ResNet101_vd_client_config/serving_client_conf.prototxt > ${dir}client_log.txt 2>&1
function bow_rpc(){ check_result client "ResNet101_GPU_RPC server test completed"
unsetproxy nvidia-smi
run_cpu_env kill_server_process
cd ${build_path}/python/examples/imdb }
sed -i 's/8865/8866/g' test_client.py
python3.6 -m paddle_serving_server.serve --model imdb_bow_model/ --port 8866 & function cnn_rpc() {
sleep 5 dir=${log_dir}rpc_model/cnn_rpc/
head test_data/part-0 | python3.6 test_client.py imdb_bow_client_conf/serving_client_conf.prototxt imdb.vocab check_dir ${dir}
check_result $FUNCNAME unsetproxy
kill_server_process serving cd ${build_path}/python/examples/imdb
} data_dir=${data}imdb/
link_data ${data_dir}
function lstm_rpc(){ sed -i 's/9292/8865/g' test_client.py
unsetproxy python3.6 -m paddle_serving_server.serve --model imdb_cnn_model/ --port 8865 > ${dir}server_log.txt 2>&1 &
run_cpu_env check_result server 5
cd ${build_path}/python/examples/imdb head test_data/part-0 | python3.6 test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab > ${dir}client_log.txt 2>&1
sed -i 's/8866/8867/g' test_client.py check_result client "cnn_CPU_RPC server test completed"
python3.6 -m paddle_serving_server.serve --model imdb_lstm_model/ --port 8867 & kill_server_process
sleep 5 }
head test_data/part-0 | python3.6 test_client.py imdb_lstm_client_conf/serving_client_conf.prototxt imdb.vocab
check_result $FUNCNAME function bow_rpc() {
kill_server_process serving dir=${log_dir}rpc_model/bow_rpc/
} check_dir ${dir}
unsetproxy
function lac_rpc(){ cd ${build_path}/python/examples/imdb
unsetproxy data_dir=${data}imdb/
run_cpu_env link_data ${data_dir}
cd ${build_path}/python/examples/lac sed -i 's/8865/8866/g' test_client.py
python3.6 -m paddle_serving_app.package --get_model lac >/dev/null 2>&1 python3.6 -m paddle_serving_server.serve --model imdb_bow_model/ --port 8866 > ${dir}server_log.txt 2>&1 &
tar xf lac.tar.gz check_result server 5
sed -i 's/9292/8868/g' lac_client.py head test_data/part-0 | python3.6 test_client.py imdb_bow_client_conf/serving_client_conf.prototxt imdb.vocab > ${dir}client_log.txt 2>&1
python3.6 -m paddle_serving_server.serve --model lac_model/ --port 8868 & check_result client "bow_CPU_RPC server test completed"
sleep 5 kill_server_process
echo "我爱北京天安门" | python3.6 lac_client.py lac_client/serving_client_conf.prototxt lac_dict/ }
check_result $FUNCNAME
kill_server_process serving function lstm_rpc() {
} dir=${log_dir}rpc_model/lstm_rpc/
check_dir ${dir}
function fit_a_line_rpc(){ unsetproxy
unsetproxy cd ${build_path}/python/examples/imdb
run_cpu_env data_dir=${data}imdb/
cd ${build_path}/python/examples/fit_a_line link_data ${data_dir}
sh get_data.sh >/dev/null 2>&1 sed -i 's/8866/8867/g' test_client.py
sed -i 's/9393/8869/g' test_client.py python3.6 -m paddle_serving_server.serve --model imdb_lstm_model/ --port 8867 > ${dir}server_log.txt 2>&1 &
python3.6 -m paddle_serving_server.serve --model uci_housing_model --port 8869 & check_result server 5
sleep 5 head test_data/part-0 | python3.6 test_client.py imdb_lstm_client_conf/serving_client_conf.prototxt imdb.vocab > ${dir}client_log.txt 2>&1
python3.6 test_client.py uci_housing_client/serving_client_conf.prototxt check_result client "lstm_CPU_RPC server test completed"
check_result $FUNCNAME kill_server_process
kill_server_process serving }
}
function lac_rpc() {
function faster_rcnn_model_rpc(){ dir=${log_dir}rpc_model/lac_rpc/
unsetproxy check_dir ${dir}
run_gpu_env unsetproxy
cd ${build_path}/python/examples/faster_rcnn cd ${build_path}/python/examples/lac
cp -r /root/.cache/dist_data/serving/faster_rcnn/faster_rcnn_model.tar.gz ./ data_dir=${data}lac/
tar xf faster_rcnn_model.tar.gz link_data ${data_dir}
wget https://paddle-serving.bj.bcebos.com/pddet_demo/infer_cfg.yml >/dev/null 2>&1 sed -i 's/9292/8868/g' lac_client.py
mv faster_rcnn_model/pddet* ./ python3.6 -m paddle_serving_server.serve --model lac_model/ --port 8868 > ${dir}server_log.txt 2>&1 &
sed -i 's/9494/8870/g' test_client.py check_result server 5
python3.6 -m paddle_serving_server_gpu.serve --model pddet_serving_model --port 8870 --gpu_id 0 --thread 2 & echo "我爱北京天安门" | python3.6 lac_client.py lac_client/serving_client_conf.prototxt lac_dict/ > ${dir}client_log.txt 2>&1
echo "faster rcnn running ..." check_result client "lac_CPU_RPC server test completed"
nvidia-smi kill_server_process
sleep 5 }
python3.6 test_client.py pddet_client_conf/serving_client_conf.prototxt infer_cfg.yml 000000570688.jpg
nvidia-smi function fit_a_line_rpc() {
check_result $FUNCNAME dir=${log_dir}rpc_model/fit_a_line_rpc/
kill_server_process serving check_dir ${dir}
} unsetproxy
cd ${build_path}/python/examples/fit_a_line
function cascade_rcnn_rpc(){ data_dir=${data}fit_a_line/
unsetproxy link_data ${data_dir}
run_gpu_env sed -i 's/9393/8869/g' test_client.py
cd ${build_path}/python/examples/cascade_rcnn python3.6 -m paddle_serving_server.serve --model uci_housing_model --port 8869 > ${dir}server_log.txt 2>&1 &
cp -r /root/.cache/dist_data/serving/cascade_rcnn/cascade_rcnn_r50_fpx_1x_serving.tar.gz ./ check_result server 5
tar xf cascade_rcnn_r50_fpx_1x_serving.tar.gz python3.6 test_client.py uci_housing_client/serving_client_conf.prototxt > ${dir}client_log.txt 2>&1
sed -i "s/9292/8879/g" test_client.py check_result client "fit_a_line_CPU_RPC server test completed"
python3.6 -m paddle_serving_server_gpu.serve --model serving_server --port 8879 --gpu_id 0 --thread 2 & kill_server_process
sleep 5 }
nvidia-smi
python3.6 test_client.py function faster_rcnn_model_rpc() {
nvidia-smi dir=${log_dir}rpc_model/faster_rcnn_rpc/
check_result $FUNCNAME check_dir ${dir}
kill_server_process serving unsetproxy
cd ${build_path}/python/examples/detection/faster_rcnn_r50_fpn_1x_coco
data_dir=${data}detection/faster_rcnn_r50_fpn_1x_coco/
link_data ${data_dir}
sed -i 's/9494/8870/g' test_client.py
python3.6 -m paddle_serving_server.serve --model serving_server --port 8870 --gpu_ids 0 --thread 2 --use_trt > ${dir}server_log.txt 2>&1 &
echo "faster rcnn running ..."
nvidia-smi
check_result server 10
python3.6 test_client.py 000000570688.jpg > ${dir}client_log.txt 2>&1
nvidia-smi
check_result client "faster_rcnn_GPU_RPC server test completed"
kill_server_process
}
function cascade_rcnn_rpc() {
dir=${log_dir}rpc_model/cascade_rcnn_rpc/
check_dir ${dir}
unsetproxy
cd ${build_path}/python/examples/cascade_rcnn
data_dir=${data}cascade_rcnn/
link_data ${data_dir}
sed -i "s/9292/8879/g" test_client.py
python3.6 -m paddle_serving_server.serve --model serving_server --port 8879 --gpu_ids 0 --thread 2 > ${dir}server_log.txt 2>&1 &
check_result server 5
nvidia-smi
python3.6 test_client.py > ${dir}client_log.txt 2>&1
nvidia-smi
check_result client "cascade_rcnn_GPU_RPC server test completed"
kill_server_process
} }
function deeplabv3_rpc() { function deeplabv3_rpc() {
unsetproxy dir=${log_dir}rpc_model/deeplabv3_rpc/
run_gpu_env check_dir ${dir}
cd ${build_path}/python/examples/deeplabv3 unsetproxy
cp -r /root/.cache/dist_data/serving/deeplabv3/deeplabv3.tar.gz ./ cd ${build_path}/python/examples/deeplabv3
tar xf deeplabv3.tar.gz data_dir=${data}deeplabv3/
sed -i "s/9494/8880/g" deeplabv3_client.py link_data ${data_dir}
python3.6 -m paddle_serving_server_gpu.serve --model deeplabv3_server --gpu_ids 0 --port 8880 --thread 2 & sed -i "s/9494/8880/g" deeplabv3_client.py
sleep 5 python3.6 -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 8880 --thread 2 > ${dir}server_log.txt 2>&1 &
nvidia-smi check_result server 5
python3.6 deeplabv3_client.py nvidia-smi
nvidia-smi python3.6 deeplabv3_client.py > ${dir}client_log.txt 2>&1
check_result $FUNCNAME nvidia-smi
kill_server_process serving check_result client "deeplabv3_GPU_RPC server test completed"
kill_server_process
} }
function mobilenet_rpc() { function mobilenet_rpc() {
unsetproxy dir=${log_dir}rpc_model/mobilenet_rpc/
run_gpu_env check_dir ${dir}
cd ${build_path}/python/examples/mobilenet unsetproxy
python3.6 -m paddle_serving_app.package --get_model mobilenet_v2_imagenet >/dev/null 2>&1 cd ${build_path}/python/examples/mobilenet
tar xf mobilenet_v2_imagenet.tar.gz python3.6 -m paddle_serving_app.package --get_model mobilenet_v2_imagenet >/dev/null 2>&1
sed -i "s/9393/8881/g" mobilenet_tutorial.py tar xf mobilenet_v2_imagenet.tar.gz
python3.6 -m paddle_serving_server_gpu.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 8881 & sed -i "s/9393/8881/g" mobilenet_tutorial.py
sleep 5 python3.6 -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 8881 > ${dir}server_log.txt 2>&1 &
nvidia-smi check_result server 5
python3.6 mobilenet_tutorial.py nvidia-smi
nvidia-smi python3.6 mobilenet_tutorial.py > ${dir}client_log.txt 2>&1
check_result $FUNCNAME nvidia-smi
kill_server_process serving check_result client "mobilenet_GPU_RPC server test completed"
kill_server_process
} }
function unet_rpc() { function unet_rpc() {
unsetproxy dir=${log_dir}rpc_model/unet_rpc/
run_gpu_env check_dir ${dir}
cd ${build_path}/python/examples/unet_for_image_seg unsetproxy
python3.6 -m paddle_serving_app.package --get_model unet >/dev/null 2>&1 cd ${build_path}/python/examples/unet_for_image_seg
tar xf unet.tar.gz data_dir=${data}unet_for_image_seg/
sed -i "s/9494/8882/g" seg_client.py link_data ${data_dir}
python3.6 -m paddle_serving_server_gpu.serve --model unet_model --gpu_ids 0 --port 8882 & sed -i "s/9494/8882/g" seg_client.py
sleep 5 python3.6 -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 8882 > ${dir}server_log.txt 2>&1 &
nvidia-smi check_result server 5
python3.6 seg_client.py nvidia-smi
nvidia-smi python3.6 seg_client.py > ${dir}client_log.txt 2>&1
check_result $FUNCNAME nvidia-smi
kill_server_process serving check_result client "unet_GPU_RPC server test completed"
kill_server_process
} }
function resnetv2_rpc() { function resnetv2_rpc() {
unsetproxy dir=${log_dir}rpc_model/resnetv2_rpc/
run_gpu_env check_dir ${dir}
cd ${build_path}/python/examples/resnet_v2_50 unsetproxy
cp /root/.cache/dist_data/serving/resnet_v2_50/resnet_v2_50_imagenet.tar.gz ./ cd ${build_path}/python/examples/resnet_v2_50
tar xf resnet_v2_50_imagenet.tar.gz data_dir=${data}resnet_v2_50/
sed -i 's/9393/8883/g' resnet50_v2_tutorial.py link_data ${data_dir}
python3.6 -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 8883 & sed -i 's/9393/8883/g' resnet50_v2_tutorial.py
sleep 10 python3.6 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 8883 > ${dir}server_log.txt 2>&1 &
nvidia-smi check_result server 10
python3.6 resnet50_v2_tutorial.py nvidia-smi
nvidia-smi python3.6 resnet50_v2_tutorial.py > ${dir}client_log.txt 2>&1
check_result $FUNCNAME nvidia-smi
kill_server_process serving check_result client "resnetv2_GPU_RPC server test completed"
kill_server_process
} }
function ocr_rpc() { function ocr_rpc() {
unsetproxy dir=${log_dir}rpc_model/ocr_rpc/
run_cpu_env check_dir ${dir}
cd ${build_path}/python/examples/ocr unsetproxy
cp -r /root/.cache/dist_data/serving/ocr/test_imgs ./ cd ${build_path}/python/examples/ocr
python3.6 -m paddle_serving_app.package --get_model ocr_rec >/dev/null 2>&1 data_dir=${data}ocr/
tar xf ocr_rec.tar.gz link_data ${data_dir}
sed -i 's/9292/8884/g' test_ocr_rec_client.py python3.6 -m paddle_serving_app.package --get_model ocr_rec >/dev/null 2>&1
python3.6 -m paddle_serving_server.serve --model ocr_rec_model --port 8884 & tar xf ocr_rec.tar.gz
sleep 5 sed -i 's/9292/8884/g' test_ocr_rec_client.py
python3.6 test_ocr_rec_client.py python3.6 -m paddle_serving_server.serve --model ocr_rec_model --port 8884 > ${dir}server_log.txt 2>&1 &
# check_result $FUNCNAME check_result server 5
kill_server_process serving python3.6 test_ocr_rec_client.py > ${dir}client_log.txt 2>&1
check_result client "ocr_CPU_RPC server test completed"
kill_server_process
} }
function criteo_ctr_rpc_cpu() { function criteo_ctr_rpc_cpu() {
unsetproxy dir=${log_dir}rpc_model/criteo_ctr_rpc_cpu/
run_cpu_env check_dir ${dir}
cd ${build_path}/python/examples/criteo_ctr unsetproxy
sed -i "s/9292/8885/g" test_client.py cd ${build_path}/python/examples/criteo_ctr
ln -s /root/.cache/dist_data/serving/criteo_ctr_with_cube/raw_data ./ data_dir=${data}criteo_ctr/
wget https://paddle-serving.bj.bcebos.com/criteo_ctr_example/criteo_ctr_demo_model.tar.gz >/dev/null 2>&1 link_data ${data_dir}
tar xf criteo_ctr_demo_model.tar.gz sed -i "s/9292/8885/g" test_client.py
mv models/ctr_client_conf . python3.6 -m paddle_serving_server.serve --model ctr_serving_model/ --port 8885 > ${dir}server_log.txt 2>&1 &
mv models/ctr_serving_model . check_result server 5
python3.6 -m paddle_serving_server.serve --model ctr_serving_model/ --port 8885 & python3.6 test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0 > ${dir}client_log.txt 2>&1
sleep 5 check_result client "criteo_ctr_CPU_RPC server test completed"
python3.6 test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0 kill_server_process
check_result $FUNCNAME
kill_server_process serving
} }
function criteo_ctr_rpc_gpu() { function criteo_ctr_rpc_gpu() {
unsetproxy dir=${log_dir}rpc_model/criteo_ctr_rpc_gpu/
run_gpu_env check_dir ${dir}
cd ${build_path}/python/examples/criteo_ctr unsetproxy
sed -i "s/8885/8886/g" test_client.py cd ${build_path}/python/examples/criteo_ctr
wget https://paddle-serving.bj.bcebos.com/criteo_ctr_example/criteo_ctr_demo_model.tar.gz >/dev/null 2>&1 data_dir=${data}criteo_ctr/
python3.6 -m paddle_serving_server_gpu.serve --model ctr_serving_model/ --port 8886 --gpu_ids 0 & link_data ${data_dir}
sleep 5 sed -i "s/8885/8886/g" test_client.py
nvidia-smi python3.6 -m paddle_serving_server.serve --model ctr_serving_model/ --port 8886 --gpu_ids 0 > ${dir}server_log.txt 2>&1 &
python3.6 test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/ check_result server 5
nvidia-smi nvidia-smi
check_result $FUNCNAME python3.6 test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0 > ${dir}client_log.txt 2>&1
kill `ps -ef|grep ctr|awk '{print $2}'` nvidia-smi
kill_server_process serving check_result client "criteo_ctr_GPU_RPC server test completed"
kill_server_process
} }
function yolov4_rpc_gpu() { function yolov4_rpc_gpu() {
unsetproxy dir=${log_dir}rpc_model/yolov4_rpc_gpu/
run_gpu_env check_dir ${dir}
cd ${build_path}/python/examples/yolov4 unsetproxy
sed -i "s/9393/8887/g" test_client.py cd ${build_path}/python/examples/yolov4
cp -r /root/.cache/dist_data/serving/yolov4/yolov4.tar.gz ./ data_dir=${data}yolov4/
tar xf yolov4.tar.gz link_data ${data_dir}
python3.6 -m paddle_serving_server_gpu.serve --model yolov4_model --port 8887 --gpu_ids 0 & sed -i "s/9393/8887/g" test_client.py
nvidia-smi python3.6 -m paddle_serving_server.serve --model yolov4_model --port 8887 --gpu_ids 0 > ${dir}server_log.txt 2>&1 &
sleep 5 nvidia-smi
python3.6 test_client.py 000000570688.jpg check_result server 5
nvidia-smi python3.6 test_client.py 000000570688.jpg > ${dir}client_log.txt 2>&1
# check_result $FUNCNAME nvidia-smi
kill_server_process serving check_result client "yolov4_GPU_RPC server test completed"
kill_server_process
} }
function senta_rpc_cpu() { function senta_rpc_cpu() {
unsetproxy dir=${log_dir}rpc_model/senta_rpc_cpu/
run_gpu_env check_dir ${dir}
cd ${build_path}/python/examples/senta unsetproxy
sed -i "s/9393/8887/g" test_client.py cd ${build_path}/python/examples/senta
cp -r /data/.cache/dist_data/serving/yolov4/yolov4.tar.gz ./ data_dir=${data}senta/
tar xf yolov4.tar.gz link_data ${data_dir}
python3.6 -m paddle_serving_server_gpu.serve --model yolov4_model --port 8887 --gpu_ids 0 & sed -i "s/9393/8887/g" test_client.py
nvidia-smi python3.6 -m paddle_serving_server.serve --model yolov4_model --port 8887 --gpu_ids 0 > ${dir}server_log.txt 2>&1 &
sleep 5 nvidia-smi
python3.6 test_client.py 000000570688.jpg check_result server 5
nvidia-smi python3.6 test_client.py 000000570688.jpg > ${dir}client_log.txt 2>&1
check_result $FUNCNAME nvidia-smi
kill_server_process serving check_result client "senta_GPU_RPC server test completed"
kill_server_process
} }
function fit_a_line_http() { function fit_a_line_http() {
unsetproxy dir=${log_dir}http_model/fit_a_line_http/
run_cpu_env check_dir ${dir}
cd ${build_path}/python/examples/fit_a_line unsetproxy
sed -i "s/9292/8871/g" test_server.py cd ${build_path}/python/examples/fit_a_line
python3.6 test_server.py & sed -i "s/9393/8871/g" test_server.py
sleep 10 python3.6 test_server.py > ${dir}server_log.txt 2>&1 &
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://${host}:8871/uci/prediction check_result server 10
check_result $FUNCNAME curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:8871/uci/prediction > ${dir}client_log.txt 2>&1
kill_server_process test_server check_result client "fit_a_line_CPU_HTTP server test completed"
kill_server_process
} }
function lac_http() { function lac_http() {
unsetproxy dir=${log_dir}http_model/lac_http/
run_cpu_env check_dir ${dir}
cd ${build_path}/python/examples/lac unsetproxy
python3.6 lac_web_service.py lac_model/ lac_workdir 8872 & cd ${build_path}/python/examples/lac
sleep 10 python3.6 lac_web_service.py lac_model/ lac_workdir 8872 > ${dir}server_log.txt 2>&1 &
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://${host}:8872/lac/prediction check_result server 10
check_result $FUNCNAME curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:8872/lac/prediction > ${dir}client_log.txt 2>&1
kill_server_process lac_web_service check_result client "lac_CPU_HTTP server test completed"
kill_server_process
} }
function cnn_http() { function cnn_http() {
unsetproxy dir=${log_dir}http_model/cnn_http/
run_cpu_env check_dir ${dir}
cd ${build_path}/python/examples/imdb unsetproxy
python3.6 text_classify_service.py imdb_cnn_model/ workdir/ 8873 imdb.vocab & cd ${build_path}/python/examples/imdb
sleep 10 python3.6 text_classify_service.py imdb_cnn_model/ workdir/ 8873 imdb.vocab > ${dir}server_log.txt 2>&1 &
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://${host}:8873/imdb/prediction check_result server 10
check_result $FUNCNAME curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:8873/imdb/prediction > ${dir}client_log.txt 2>&1
kill_server_process text_classify_service check_result client "cnn_CPU_HTTP server test completed"
kill_server_process
} }
function bow_http() { function bow_http() {
unsetproxy dir=${log_dir}http_model/bow_http/
run_cpu_env check_dir ${dir}
cd ${build_path}/python/examples/imdb unsetproxy
python3.6 text_classify_service.py imdb_bow_model/ workdir/ 8874 imdb.vocab & cd ${build_path}/python/examples/imdb
sleep 10 python3.6 text_classify_service.py imdb_bow_model/ workdir/ 8874 imdb.vocab > ${dir}server_log.txt 2>&1 &
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://${host}:8874/imdb/prediction check_result server 10
check_result $FUNCNAME curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:8874/imdb/prediction > ${dir}client_log.txt 2>&1
kill_server_process text_classify_service check_result client "bow_CPU_HTTP server test completed"
kill_server_process
} }
function lstm_http() { function lstm_http() {
unsetproxy dir=${log_dir}http_model/lstm_http/
run_cpu_env check_dir ${dir}
cd ${build_path}/python/examples/imdb unsetproxy
python3.6 text_classify_service.py imdb_bow_model/ workdir/ 8875 imdb.vocab & cd ${build_path}/python/examples/imdb
sleep 10 python3.6 text_classify_service.py imdb_bow_model/ workdir/ 8875 imdb.vocab > ${dir}server_log.txt 2>&1 &
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://${host}:8875/imdb/prediction check_result server 10
check_result $FUNCNAME curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:8875/imdb/prediction > ${dir}client_log.txt 2>&1
kill `ps -ef|grep imdb|awk '{print $2}'` check_result client "lstm_CPU_HTTP server test completed"
kill_server_process text_classify_service kill_server_process
} }
function ResNet50_http() { function ResNet50_http() {
unsetproxy dir=${log_dir}http_model/ResNet50_http/
run_gpu_env check_dir ${dir}
cd ${build_path}/python/examples/imagenet unsetproxy
python3.6 resnet50_web_service.py ResNet50_vd_model gpu 8876 & cd ${build_path}/python/examples/imagenet
sleep 10 python3.6 resnet50_web_service.py ResNet50_vd_model gpu 8876 > ${dir}server_log.txt 2>&1 &
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://${host}:8876/image/prediction check_result server 10
check_result $FUNCNAME curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:8876/image/prediction > ${dir}client_log.txt 2>&1
kill_server_process resnet50_web_service check_result client "ResNet50_GPU_HTTP server test completed"
} kill_server_process
}
function bert_http(){
unsetproxy function bert_http() {
run_gpu_env dir=${log_dir}http_model/ResNet50_http/
cd ${build_path}/python/examples/bert check_dir ${dir}
cp data-c.txt.1 data-c.txt unsetproxy
cp vocab.txt.1 vocab.txt cd ${build_path}/python/examples/bert
export CUDA_VISIBLE_DEVICES=0 cp data-c.txt.1 data-c.txt
python3.6 bert_web_service.py bert_seq128_model/ 8878 & cp vocab.txt.1 vocab.txt
sleep 5 export CUDA_VISIBLE_DEVICES=0
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:8878/bert/prediction python3.6 bert_web_service.py bert_seq128_model/ 8878 > ${dir}server_log.txt 2>&1 &
check_result $FUNCNAME check_result server 5
kill_server_process bert_web_service curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:8878/bert/prediction > ${dir}client_log.txt 2>&1
} check_result client "bert_GPU_HTTP server test completed"
kill_server_process
grpc_impl(){ }
unsetproxy
run_gpu_env function grpc_fit_a_line() {
cd ${build_path}/python/examples/grpc_impl_example/fit_a_line dir=${log_dir}rpc_model/grpc_fit_a_line/
sh get_data.sh >/dev/null 2>&1 check_dir ${dir}
python3.6 test_server.py uci_housing_model/ & unsetproxy
sleep 5 cd ${build_path}/python/examples/grpc_impl_example/fit_a_line
echo "sync predict" data_dir=${data}fit_a_line/
python3.6 test_sync_client.py link_data ${data_dir}
echo "async predict" python3.6 test_server.py uci_housing_model/ > ${dir}server_log.txt 2>&1 &
python3.6 test_asyn_client.py check_result server 5
echo "batch predict" echo "sync predict" > ${dir}client_log.txt 2>&1
python3.6 test_batch_client.py python3.6 test_sync_client.py >> ${dir}client_log.txt 2>&1
echo "timeout predict" check_result client "grpc_impl_example_fit_a_line_sync_CPU_gRPC server sync test completed"
python3.6 test_timeout_client.py echo "async predict" >> ${dir}client_log.txt 2>&1
# check_result $FUNCNAME python3.6 test_asyn_client.py >> ${dir}client_log.txt 2>&1
kill_server_process test_server check_result client "grpc_impl_example_fit_a_line_asyn_CPU_gRPC server asyn test completed"
} echo "batch predict" >> ${dir}client_log.txt 2>&1
python3.6 test_batch_client.py >> ${dir}client_log.txt 2>&1
function build_all_whl(){ check_result client "grpc_impl_example_fit_a_line_batch_CPU_gRPC server batch test completed"
for whl in ${build_whl_list[@]} echo "timeout predict" >> ${dir}client_log.txt 2>&1
do python3.6 test_timeout_client.py >> ${dir}client_log.txt 2>&1
echo "===========${whl} begin build===========" check_result client "grpc_impl_example_fit_a_line_timeout_CPU_gRPC server timeout test completed"
$whl kill_server_process
sleep 3 }
echo "===========${whl} build over ==========="
done function grpc_yolov4() {
} dir=${log_dir}rpc_model/grpc_yolov4/
cd ${build_path}/python/examples/grpc_impl_example/yolov4
function run_rpc_models(){ check_dir ${dir}
for model in ${rpc_model_list[@]} data_dir=${data}yolov4/
do link_data ${data_dir}
echo "===========${model} run begin===========" echo -e "${GREEN_COLOR}grpc_impl_example_yolov4_GPU_gRPC server started${RES}"
$model python3.6 -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang > ${dir}server_log.txt 2>&1 &
sleep 3 check_result server 5
echo "===========${model} run end ===========" echo -e "${GREEN_COLOR}grpc_impl_example_yolov4_GPU_gRPC client started${RES}"
done python3.6 test_client.py 000000570688.jpg > ${dir}client_log.txt 2>&1
} check_result client "grpc_yolov4_GPU_GRPC server test completed"
kill_server_process
function run_http_models(){ }
for model in ${http_model_list[@]}
do function build_all_whl() {
echo "===========${model} run begin===========" for whl in ${build_whl_list[@]}
$model do
sleep 3 echo "===========${whl} begin build==========="
echo "===========${model} run end ===========" $whl
done sleep 3
} echo "===========${whl} build over ==========="
done
function end_hook(){ }
cd ${build_path}
kill_server_process function run_rpc_models() {
kill `ps -ef|grep python|awk '{print $2}'` for model in ${rpc_model_list[@]}
sleep 5 do
echo "===========files===========" echo "===========${model} run begin==========="
ls -hlst $model
echo "=========== end ===========" sleep 3
echo "===========${model} run end ==========="
done
}
function run_http_models() {
for model in ${http_model_list[@]}
do
echo "===========${model} run begin==========="
$model
sleep 3
echo "===========${model} run end ==========="
done
}
function end_hook() {
cd ${build_path}
kill_server_process
kill `ps -ef|grep python|awk '{print $2}'`
sleep 5
echo "===========files==========="
ls -hlst
echo "=========== end ==========="
} }
function main() { function main() {
before_hook before_hook
build_all_whl build_all_whl
check check
run_env run_env
run_rpc_models unsetproxy
# run_http_models run_gpu_env
end_hook check_dir ${log_dir}rpc_model/
check_dir ${log_dir}http_model/
check_dir ${log_dir}error/
run_rpc_models
run_http_models
end_hook
if [ -f ${log_dir}error_models.txt ]; then
cat ${log_dir}error_models.txt
echo "error occurred!"
# exit 1
fi
} }
main$@ main$@