Merge branch 'develop' of https://github.com/PaddlePaddle/Serving into pyserving

eb606d82 · barrierye · af15850c · 78a7beab · eb606d82 · eb606d82
71 changed file
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -85,6 +85,17 @@ include(generic)
 include(flags)
 endif()
+if (APP)
+include(external/zlib)
+include(external/boost)
+include(external/protobuf)
+include(external/gflags)
+include(external/glog)
+include(external/pybind11)
+include(external/python)
+include(generic)
+endif()
 if (SERVER)
 include(external/cudnn)
 include(paddlepaddle)

--- a/README.md
+++ b/README.md
@@ -82,7 +82,9 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
 | `port` | int | `9292` | Exposed port of current service to users|
 | `name` | str | `""` | Service name, can be used to generate HTTP request url |
 | `model` | str | `""` | Path of paddle model directory to be served |
-| `mem_optim` | bool | `False` | Enable memory optimization |
+| `mem_optim` | bool | `False` | Enable memory / graphic memory optimization |
+| `ir_optim` | bool | `False` | Enable analysis and optimization of calculation graph |
+| `use_mkl` (Only for cpu version) | bool | `False` | Run inference with MKL |
 Here, we use `curl` to send a HTTP POST request to the service we just started. Users can use any python library to send HTTP POST as well, e.g, [requests](https://requests.readthedocs.io/en/master/).
 </center>

--- a/README_CN.md
+++ b/README_CN.md
@@ -87,6 +87,8 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
 | `name` | str | `""` | Service name, can be used to generate HTTP request url |
 | `model` | str | `""` | Path of paddle model directory to be served |
 | `mem_optim` | bool | `False` | Enable memory optimization |
+| `ir_optim` | bool | `False` | Enable analysis and optimization of calculation graph |
+| `use_mkl` (Only for cpu version) | bool | `False` | Run inference with MKL |
 我们使用 `curl` 命令来发送HTTP POST请求给刚刚启动的服务。用户也可以调用python库来发送HTTP POST请求，请参考英文文档 [requests](https://requests.readthedocs.io/en/master/)。
 </center>

--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -31,7 +31,7 @@ message( "WITH_GPU = ${WITH_GPU}")
 # Paddle Version should be one of:
 # latest: latest develop build
 # version number like 1.5.2
-SET(PADDLE_VERSION "1.7.1")
+SET(PADDLE_VERSION "1.7.2")
 if (WITH_GPU)
    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl")

--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -23,6 +23,11 @@ add_subdirectory(pdcodegen)
 add_subdirectory(sdk-cpp)
 endif()
+if (APP)
+add_subdirectory(configure)
+endif()
 if(CLIENT)
 add_subdirectory(general-client)
 endif()

--- a/core/configure/CMakeLists.txt
+++ b/core/configure/CMakeLists.txt
+if (SERVER OR CLIENT)
 LIST(APPEND protofiles
        ${CMAKE_CURRENT_LIST_DIR}/proto/server_configure.proto
        ${CMAKE_CURRENT_LIST_DIR}/proto/sdk_configure.proto
@@ -28,6 +29,7 @@ FILE(GLOB inc ${CMAKE_CURRENT_BINARY_DIR}/*.pb.h)
 install(FILES ${inc}
        DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure)
+endif()
 py_proto_compile(general_model_config_py_proto SRCS proto/general_model_config.proto)
 add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
@@ -51,6 +53,14 @@ add_custom_command(TARGET general_model_config_py_proto POST_BUILD
 endif()
+if (APP)
+add_custom_command(TARGET general_model_config_py_proto POST_BUILD
+                COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
+                COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
+                COMMENT "Copy generated general_model_config proto file into directory paddle_serving_app/proto."
+                WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+endif()
 if (SERVER)
 py_proto_compile(server_config_py_proto SRCS proto/server_configure.proto)
 add_custom_target(server_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)

--- a/core/configure/proto/server_configure.proto
+++ b/core/configure/proto/server_configure.proto
@@ -43,6 +43,7 @@ message EngineDesc {
  optional bool enable_memory_optimization = 13;
  optional bool static_optimization = 14;
  optional bool force_update_static_cache = 15;
+  optional bool enable_ir_optimization = 16;
 };
 // model_toolkit conf

--- a/core/general-client/CMakeLists.txt
+++ b/core/general-client/CMakeLists.txt
 if(CLIENT)
 add_subdirectory(pybind11)
 pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp)
-target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
+target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)
 endif()
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -345,7 +345,7 @@ int PredictorClient::numpy_predict(
    PredictorRes &predict_res_batch,
    const int &pid) {
  int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
+  VLOG(2) << "batch size: " << batch_size;
  predict_res_batch.clear();
  Timer timeline;
  int64_t preprocess_start = timeline.TimeStampUS();
@@ -462,7 +462,7 @@ int PredictorClient::numpy_predict(
            for (ssize_t j = 0; j < int_array.shape(1); j++) {
              for (ssize_t k = 0; k < int_array.shape(2); k++) {
-                for (ssize_t l = 0; k < int_array.shape(3); l++) {
+                for (ssize_t l = 0; l < int_array.shape(3); l++) {
-                  tensor->add_float_data(int_array(i, j, k, l));
+                  tensor->add_int64_data(int_array(i, j, k, l));
                }
              }
            }
@@ -474,7 +474,7 @@ int PredictorClient::numpy_predict(
          for (ssize_t i = 0; i < int_array.shape(0); i++) {
            for (ssize_t j = 0; j < int_array.shape(1); j++) {
              for (ssize_t k = 0; k < int_array.shape(2); k++) {
-                tensor->add_float_data(int_array(i, j, k));
+                tensor->add_int64_data(int_array(i, j, k));
              }
            }
          }
@@ -484,7 +484,7 @@ int PredictorClient::numpy_predict(
          auto int_array = int_feed[vec_idx].unchecked<2>();
          for (ssize_t i = 0; i < int_array.shape(0); i++) {
            for (ssize_t j = 0; j < int_array.shape(1); j++) {
-              tensor->add_float_data(int_array(i, j));
+              tensor->add_int64_data(int_array(i, j));
            }
          }
          break;
@@ -492,7 +492,7 @@ int PredictorClient::numpy_predict(
        case 1: {
          auto int_array = int_feed[vec_idx].unchecked<1>();
          for (ssize_t i = 0; i < int_array.shape(0); i++) {
-            tensor->add_float_data(int_array(i));
+            tensor->add_int64_data(int_array(i));
          }
          break;
        }

--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -35,6 +35,7 @@ class InferEngineCreationParams {
  InferEngineCreationParams() {
    _path = "";
    _enable_memory_optimization = false;
+    _enable_ir_optimization = false;
    _static_optimization = false;
    _force_update_static_cache = false;
  }
@@ -45,10 +46,16 @@ class InferEngineCreationParams {
    _enable_memory_optimization = enable_memory_optimization;
  }
+  void set_enable_ir_optimization(bool enable_ir_optimization) {
+    _enable_ir_optimization = enable_ir_optimization;
+  }
  bool enable_memory_optimization() const {
    return _enable_memory_optimization;
  }
+  bool enable_ir_optimization() const { return _enable_ir_optimization; }
  void set_static_optimization(bool static_optimization = false) {
    _static_optimization = static_optimization;
  }
@@ -68,6 +75,7 @@ class InferEngineCreationParams {
              << "model_path = " << _path << ", "
              << "enable_memory_optimization = " << _enable_memory_optimization
              << ", "
+              << "enable_ir_optimization = " << _enable_ir_optimization << ", "
              << "static_optimization = " << _static_optimization << ", "
              << "force_update_static_cache = " << _force_update_static_cache;
  }
@@ -75,6 +83,7 @@ class InferEngineCreationParams {
 private:
  std::string _path;
  bool _enable_memory_optimization;
+  bool _enable_ir_optimization;
  bool _static_optimization;
  bool _force_update_static_cache;
 };
@@ -150,6 +159,11 @@ class ReloadableInferEngine : public InferEngine {
      force_update_static_cache = conf.force_update_static_cache();
    }
+    if (conf.has_enable_ir_optimization()) {
+      _infer_engine_params.set_enable_ir_optimization(
+          conf.enable_ir_optimization());
+    }
    _infer_engine_params.set_path(_model_data_path);
    if (enable_memory_optimization) {
      _infer_engine_params.set_enable_memory_optimization(true);

--- a/core/sdk-cpp/include/endpoint_config.h
+++ b/core/sdk-cpp/include/endpoint_config.h
@@ -22,23 +22,23 @@ namespace baidu {
 namespace paddle_serving {
 namespace sdk_cpp {
-#define PARSE_CONF_ITEM(conf, item, name, fail)             \
+#define PARSE_CONF_ITEM(conf, item, name, fail)          \
-  do {                                                      \
+  do {                                                   \
-    if (conf.has_##name()) {                                \
+    if (conf.has_##name()) {                             \
-      item.set(conf.name());                                \
+      item.set(conf.name());                             \
-    } else {                                                \
+    } else {                                             \
-      LOG(ERROR) << "Not found key in configue: " << #name; \
+      VLOG(2) << "Not found key in configue: " << #name; \
-    }                                                       \
+    }                                                    \
  } while (0)
-#define ASSIGN_CONF_ITEM(dest, src, fail)                          \
+#define ASSIGN_CONF_ITEM(dest, src, fail)                       \
-  do {                                                             \
+  do {                                                          \
-    if (!src.init) {                                               \
+    if (!src.init) {                                            \
-      LOG(ERROR) << "Cannot assign an unintialized item: " << #src \
+      VLOG(2) << "Cannot assign an unintialized item: " << #src \
-                 << " to dest: " << #dest;                         \
+              << " to dest: " << #dest;                         \
-      return fail;                                                 \
+      return fail;                                              \
-    }                                                              \
+    }                                                           \
-    dest = src.value;                                              \
+    dest = src.value;                                           \
  } while (0)
 template <typename T>

--- a/doc/COMPILE.md
+++ b/doc/COMPILE.md
@@ -9,14 +9,18 @@
 - Golang: 1.9.2 and later
 - Git：2.17.1 and later
 - CMake：3.2.2 and later
- Python：2.7.2 and later
+- Python：2.7.2 and later / 3.6 and later
 It is recommended to use Docker for compilation. We have prepared the Paddle Serving compilation environment for you: 
 - CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`，dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
 - GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`，dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
-This document will take Python2 as an example to show how to compile Paddle Serving. If you want to compile with Python 3, just adjust the Python options of cmake.
+This document will take Python2 as an example to show how to compile Paddle Serving. If you want to compile with Python3, just adjust the Python options of cmake:
+- Set `-DPYTHON_INCLUDE_DIR` to `$PYTHONROOT/include/python3.6m/`
+- Set `-DPYTHON_LIBRARIES` to `$PYTHONROOT/lib64/libpython3.6.so`
+- Set `-DPYTHON_EXECUTABLE` to `$PYTHONROOT/bin/python3`
 ## Get Code
@@ -54,6 +58,8 @@ make -j10
 execute `make install` to put targets under directory `./output`
+**Attention：** After the compilation is successful, you need to set the path of `SERVING_BIN`. See [Note](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE.md#Note) for details.
 ## Compile Client
 ``` shell

--- a/doc/COMPILE_CN.md
+++ b/doc/COMPILE_CN.md
@@ -9,14 +9,18 @@
 - Golang: 1.9.2及以上
 - Git：2.17.1及以上
 - CMake：3.2.2及以上
- Python：2.7.2及以上
+- Python：2.7.2及以上 / 3.6及以上
 推荐使用Docker编译，我们已经为您准备好了Paddle Serving编译环境：
 - CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`，dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
 - GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`，dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
-本文档将以Python2为例介绍如何编译Paddle Serving。如果您想用Python3进行编译，只需要调整cmake的Python相关选项即可。
+本文档将以Python2为例介绍如何编译Paddle Serving。如果您想用Python3进行编译，只需要调整cmake的Python相关选项即可：
+- 将`-DPYTHON_INCLUDE_DIR`设置为`$PYTHONROOT/include/python3.6m/`
+- 将`-DPYTHON_LIBRARIES`设置为`$PYTHONROOT/lib64/libpython3.6.so`
+- 将`-DPYTHON_EXECUTABLE`设置为`$PYTHONROOT/bin/python3`
 ## 获取代码
@@ -54,6 +58,8 @@ make -j10
 执行`make install`可以把目标产出放在`./output`目录下。
+**注意：** 编译成功后，需要设置`SERVING_BIN`路径，详见后面的[注意事项](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE_CN.md#注意事项)。
 ## 编译Client部分
 ``` shell

--- a/doc/PERFORMANCE_OPTIM.md
+++ b/doc/PERFORMANCE_OPTIM.md
+# Performance optimization
+Due to different model structures, different prediction services consume different computing resources when performing predictions. For online prediction services, models that require fewer computing resources spend a higher proportion of their time on communication; these are called communication-intensive services. Models that require more computing resources spend more of their time on inference computation; these are called computation-intensive services.
+For a prediction service, the easiest way to determine its type is to look at the time ratio. Paddle Serving provides the [Timeline tool](../python/examples/util/README_CN.md), which can intuitively display the time spent in each stage of the prediction service.
+For communication-intensive prediction services, requests can be aggregated, and within a limit that can tolerate delay, multiple prediction requests can be combined into a batch for prediction.
+For computation-intensive prediction services, you can use GPU prediction services instead of CPU prediction services, or increase the number of graphics cards for GPU prediction services.
+Under the same conditions, the communication time of the HTTP prediction service provided by Paddle Serving is longer than that of the RPC prediction service, so for communication-intensive services, please give priority to using RPC communication.
+Parameters for performance optimization:
+| Parameters | Type | Default | Description                                                  |
+| ---------- | ---- | ------- | ------------------------------------------------------------ |
+| mem_optim  | bool | False   | Enable memory / graphic memory optimization                                   |
+| ir_optim   | bool | False   | Enable analysis and optimization of calculation graph, including OP fusion, etc. |
--- a/doc/PERFORMANCE_OPTIM_CN.md
+++ b/doc/PERFORMANCE_OPTIM_CN.md
 # 性能优化
-由于模型结构的不同，在执行预测时不同的预测对计算资源的消耗也不相同，对于在线的预测服务来说，对计算资源要求较少的模型，通信的时间成本占比就会较高，称为通信密集型服务，对计算资源要求较多的模型，推理计算的时间成本较高，称为计算密集型服务。对于这两种服务类型，可以根据实际需求采取不同的方式进行优化
+由于模型结构的不同，在执行预测时不同的预测服务对计算资源的消耗也不相同。对于在线的预测服务来说，对计算资源要求较少的模型，通信的时间成本占比就会较高，称为通信密集型服务，对计算资源要求较多的模型，推理计算的时间成本较高，称为计算密集型服务。对于这两种服务类型，可以根据实际需求采取不同的方式进行优化
 对于一个预测服务来说，想要判断属于哪种类型，最简单的方法就是看时间占比，Paddle Serving提供了[Timeline工具](../python/examples/util/README_CN.md)，可以直观的展现预测服务中各阶段的耗时。
@@ -10,4 +10,9 @@
 在相同条件下，Paddle Serving提供的HTTP预测服务的通信时间是大于RPC预测服务的，因此对于通信密集型的服务请优先考虑使用RPC的通信方式。
-对于模型较大，预测服务内存或显存占用较多的情况，可以通过将--mem_optim选项设置为True来开启内存/显存优化。
+性能优化相关参数：
+| 参数      | 类型 | 默认值 | 含义                      |
+| --------- | ---- | ------ | -------------------------------- |
+| mem_optim | bool | False  | 开启内存/显存优化                |
+| ir_optim  | bool | False  | 开启计算图分析优化，包括OP融合等 |
--- a/doc/RUN_IN_DOCKER.md
+++ b/doc/RUN_IN_DOCKER.md
@@ -53,12 +53,6 @@ pip install paddle-serving-server -i https://pypi.tuna.tsinghua.edu.cn/simple
 ### Test example
-Before running the GPU version of the Server side code, you need to set the `CUDA_VISIBLE_DEVICES` environment variable to specify which GPUs the prediction service uses. The following example specifies two GPUs with indexes 0 and 1:
-```bash
-export CUDA_VISIBLE_DEVICES=0,1
-```
 Get the trained Boston house price prediction model by the following command:
 ```bash
@@ -71,13 +65,13 @@ tar -xzf uci_housing.tar.gz
  Running on the Server side (inside the container):
  ```bash
-  python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --name uci &>std.log 2>err.log &
+  python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --name uci >std.log 2>err.log &
  ```
  Running on the Client side (inside or outside the container):
  ```bash
-  curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
+  curl -H "Content-Type:application/json" -X POST -d '{"feed":{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}, "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
  ```
 - Test RPC service
@@ -85,7 +79,7 @@ tar -xzf uci_housing.tar.gz
  Running on the Server side (inside the container):
  ```bash
-  python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 &>std.log 2>err.log &
+  python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 >std.log 2>err.log &
  ```
  Running following Python code on the Client side (inside or outside the container, The `paddle-serving-client` package needs to be installed):
@@ -176,7 +170,7 @@ tar -xzf uci_housing.tar.gz
  Running on the Client side (inside or outside the container):
  ```bash
-  curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
+  curl -H "Content-Type:application/json" -X POST -d '{"feed":{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}, "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
  ```
 - Test RPC service

--- a/doc/RUN_IN_DOCKER_CN.md
+++ b/doc/RUN_IN_DOCKER_CN.md
@@ -65,13 +65,13 @@ tar -xzf uci_housing.tar.gz
  在Server端（容器内）运行：
  ```bash
-  python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --name uci &>std.log 2>err.log &
+  python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --name uci >std.log 2>err.log &
  ```
  在Client端（容器内或容器外）运行：
  ```bash
-  curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
+  curl -H "Content-Type:application/json" -X POST -d '{"feed":{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}, "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
  ```
 - 测试RPC服务
@@ -79,7 +79,7 @@ tar -xzf uci_housing.tar.gz
  在Server端（容器内）运行：
  ```bash
-  python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 &>std.log 2>err.log &
+  python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 >std.log 2>err.log &
  ```
  在Client端（容器内或容器外，需要安装`paddle-serving-client`包）运行下面Python代码：
@@ -168,7 +168,7 @@ tar -xzf uci_housing.tar.gz
  在Client端（容器内或容器外）运行：
  ```bash
-  curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
+  curl -H "Content-Type:application/json" -X POST -d '{"feed":{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}, "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
  ```
 - 测试RPC服务

--- a/doc/SAVE.md
+++ b/doc/SAVE.md
-## How to save a servable model of Paddle Serving?
+# How to save a servable model of Paddle Serving?
 ([简体中文](./SAVE_CN.md)|English)
- Currently, paddle serving provides a save_model interface for users to access, the interface is similar with `save_inference_model` of Paddle.
+## Save from training or prediction script 
+Currently, paddle serving provides a save_model interface for users to access, the interface is similar with `save_inference_model` of Paddle.
 ``` python
 import paddle_serving_client.io as serving_io
 serving_io.save_model("imdb_model", "imdb_client_conf",
@@ -29,3 +30,15 @@ for line in sys.stdin:
    fetch_map = client.predict(feed=feed, fetch=fetch)
    print("{} {}".format(fetch_map["prediction"][1], label[0]))
 ```
+## Export from saved model files
+If you have saved model files using Paddle's `save_inference_model` API, you can use Paddle Serving's `inference_model_to_serving` API to convert them into model files that can be used for Paddle Serving.
+```
+import paddle_serving_client.io as serving_io
+serving_io.inference_model_to_serving(dirname, model_filename=None, params_filename=None, serving_server="serving_server", serving_client="serving_client")
+```
+dirname (str) - Path of saved model files. Program file and parameter files are saved in this directory.
+model_filename (str, optional) - The name of file to load the inference program. If it is None, the default filename __model__ will be used. Default: None.
+params_filename (str, optional) - The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. Default: None.
+serving_server (str, optional) - The path of model files and configuration files for server. Default: "serving_server".
+serving_client (str, optional) - The path of configuration files for client. Default: "serving_client".
--- a/doc/SAVE_CN.md
+++ b/doc/SAVE_CN.md
-## 怎样保存用于Paddle Serving的模型？
+# 怎样保存用于Paddle Serving的模型？
 (简体中文|[English](./SAVE.md))
- 目前，Paddle Serving提供了一个save_model接口供用户访问，该接口与Paddle的`save_inference_model`类似。
+## 从训练或预测脚本中保存
+目前，Paddle Serving提供了一个save_model接口供用户访问，该接口与Paddle的`save_inference_model`类似。
 ``` python
 import paddle_serving_client.io as serving_io
@@ -29,3 +30,15 @@ for line in sys.stdin:
    fetch_map = client.predict(feed=feed, fetch=fetch)
    print("{} {}".format(fetch_map["prediction"][1], label[0]))
 ```
+## 从已保存的模型文件中导出
+如果已使用Paddle 的`save_inference_model`接口保存出预测要使用的模型，则可以通过Paddle Serving的`inference_model_to_serving`接口转换成可用于Paddle Serving的模型文件。
+```
+import paddle_serving_client.io as serving_io
+serving_io.inference_model_to_serving(dirname, model_filename=None, params_filename=None, serving_server="serving_server", serving_client="serving_client")
+```
+dirname (str) – 需要转换的模型文件存储路径，Program结构文件和参数文件均保存在此目录。
+model_filename (str，可选) – 存储需要转换的模型Inference Program结构的文件名称。如果设置为None，则使用 __model__ 作为默认的文件名。默认值为None。
+params_filename (str，可选) – 存储需要转换的模型所有参数的文件名称。当且仅当所有模型参数被保存在一个单独的二进制文件中，它才需要被指定。如果模型参数是存储在各自分离的文件中，设置它的值为None。默认值为None。
+serving_server (str, 可选) - 转换后的模型文件和配置文件的存储路径。默认值为"serving_server"。
+serving_client (str, 可选) - 转换后的客户端配置文件存储路径。默认值为"serving_client"。
--- a/doc/TRAIN_TO_SERVICE.md
+++ b/doc/TRAIN_TO_SERVICE.md
@@ -350,12 +350,12 @@ In the above command, the first parameter is the saved server-side model and con
 After starting the HTTP prediction service, you can make prediction with a single command:
 ```
-curl -H "Content-Type: application/json" -X POST -d '{"words": "i am very sad | 0", "fetch": ["prediction"]}' http://127.0.0.1:9292/imdb/prediction
+curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction
 ```
 When the inference process is normal, the prediction probability is returned, as shown below.
 ```
-{"prediction": [0.5592559576034546,0.44074398279190063]}
+{"result":{"prediction":[[0.4389057457447052,0.561094343662262]]}}
 ```
 **Note**: The effect of each model training may be slightly different, and the inferred probability value using the trained model may not be consistent with the example.
--- a/doc/TRAIN_TO_SERVICE_CN.md
+++ b/doc/TRAIN_TO_SERVICE_CN.md
@@ -353,12 +353,12 @@ python text_classify_service.py imdb_cnn_model/ workdir/ 9292 imdb.vocab
 启动完HTTP预测服务，即可通过一行命令进行预测：
 ```
-curl -H "Content-Type:application/json" -X POST -d '{"words": "i am very sad | 0", "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction
+curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction
 ```
 预测流程正常时，会返回预测概率，示例如下。
 ```
-{"prediction":[0.5592559576034546,0.44074398279190063]}
+{"result":{"prediction":[[0.4389057457447052,0.561094343662262]]}}
 ```
 **注意**：每次模型训练的效果可能略有不同，使用训练出的模型预测概率数值可能与示例不一致。
--- a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
+++ b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
@@ -194,6 +194,12 @@ class FluidCpuAnalysisDirCore : public FluidFamilyCore {
      analysis_config.EnableMemoryOptim();
    }
+    if (params.enable_ir_optimization()) {
+      analysis_config.SwitchIrOptim(true);
+    } else {
+      analysis_config.SwitchIrOptim(false);
+    }
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core =
        paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);

--- a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
+++ b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
@@ -198,6 +198,12 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
      analysis_config.EnableMemoryOptim();
    }
+    if (params.enable_ir_optimization()) {
+      analysis_config.SwitchIrOptim(true);
+    } else {
+      analysis_config.SwitchIrOptim(false);
+    }
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core =
        paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);

--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -19,6 +19,8 @@ endif()
 if (CLIENT)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in
    ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../tools/python_tag.py
+    ${CMAKE_CURRENT_BINARY_DIR}/python_tag.py)
 endif()
 if (APP)
@@ -43,7 +45,8 @@ if (APP)
 add_custom_command(
        OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
        COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_app/ ${PADDLE_SERVING_BINARY_DIR}/python/
-        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel)
+        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
+        DEPENDS ${SERVING_APP_CORE} general_model_config_py_proto ${PY_FILES})
 add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
 endif()
@@ -52,6 +55,7 @@ add_custom_command(
 	OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
 	COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_client/ ${PADDLE_SERVING_BINARY_DIR}/python/
 	COMMAND ${CMAKE_COMMAND} -E copy ${SERVING_CLIENT_CORE} ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/serving_client.so
+    COMMAND env ${py_env} ${PYTHON_EXECUTABLE} python_tag.py
 	COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
 	DEPENDS ${SERVING_CLIENT_CORE} sdk_configure_py_proto ${PY_FILES})
 add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINARY_DIR}/.timestamp)

--- a/python/examples/criteo_ctr_with_cube/README.md
+++ b/python/examples/criteo_ctr_with_cube/README.md
@@ -2,16 +2,6 @@
 ([简体中文](./README_CN.md)|English)
-### Compile Source Code
-in the root directory of this git project
-```
-mkdir build_server
-cd build_server
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
-make -j10
-make install -j10
-```
 ### Get Sample Dataset
 go to directory `python/examples/criteo_ctr_with_cube`
@@ -31,7 +21,9 @@ the model will be in ./ctr_server_model_kv and ./ctr_client_config.
 ### Start Sparse Parameter Indexing Service
 ```
-cp ../../../build_server/output/bin/cube* ./cube/
+wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz
+tar xf cube_app.tar.gz
+mv cube_app/cube* ./cube/
 sh cube_prepare.sh &
 ```

--- a/python/examples/criteo_ctr_with_cube/README_CN.md
+++ b/python/examples/criteo_ctr_with_cube/README_CN.md
 ## 带稀疏参数索引服务的CTR预测服务
 (简体中文|[English](./README.md))
-### 编译源代码
-在本项目的根目录下，执行
-```
-mkdir build_server
-cd build_server
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
-make -j10
-make install -j10
-```
 ### 获取样例数据
 进入目录 `python/examples/criteo_ctr_with_cube`
 ```
@@ -29,7 +19,9 @@ mv models/data ./cube/
 ### 启动稀疏参数索引服务
 ```
-cp ../../../build_server/output/bin/cube* ./cube/
+wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz
+tar xf cube_app.tar.gz
+mv cube_app/cube* ./cube/
 sh cube_prepare.sh &
 ```

--- a/python/examples/deeplabv3/N0060.jpg
+++ b/python/examples/deeplabv3/N0060.jpg
--- a/python/examples/faster_rcnn_model/test_client.py
+++ b/python/examples/faster_rcnn_model/test_client.py
@@ -13,21 +13,22 @@
 # limitations under the License.
 from paddle_serving_client import Client
+from paddle_serving_app.reader import Sequential, File2Image, Resize, Transpose, BGR2RGB, SegPostprocess
 import sys
-import os
+import cv2
-import time
-from paddle_serving_app.reader.pddet import Detection
-import numpy as np
-py_version = sys.version_info[0]
-feed_var_names = ['image', 'im_shape', 'im_info']
-fetch_var_names = ['multiclass_nms']
-pddet = Detection(config_path=sys.argv[2], output_dir="./output")
-feed_dict = pddet.preprocess(feed_var_names, sys.argv[3])
 client = Client()
-client.load_client_config(sys.argv[1])
+client.load_client_config("seg_client/serving_client_conf.prototxt")
-client.connect(['127.0.0.1:9494'])
+client.connect(["127.0.0.1:9494"])
-fetch_map = client.predict(feed=feed_dict, fetch=fetch_var_names)
-outs = fetch_map.values()
+preprocess = Sequential(
-pddet.postprocess(fetch_map, fetch_var_names)
+    [File2Image(), Resize(
+        (512, 512), interpolation=cv2.INTER_LINEAR)])
+postprocess = SegPostprocess(2)
+filename = "N0060.jpg"
+im = preprocess(filename)
+fetch_map = client.predict(feed={"image": im}, fetch=["output"])
+fetch_map["filename"] = filename
+postprocess(fetch_map)
--- a/python/examples/faster_rcnn_model/README_CN.md
+++ b/python/examples/faster_rcnn_model/README_CN.md
@@ -12,7 +12,7 @@ wget https://paddle-serving.bj.bcebos.com/pddet_demo/infer_cfg.yml
 ### 启动服务
 ```
 tar xf faster_rcnn_model.tar.gz
-mv faster_rcnn_model/pddet* .
+mv faster_rcnn_model/pddet* ./
 GLOG_v=2 python -m paddle_serving_server_gpu.serve --model pddet_serving_model --port 9494 --gpu_id 0
 ```

--- a/python/examples/faster_rcnn_model/label_list.txt
+++ b/python/examples/faster_rcnn_model/label_list.txt
+background
+person
+bicycle
+car
+motorcycle
+airplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+couch
+potted plant
+bed
+dining table
+toilet
+tv
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
--- a/python/examples/faster_rcnn_model/new_test_client.py
+++ b/python/examples/faster_rcnn_model/new_test_client.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Example client: sends one image to a Faster R-CNN serving endpoint and
+# hands the detection result to RCNNPostprocess.
+#
+# The image path is read from sys.argv[2], as in the original script.
+# NOTE(review): sys.argv[1] is not used by this script any more; the original
+# passed it to the postprocessor as the image while preprocessing a different
+# argument. Confirm the intended command line.
+import sys
+import numpy as np
+from paddle_serving_client import Client
+from paddle_serving_app.reader import *
+# NOTE(review): Resize(640, 640) passes 640 as a second positional argument
+# rather than a (width, height) tuple -- confirm this is the intended call.
+preprocess = Sequential([
+    File2Image(), BGR2RGB(), Div(255.0),
+    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
+    Resize(640, 640), Transpose((2, 0, 1))
+])
+postprocess = RCNNPostprocess("label_list.txt", "output")
+client = Client()
+client.load_client_config(
+    "faster_rcnn_client_conf/serving_client_conf.prototxt")
+client.connect(['127.0.0.1:9393'])
+# Use one path for both preprocessing and postprocessing so the result is
+# attached to the same image that was sent to the server.
+image_path = sys.argv[2]
+im = preprocess(image_path)
+fetch_map = client.predict(
+    feed={
+        "image": im,
+        # After Transpose((2, 0, 1)) the array is channel-first, so shape[1:]
+        # holds the two spatial sizes; 1.0 is appended as a third entry.
+        "im_info": np.array(list(im.shape[1:]) + [1.0]),
+        "im_shape": np.array(list(im.shape[1:]) + [1.0])
+    },
+    fetch=["multiclass_nms"])
+fetch_map["image"] = image_path
+postprocess(fetch_map)
--- a/python/examples/imagenet/README.md
+++ b/python/examples/imagenet/README.md
@@ -8,6 +8,13 @@ The example uses the ResNet50_vd model to perform the imagenet 1000 classificati
 ```
 sh get_model.sh
 ```
+### Install preprocess module
+```
+pip install paddle_serving_app
+```
 ### HTTP Infer
 launch server side

--- a/python/examples/imagenet/README_CN.md
+++ b/python/examples/imagenet/README_CN.md
@@ -8,6 +8,13 @@
 ```
 sh get_model.sh
 ```
+### 安装数据预处理模块
+```
+pip install paddle_serving_app
+```
 ### 执行HTTP预测服务
 启动server端

--- a/python/examples/imagenet/image_rpc_client.py
+++ b/python/examples/imagenet/image_rpc_client.py
@@ -13,22 +13,24 @@
 # limitations under the License.
 import sys
-from image_reader import ImageReader
 from paddle_serving_client import Client
+from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize
 import time
 client = Client()
 client.load_client_config(sys.argv[1])
 client.connect(["127.0.0.1:9393"])
-reader = ImageReader()
+seq = Sequential([
+    File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
+    Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+])
+print(seq)
 start = time.time()
+image_file = "daisy.jpg"
 for i in range(1000):
-    with open("./data/n01440764_10026.JPEG", "rb") as f:
+    img = seq(image_file)
-        img = f.read()
-    img = reader.process_image(img)
    fetch_map = client.predict(feed={"image": img}, fetch=["score"])
 end = time.time()
 print(end - start)
-#print(fetch_map["score"])
--- a/python/examples/imdb/test_client.py
+++ b/python/examples/imdb/test_client.py
@@ -31,4 +31,4 @@ for line in sys.stdin:
    feed = {"words": word_ids}
    fetch = ["acc", "cost", "prediction"]
    fetch_map = client.predict(feed=feed, fetch=fetch)
-    print("{} {}".format(fetch_map["prediction"][0][1], label[0]))
+    print("{} {}".format(fetch_map["prediction"][0], label[0]))
--- a/python/examples/lac/lac_reader.py
+++ b/python/examples/lac/lac_reader.py
@@ -14,8 +14,10 @@
 from paddle_serving_client import Client
 import sys
-reload(sys)
+py_version = sys.version_info[0]
-sys.setdefaultencoding('utf-8')
+if py_version == 2:
+    reload(sys)
+    sys.setdefaultencoding('utf-8')
 import os
 import io

--- a/python/examples/mobilenet/daisy.jpg
+++ b/python/examples/mobilenet/daisy.jpg
--- a/python/examples/mobilenet/mobilenet_tutorial.py
+++ b/python/examples/mobilenet/mobilenet_tutorial.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Tutorial client: preprocess daisy.jpg and print the flattened "feature_map"
+# output returned by the MobileNet v2 serving endpoint.
+from paddle_serving_client import Client
+from paddle_serving_app.reader import (CenterCrop, Div, File2Image, Normalize,
+                                       RGB2BGR, Resize, Sequential, Transpose)
+client = Client()
+client.load_client_config(
+    "mobilenet_v2_imagenet_client/serving_client_conf.prototxt")
+client.connect(["127.0.0.1:9393"])
+# Preprocessing steps, applied in list order to the image file name.
+transforms = [
+    File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
+    Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
+]
+preprocess = Sequential(transforms)
+tensor = preprocess("daisy.jpg")
+result = client.predict(feed={"image": tensor}, fetch=["feature_map"])
+print(result["feature_map"].reshape(-1))
--- a/python/examples/resnet_v2_50/daisy.jpg
+++ b/python/examples/resnet_v2_50/daisy.jpg
--- a/python/examples/resnet_v2_50/resnet50_debug.py
+++ b/python/examples/resnet_v2_50/resnet50_debug.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Local debugging example: run the model directory given as sys.argv[1]
+# in-process through Debugger (no serving endpoint) and print the flattened
+# "feature_map" output for daisy.jpg.
+import sys
+from paddle_serving_app import Debugger
+from paddle_serving_app.reader import (CenterCrop, Div, File2Image, Normalize,
+                                       RGB2BGR, Resize, Sequential, Transpose)
+debugger = Debugger()
+debugger.load_model_config(sys.argv[1], gpu=True)
+# Preprocessing steps, applied in list order to the image file name.
+transforms = [
+    File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
+    Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
+]
+preprocess = Sequential(transforms)
+tensor = preprocess("daisy.jpg")
+result = debugger.predict(feed={"image": tensor}, fetch=["feature_map"])
+print(result["feature_map"].reshape(-1))
--- a/python/examples/resnet_v2_50/resnet50_v2_tutorial.py
+++ b/python/examples/resnet_v2_50/resnet50_v2_tutorial.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Tutorial client: preprocess daisy.jpg and print the flattened "feature_map"
+# output returned by the ResNet v2 50 serving endpoint.
+from paddle_serving_client import Client
+from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
+# Fixed package name: the original imported from "apddle_serving_app", which
+# fails with ImportError before anything else runs.
+from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
+client = Client()
+client.load_client_config(
+    "resnet_v2_50_imagenet_client/serving_client_conf.prototxt")
+client.connect(["127.0.0.1:9393"])
+# Preprocessing steps, applied in list order to the image file name.
+seq = Sequential([
+    File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
+    Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
+])
+image_file = "daisy.jpg"
+img = seq(image_file)
+fetch_map = client.predict(feed={"image": img}, fetch=["feature_map"])
+print(fetch_map["feature_map"].reshape(-1))
--- a/python/examples/senta/README.md
+++ b/python/examples/senta/README.md
@@ -4,6 +4,12 @@
 ```
 sh get_data.sh
 ```
+## Install preprocess module
+```
+pip install paddle_serving_app
+```
 ## Start http service
 ```
 python senta_web_service.py senta_bilstm_model/ workdir 9292

--- a/python/examples/senta/README_CN.md
+++ b/python/examples/senta/README_CN.md
@@ -4,6 +4,11 @@
 ```
 sh get_data.sh
 ```
+## 安装数据预处理模块
+```
+pip install paddle_serving_app
+```
 ## 启动HTTP服务
 ```
 python senta_web_service.py senta_bilstm_model/ workdir 9292

--- a/python/examples/senta/senta_web_service.py
+++ b/python/examples/senta/senta_web_service.py
@@ -39,6 +39,8 @@ class SentaService(WebService):
        self.show = show
    def start_lac_service(self):
+        if not os.path.exists('./lac_serving'):
+            os.mkdir("./lac_serving")
        os.chdir('./lac_serving')
        self.lac_port = self.port + 100
        r = os.popen(

--- a/python/examples/unet_for_image_seg/N0060.jpg
+++ b/python/examples/unet_for_image_seg/N0060.jpg
--- a/python/examples/unet_for_image_seg/seg_client.py
+++ b/python/examples/unet_for_image_seg/seg_client.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Example client: sends N0060.jpg to a U-Net segmentation serving endpoint and
+# passes the returned mask to SegPostprocess.
+from paddle_serving_client import Client
+from paddle_serving_app.reader import Sequential, File2Image, Resize, Transpose, BGR2RGB, SegPostprocess
+import sys
+import cv2
+client = Client()
+client.load_client_config("unet_client/serving_client_conf.prototxt")
+client.connect(["127.0.0.1:9494"])
+preprocess = Sequential(
+    [File2Image(), Resize(
+        (512, 512), interpolation=cv2.INTER_LINEAR)])
+postprocess = SegPostprocess(2)
+# Keep the path in a variable: it is needed both for preprocessing and as the
+# "filename" entry read by the postprocessor. The original referenced
+# `filename` without ever defining it (NameError).
+filename = "N0060.jpg"
+im = preprocess(filename)
+fetch_map = client.predict(feed={"image": im}, fetch=["output"])
+fetch_map["filename"] = filename
+postprocess(fetch_map)
--- a/python/paddle_serving_app/__init__.py
+++ b/python/paddle_serving_app/__init__.py
@@ -12,7 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from .reader.chinese_bert_reader import ChineseBertReader
-from .reader.image_reader import ImageReader
+from .reader.image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize
 from .reader.lac_reader import LACReader
 from .reader.senta_reader import SentaReader
 from .models import ServingModels
+from .local_predict import Debugger
--- a/python/paddle_serving_app/local_predict.py
+++ b/python/paddle_serving_app/local_predict.py
+# -*- coding: utf-8 -*-
+"""
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+import os
+import google.protobuf.text_format
+import numpy as np
+import argparse
+import paddle.fluid as fluid
+from .proto import general_model_config_pb2 as m_config
+from paddle.fluid.core import PaddleTensor
+from paddle.fluid.core import AnalysisConfig
+from paddle.fluid.core import create_paddle_predictor
+import logging
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+class Debugger(object):
+    """Run a saved serving model in-process through a Paddle predictor.
+
+    Call ``load_model_config`` once to read the model's feed/fetch variable
+    description and build the predictor, then call ``predict``.
+    """
+    def __init__(self):
+        self.feed_names_ = []
+        self.fetch_names_ = []
+        self.feed_types_ = {}
+        self.fetch_types_ = {}
+        self.feed_shapes_ = {}
+        self.feed_names_to_idx_ = {}
+        self.fetch_names_to_idx_ = {}
+        self.fetch_names_to_type_ = {}
+    def load_model_config(self, model_path, gpu=False, profile=True, cpu_num=1):
+        """Read the model description under ``model_path`` and create the predictor.
+
+        Args:
+            model_path: directory holding the model and its
+                ``serving_server_conf.prototxt`` file.
+            gpu: when true, GPU execution is enabled on the config; otherwise
+                GPU is disabled.
+            profile: when true, profiling is enabled on the config.
+            cpu_num: value passed to ``set_cpu_math_library_num_threads``.
+        """
+        client_config = "{}/serving_server_conf.prototxt".format(model_path)
+        model_conf = m_config.GeneralModelConfig()
+        # Use a context manager so the file handle is closed even if parsing
+        # the text-format message fails.
+        with open(client_config, 'r') as f:
+            model_conf = google.protobuf.text_format.Merge(
+                str(f.read()), model_conf)
+        config = AnalysisConfig(model_path)
+        self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
+        self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
+        # Rebuild every per-variable table from scratch so that loading a
+        # second model does not leave entries from the previous one behind.
+        self.feed_names_to_idx_ = {}
+        self.fetch_names_to_idx_ = {}
+        self.feed_types_ = {}
+        self.feed_shapes_ = {}
+        self.fetch_names_to_type_ = {}
+        for i, var in enumerate(model_conf.feed_var):
+            self.feed_names_to_idx_[var.alias_name] = i
+            self.feed_types_[var.alias_name] = var.feed_type
+            self.feed_shapes_[var.alias_name] = var.shape
+        for i, var in enumerate(model_conf.fetch_var):
+            self.fetch_names_to_idx_[var.alias_name] = i
+            self.fetch_names_to_type_[var.alias_name] = var.fetch_type
+        if not gpu:
+            config.disable_gpu()
+        else:
+            # NOTE(review): presumably (initial GPU memory in MB, device id);
+            # confirm against the AnalysisConfig documentation.
+            config.enable_use_gpu(100, 0)
+        if profile:
+            config.enable_profile()
+        config.set_cpu_math_library_num_threads(cpu_num)
+        config.switch_ir_optim(False)
+        self.predictor = create_paddle_predictor(config)
+    def predict(self, feed=None, fetch=None):
+        """Run the predictor and return the requested outputs.
+
+        Args:
+            feed: a dict mapping every feed alias name to an ndarray, or a
+                non-empty list of such dicts. Each array gets a leading axis
+                of size 1; with several dicts the per-sample arrays are
+                concatenated along that axis.
+            fetch: a fetch alias name or a list of names. Names that are not
+                in the loaded fetch list are ignored.
+
+        Returns:
+            dict mapping each recognised fetch name to the output ndarray.
+
+        Raises:
+            ValueError: if ``feed``/``fetch`` is missing or of the wrong type,
+                if ``feed`` is an empty list, or if no requested fetch name is
+                known to the loaded model.
+        """
+        if feed is None or fetch is None:
+            raise ValueError("You should specify feed and fetch for prediction")
+        if isinstance(fetch, str):
+            fetch_list = [fetch]
+        elif isinstance(fetch, list):
+            fetch_list = fetch
+        else:
+            raise ValueError("Fetch only accepts string and list of string")
+        if isinstance(feed, dict):
+            feed_batch = [feed]
+        elif isinstance(feed, list):
+            feed_batch = feed
+        else:
+            raise ValueError("Feed only accepts dict and list of dict")
+        if len(feed_batch) == 0:
+            raise ValueError("Feed should contain at least one sample")
+        fetch_names = [key for key in fetch_list if key in self.fetch_names_]
+        if len(fetch_names) == 0:
+            raise ValueError(
+                "Fetch names should not be empty or out of saved fetch list.")
+        inputs = []
+        for name in self.feed_names_:
+            # The original indexed `feed` directly, which only worked for a
+            # single dict; go through feed_batch so a list of dicts works too.
+            samples = [sample[name][np.newaxis, :] for sample in feed_batch]
+            if len(samples) == 1:
+                batched = samples[0]
+            else:
+                batched = np.concatenate(samples, axis=0)
+            inputs.append(PaddleTensor(batched))
+        outputs = self.predictor.run(inputs)
+        fetch_map = {}
+        # Iterate the validated names, not the raw `fetch` argument: a plain
+        # string would otherwise be walked character by character.
+        for name in fetch_names:
+            fetch_map[name] = outputs[self.fetch_names_to_idx_[
+                name]].as_ndarray()
+        return fetch_map
--- a/python/paddle_serving_app/models/model_list.py
+++ b/python/paddle_serving_app/models/model_list.py
@@ -20,78 +20,49 @@ from collections import OrderedDict
 class ServingModels(object):
    def __init__(self):
        self.model_dict = OrderedDict()
-        #senta
+        self.model_dict[
-        for key in [
+            "SentimentAnalysis"] = ["senta_bilstm", "senta_bow", "senta_cnn"]
-                "senta_bilstm", "senta_bow", "senta_cnn", "senta_gru",
+        self.model_dict["SemanticRepresentation"] = ["ernie_base"]
-                "senta_lstm"
+        self.model_dict["ChineseWordSegmentation"] = ["lac"]
-        ]:
+        self.model_dict["ObjectDetection"] = ["faster_rcnn", "yolov3"]
-            self.model_dict[
+        self.model_dict["ImageSegmentation"] = ["unet", "deeplabv3"]
-                key] = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SentimentAnalysis/" + key + ".tar.gz"
+        self.model_dict["ImageClassification"] = [
-        #image classification
+            "resnet_v2_50_imagenet", "mobilenet_v2_imagenet"
-        for key in [
+        ]
-                "alexnet_imagenet",
-                "darknet53-imagenet",
+        image_class_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ImageClassification/"
-                "densenet121_imagenet",
+        image_seg_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ImageSegmentation/"
-                "densenet161_imagenet",
+        object_detection_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ObjectDetection/"
-                "densenet169_imagenet",
+        senta_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SentimentAnalysis/"
-                "densenet201_imagenet",
+        semantic_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticRepresentation/"
-                "densenet264_imagenet"
+        wordseg_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/ChineseWordSegmentation/"
-                "dpn107_imagenet",
-                "dpn131_imagenet",
+        self.url_dict = {}
-                "dpn68_imagenet",
-                "dpn92_imagenet",
+        def pack_url(model_dict, key, url):
-                "dpn98_imagenet",
+            for i, value in enumerate(model_dict[key]):
-                "efficientnetb0_imagenet",
+                self.url_dict[model_dict[key][i]] = url + model_dict[key][
-                "efficientnetb1_imagenet",
+                    i] + ".tar.gz"
-                "efficientnetb2_imagenet",
-                "efficientnetb3_imagenet",
+        pack_url(self.model_dict, "SentimentAnalysis", senta_url)
-                "efficientnetb4_imagenet",
+        pack_url(self.model_dict, "SemanticRepresentation", semantic_url)
-                "efficientnetb5_imagenet",
+        pack_url(self.model_dict, "ChineseWordSegmentation", wordseg_url)
-                "efficientnetb6_imagenet",
+        pack_url(self.model_dict, "ObjectDetection", object_detection_url)
-                "googlenet_imagenet",
+        pack_url(self.model_dict, "ImageSegmentation", image_seg_url)
-                "inception_v4_imagenet",
+        pack_url(self.model_dict, "ImageClassification", image_class_url)
-                "inception_v2_imagenet",
-                "nasnet_imagenet",
-                "pnasnet_imagenet",
-                "resnet_v2_101_imagenet",
-                "resnet_v2_151_imagenet",
-                "resnet_v2_18_imagenet",
-                "resnet_v2_34_imagenet",
-                "resnet_v2_50_imagenet",
-                "resnext101_32x16d_wsl",
-                "resnext101_32x32d_wsl",
-                "resnext101_32x48d_wsl",
-                "resnext101_32x8d_wsl",
-                "resnext101_32x4d_imagenet",
-                "resnext101_64x4d_imagenet",
-                "resnext101_vd_32x4d_imagenet",
-                "resnext101_vd_64x4d_imagenet",
-                "resnext152_64x4d_imagenet",
-                "resnext152_vd_64x4d_imagenet",
-                "resnext50_64x4d_imagenet",
-                "resnext50_vd_32x4d_imagenet",
-                "resnext50_vd_64x4d_imagenet",
-                "se_resnext101_32x4d_imagenet",
-                "se_resnext50_32x4d_imagenet",
-                "shufflenet_v2_imagenet",
-                "vgg11_imagenet",
-                "vgg13_imagenet",
-                "vgg16_imagenet",
-                "vgg19_imagenet",
-                "xception65_imagenet",
-                "xception71_imagenet",
-        ]:
-            self.model_dict[
-                key] = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ImageClassification/" + key + ".tar.gz"
    def get_model_list(self):
-        return (self.model_dict.keys())
+        return self.model_dict
    def download(self, model_name):
-        if model_name in self.model_dict:
+        if model_name in self.url_dict:
-            url = self.model_dict[model_name]
+            url = self.url_dict[model_name]
            r = os.system('wget ' + url + ' --no-check-certificate')
+    def get_tutorial(self, model_name):
+        """Return the placeholder tutorial text for a known model.
+
+        Returns None (implicitly) when ``model_name`` is not a key of
+        ``self.url_dict``.
+        """
+        # The original looked the name up in `self.tutorial_url`, an attribute
+        # that is never assigned in this class; `url_dict` is the table of
+        # known model names built in __init__.
+        if model_name in self.url_dict:
+            return "Tutorial of {} to be added".format(model_name)
 if __name__ == "__main__":
    models = ServingModels()

--- a/python/paddle_serving_app/package.py
+++ b/python/paddle_serving_app/package.py
@@ -20,6 +20,7 @@ Usage:
 """
 import argparse
+import sys
 from .models import ServingModels
@@ -29,6 +30,8 @@ def parse_args():  # pylint: disable=doc-string-missing
        "--get_model", type=str, default="", help="Download a specific model")
    parser.add_argument(
        '--list_model', nargs='*', default=None, help="List Models")
+    parser.add_argument(
+        '--tutorial', type=str, default="", help="Get running command")
    return parser.parse_args()
@@ -36,18 +39,33 @@ if __name__ == "__main__":
    args = parse_args()
    if args.list_model != None:
        model_handle = ServingModels()
-        model_names = model_handle.get_model_list()
+        model_dict = model_handle.get_model_list()
-        for key in model_names:
+        # Task level model list
-            print(key)
+        # Text Classification, Semantic Representation
+        # Image Classification, Object Detection, Image Segmentation
+        for key in model_dict:
+            print("-----------------------------------------------")
+            print("{}: {}".format(key, " | ".join(model_dict[key])))
    elif args.get_model != "":
        model_handle = ServingModels()
-        model_names = model_handle.get_model_list()
+        model_dict = model_handle.url_dict
-        if args.get_model not in model_names:
+        if args.get_model not in model_dict:
            print(
                "Your model name does not exist in current model list, stay tuned"
            )
            sys.exit(0)
        model_handle.download(args.get_model)
+    elif args.tutorial != "":
+        # --tutorial <model_name>: print the tutorial text for a known model.
+        model_handle = ServingModels()
+        model_dict = model_handle.url_dict
+        # Validate the --tutorial value itself; the original checked
+        # args.get_model, which is empty whenever this branch is reached.
+        if args.tutorial not in model_dict:
+            print(
+                "Your model name does not exist in current model list, stay tuned"
+            )
+            sys.exit(0)
+        # get_tutorial requires the model name as its argument.
+        tutorial_str = model_handle.get_tutorial(args.tutorial)
+        print(tutorial_str)
    else:
        print("Wrong argument")
        print("""

--- a/python/paddle_serving_app/reader/__init__.py
+++ b/python/paddle_serving_app/reader/__init__.py
@@ -11,3 +11,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, RCNNPostprocess, SegPostprocess
--- a/python/paddle_serving_app/reader/daisy.jpg
+++ b/python/paddle_serving_app/reader/daisy.jpg
--- a/python/paddle_serving_app/reader/functional.py
+++ b/python/paddle_serving_app/reader/functional.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import cv2
+import numpy as np
+def transpose(img, transpose_target):
+    """Return ``img`` with its axes permuted according to ``transpose_target``."""
+    return img.transpose(transpose_target)
+def normalize(img, mean, std, channel_first):
+    """Subtract ``mean`` and divide by ``std`` per channel, in place.
+
+    ``mean`` and ``std`` are reshaped to (3, 1, 1) when ``channel_first`` is
+    true and to (1, 1, 3) otherwise, so they broadcast over a 3-channel image.
+    ``img`` itself is modified and returned.
+    """
+    # need to optimize here
+    stat_shape = (3, 1, 1) if channel_first else (1, 1, 3)
+    # Reshape both statistics before touching img, so a bad mean/std raises
+    # without leaving img half-normalized.
+    img_mean, img_std = [np.array(stat).reshape(stat_shape)
+                         for stat in (mean, std)]
+    img -= img_mean
+    img /= img_std
+    return img
+def crop(img, target_size, center):
+    """Cut a ``target_size`` x ``target_size`` window out of ``img``.
+
+    The window is centred when ``center == True``; otherwise its top-left
+    corner is drawn with ``np.random.randint``. ``img`` is indexed as
+    (height, width, channels).
+    """
+    height, width = img.shape[:2]
+    if center == True:
+        left = (width - target_size) // 2
+        top = (height - target_size) // 2
+    else:
+        # Draw the horizontal offset first, then the vertical one.
+        left = np.random.randint(0, width - target_size + 1)
+        top = np.random.randint(0, height - target_size + 1)
+    return img[top:top + target_size, left:left + target_size, :]
+def resize(img, target_size, max_size=2147483647, interpolation=None):
+    """Resize ``img`` with ``cv2.resize``.
+
+    Args:
+        img: image array; shape[0] is used as height and shape[1] as width.
+        target_size: a tuple whose first entry is the output width and whose
+            second is the output height (each capped at ``max_size``), or a
+            number giving the target length of the shorter side, in which
+            case the aspect ratio is kept.
+        max_size: upper bound on the output side lengths. In the scalar case
+            the scale is reduced so the longer side does not exceed it.
+        interpolation: OpenCV interpolation flag, or None to use the
+            ``cv2.resize`` default.
+
+    Returns:
+        The resized image.
+    """
+    if isinstance(target_size, tuple):
+        resized_width = min(target_size[0], max_size)
+        resized_height = min(target_size[1], max_size)
+    else:
+        im_max_size = max(img.shape[0], img.shape[1])
+        percent = float(target_size) / min(img.shape[0], img.shape[1])
+        if np.round(percent * im_max_size) > max_size:
+            percent = float(max_size) / float(im_max_size)
+        resized_width = int(round(img.shape[1] * percent))
+        resized_height = int(round(img.shape[0] * percent))
+    # Compare against None explicitly: cv2.INTER_NEAREST has the value 0, so a
+    # plain truthiness test would silently ignore it.
+    if interpolation is not None:
+        resized = cv2.resize(
+            img, (resized_width, resized_height), interpolation=interpolation)
+    else:
+        resized = cv2.resize(img, (resized_width, resized_height))
+    return resized
--- a/python/paddle_serving_app/reader/image_reader.py
+++ b/python/paddle_serving_app/reader/image_reader.py
@@ -11,9 +11,472 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import cv2
+import os
+import urllib
 import numpy as np
+import base64
+from . import functional as F
+from PIL import Image, ImageDraw
+import json
+# Printable names for interpolation settings; only cv2.INTER_LINEAR and None
+# have an entry here.
+_cv2_interpolation_to_str = {cv2.INTER_LINEAR: "cv2.INTER_LINEAR", None: "None"}
+def generate_colormap(num_classes):
+    color_map = num_classes * [0, 0, 0]
+    for i in range(0, num_classes):
+        j = 0
+        lab = i
+        while lab:
+            color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
+            color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
+            color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
+            j += 1
+            lab >>= 3
+    color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
+    return color_map
+class SegPostprocess(object):
+    def __init__(self, class_num):
+        self.class_num = class_num
+    def __call__(self, image_with_result):
+        if "filename" not in image_with_result:
+            raise ("filename should be specified in postprocess")
+        img_name = image_with_result["filename"]
+        ori_img = cv2.imread(img_name, -1)
+        ori_shape = ori_img.shape
+        mask = None
+        for key in image_with_result:
+            if ".lod" in key or "filename" in key:
+                continue
+            mask = image_with_result[key]
+        if mask is None:
+            raise ("segment mask should be specified in postprocess")
+        mask = mask.astype("uint8")
+        mask_png = mask.reshape((512, 512, 1))
+        #score_png = mask_png[:, :, np.newaxis]
+        score_png = mask_png
+        score_png = np.concatenate([score_png] * 3, axis=2)
+        color_map = generate_colormap(self.class_num)
+        for i in range(score_png.shape[0]):
+            for j in range(score_png.shape[1]):
+                score_png[i, j] = color_map[score_png[i, j, 0]]
+        ext_pos = img_name.rfind(".")
+        img_name_fix = img_name[:ext_pos] + "_" + img_name[ext_pos + 1:]
+        mask_save_name = img_name_fix + "_mask.png"
+        cv2.imwrite(mask_save_name, mask_png, [cv2.CV_8UC1])
+        vis_result_name = img_name_fix + "_result.png"
+        result_png = score_png
+        result_png = cv2.resize(
+            result_png,
+            ori_shape[:2],
+            fx=0,
+            fy=0,
+            interpolation=cv2.INTER_CUBIC)
+        cv2.imwrite(vis_result_name, result_png, [cv2.CV_8UC1])
+class RCNNPostprocess(object):
+    def __init__(self, label_file, output_dir):
+        self.output_dir = output_dir
+        self.label_file = label_file
+        self.label_list = []
+        with open(label_file) as fin:
+            for line in fin:
+                self.label_list.append(line.strip())
+        self.clsid2catid = {i: i for i in range(len(self.label_list))}
+        self.catid2name = {i: name for i, name in enumerate(self.label_list)}
+    def _offset_to_lengths(self, lod):
+        offset = lod[0]
+        lengths = [offset[i + 1] - offset[i] for i in range(len(offset) - 1)]
+        return [lengths]
+    def _bbox2out(self, results, clsid2catid, is_bbox_normalized=False):
+        xywh_res = []
+        for t in results:
+            bboxes = t['bbox'][0]
+            lengths = t['bbox'][1][0]
+            if bboxes.shape == (1, 1) or bboxes is None:
+                continue
+            k = 0
+            for i in range(len(lengths)):
+                num = lengths[i]
+                for j in range(num):
+                    dt = bboxes[k]
+                    clsid, score, xmin, ymin, xmax, ymax = dt.tolist()
+                    catid = (clsid2catid[int(clsid)])
+                    if is_bbox_normalized:
+                        xmin, ymin, xmax, ymax = \
+                            self.clip_bbox([xmin, ymin, xmax, ymax])
+                        w = xmax - xmin
+                        h = ymax - ymin
+                        im_shape = t['im_shape'][0][i].tolist()
+                        im_height, im_width = int(im_shape[0]), int(im_shape[1])
+                        xmin *= im_width
+                        ymin *= im_height
+                        w *= im_width
+                        h *= im_height
+                    else:
+                        w = xmax - xmin + 1
+                        h = ymax - ymin + 1
+                    bbox = [xmin, ymin, w, h]
+                    coco_res = {
+                        'category_id': catid,
+                        'bbox': bbox,
+                        'score': score
+                    }
+                    xywh_res.append(coco_res)
+                    k += 1
+        return xywh_res
+    def _get_bbox_result(self, fetch_map, fetch_name, clsid2catid):
+        result = {}
+        is_bbox_normalized = False
+        output = fetch_map[fetch_name]
+        lod = [fetch_map[fetch_name + '.lod']]
+        lengths = self._offset_to_lengths(lod)
+        np_data = np.array(output)
+        result['bbox'] = (np_data, lengths)
+        result['im_id'] = np.array([[0]])
+        bbox_results = self._bbox2out([result], clsid2catid, is_bbox_normalized)
+        return bbox_results
+    def color_map(self, num_classes):
+        color_map = num_classes * [0, 0, 0]
+        for i in range(0, num_classes):
+            j = 0
+            lab = i
+            while lab:
+                color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
+                color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
+                color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
+                j += 1
+                lab >>= 3
+        color_map = np.array(color_map).reshape(-1, 3)
+        return color_map
+    def draw_bbox(self, image, catid2name, bboxes, threshold, color_list):
+        """
+        draw bbox on image
+        """
+        draw = ImageDraw.Draw(image)
+        for dt in np.array(bboxes):
+            catid, bbox, score = dt['category_id'], dt['bbox'], dt['score']
+            if score < threshold:
+                continue
+            xmin, ymin, w, h = bbox
+            xmax = xmin + w
+            ymax = ymin + h
+            color = tuple(color_list[catid])
+            # draw bbox
+            draw.line(
+                [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
+                 (xmin, ymin)],
+                width=2,
+                fill=color)
+            # draw label
+            text = "{} {:.2f}".format(catid2name[catid], score)
+            tw, th = draw.textsize(text)
+            draw.rectangle(
+                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color)
+            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
+        return image
+    def visualize(self, infer_img, bbox_results, catid2name, num_classes):
+        image = Image.open(infer_img).convert('RGB')
+        color_list = self.color_map(num_classes)
+        image = self.draw_bbox(image, self.catid2name, bbox_results, 0.5,
+                               color_list)
+        image_path = os.path.split(infer_img)[-1]
+        if not os.path.exists(self.output_dir):
+            os.makedirs(self.output_dir)
+        out_path = os.path.join(self.output_dir, image_path)
+        image.save(out_path, quality=95)
+    def __call__(self, image_with_bbox):
+        fetch_name = ""
+        for key in image_with_bbox:
+            if key == "image":
+                continue
+            if ".lod" in key:
+                continue
+            fetch_name = key
+        bbox_result = self._get_bbox_result(image_with_bbox, fetch_name,
+                                            self.clsid2catid)
+        if os.path.isdir(self.output_dir) is False:
+            os.mkdir(self.output_dir)
+        self.visualize(image_with_bbox["image"], bbox_result, self.catid2name,
+                       len(self.label_list))
+        if os.path.isdir(self.output_dir) is False:
+            os.mkdir(self.output_dir)
+        bbox_file = os.path.join(self.output_dir, 'bbox.json')
+        with open(bbox_file, 'w') as f:
+            json.dump(bbox_result, f, indent=4)
+    def __repr__(self):
+        return self.__class__.__name__ + "label_file: {1}, output_dir: {2}".format(
+            self.label_file, self.output_dir)
+class Sequential(object):
+    """
+    Args:
+        sequence (sequence of ``Transform`` objects): list of transforms to chain.
+    This API references some of the design pattern of torchvision
+    Users can simply use this API in training as well
+    Example:
+        >>> image_reader.Sequnece([
+        >>>     transforms.CenterCrop(10),
+        >>> ])
+    """
+    def __init__(self, transforms):
+        self.transforms = transforms
+    def __call__(self, img):
+        for t in self.transforms:
+            img = t(img)
+        return img
+    def __repr__(self):
+        format_string_ = self.__class__.__name__ + '('
+        for t in self.transforms:
+            format_string_ += '\n'
+            format_string_ += '    {0}'.format(t)
+        format_string_ += '\n)'
+        return format_string_
+class RGB2BGR(object):
+    def __init__(self):
+        pass
+    def __call__(self, img):
+        return img[:, :, ::-1]
+    def __repr__(self):
+        return self.__class__.__name__ + "()"
+class BGR2RGB(object):
+    def __init__(self):
+        pass
+    def __call__(self, img):
+        return img[:, :, ::-1]
+    def __repr__(self):
+        return self.__class__.__name__ + "()"
+class File2Image(object):
+    def __init__(self):
+        pass
+    def __call__(self, img_path):
+        fin = open(img_path)
+        sample = fin.read()
+        data = np.fromstring(sample, np.uint8)
+        img = cv2.imdecode(data, cv2.IMREAD_COLOR)
+        '''
+        img = cv2.imread(img_path, -1)
+        channels = img.shape[2]
+        ori_h = img.shape[0]
+        ori_w = img.shape[1]
+        '''
+        return img
+    def __repr__(self):
+        return self.__class__.__name__ + "()"
+class URL2Image(object):
+    def __init__(self):
+        pass
+    def __call__(self, img_url):
+        resp = urllib.urlopen(img_url)
+        sample = resp.read()
+        data = np.fromstring(sample, np.uint8)
+        img = cv2.imdecode(data, cv2.IMREAD_COLOR)
+        return img
+    def __repr__(self):
+        return self.__class__.__name__ + "()"
+class Base64ToImage(object):
+    def __init__(self):
+        pass
+    def __call__(self, img_base64):
+        img = base64.b64decode(img_base64)
+        return img
+    def __repr__(self):
+        return self.__class__.__name__ + "()"
+class Div(object):
+    """ divide by some float number """
+    def __init__(self, value):
+        self.value = value
+    def __call__(self, img):
+        """
+        Args:
+            img (numpy array): (int8 numpy array)
+        Returns:
+            img (numpy array): (float32 numpy array)
+        """
+        img = img.astype('float32') / self.value
+        return img
+    def __repr__(self):
+        return self.__class__.__name__ + "({})".format(self.value)
+class Normalize(object):
+    """Normalize a tensor image with mean and standard deviation.
+    Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform
+    will normalize each channel of the input ``torch.*Tensor`` i.e.
+    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``
+    .. note::
+        This transform acts out of place, i.e., it does not mutate the input tensor.
+    Args:
+        mean (sequence): Sequence of means for each channel.
+        std (sequence): Sequence of standard deviations for each channel.
+    """
+    def __init__(self, mean, std, channel_first=False):
+        self.mean = mean
+        self.std = std
+        self.channel_first = channel_first
+    def __call__(self, img):
+        """
+        Args:
+            img (numpy array): (C, H, W) to be normalized.
+        Returns:
+            Tensor: Normalized Tensor image.
+        """
+        return F.normalize(img, self.mean, self.std, self.channel_first)
+    def __repr__(self):
+        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean,
+                                                                      self.std)
+class Lambda(object):
+    """Apply a user-defined lambda as a transform.
+       Very shame to just copy from 
+       https://github.com/pytorch/vision/blob/master/torchvision/transforms/transforms.py#L301
+    Args:
+        lambd (function): Lambda/function to be used for transform.
+    """
+    def __init__(self, lambd):
+        assert callable(lambd), repr(type(lambd)
+                                     .__name__) + " object is not callable"
+        self.lambd = lambd
+    def __call__(self, img):
+        return self.lambd(img)
+    def __repr__(self):
+        return self.__class__.__name__ + '()'
+class CenterCrop(object):
+    """Crops the given Image at the center.
+    Args:
+        size (sequence or int): Desired output size of the crop. If size is an
+            int instead of sequence like (h, w), a square crop (size, size) is
+            made.
+    """
+    def __init__(self, size):
+        self.size = size
+    def __call__(self, img):
+        """
+        Args:
+            img (numpy array): Image to be cropped.
+        Returns:
+            numpy array Image: Cropped image.
+        """
+        return F.crop(img, self.size, True)
+    def __repr__(self):
+        return self.__class__.__name__ + '(size={0})'.format(self.size)
+class Resize(object):
+    """Resize the input numpy array Image to the given size.
+    Args:
+        size (sequence or int): Desired output size. If size is a sequence like
+            (h, w), output size will be matched to this. If size is an int,
+            smaller edge of the image will be matched to this number.
+            i.e, if height > width, then image will be rescaled to
+            (size * height / width, size)
+        interpolation (int, optional): Desired interpolation. Default is
+            ``None``
+    """
+    def __init__(self, size, max_size=2147483647, interpolation=None):
+        self.size = size
+        self.max_size = max_size
+        self.interpolation = interpolation
+    def __call__(self, img):
+        return F.resize(img, self.size, self.max_size, self.interpolation)
+    def __repr__(self):
+        return self.__class__.__name__ + '(size={0}, max_size={1}, interpolation={2})'.format(
+            self.size, self.max_size,
+            _cv2_interpolation_to_str[self.interpolation])
+class Transpose(object):
+    def __init__(self, transpose_target):
+        self.transpose_target = transpose_target
+    def __call__(self, img):
+        return F.transpose(img, self.transpose_target)
+        return img
+    def __repr__(self):
+        format_string = self.__class__.__name__ + \
+                        "({})".format(self.transpose_target)
+        return format_string
 class ImageReader():

--- a/python/paddle_serving_app/reader/lac_reader.py
+++ b/python/paddle_serving_app/reader/lac_reader.py
@@ -12,10 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from paddle_serving_client import Client
 import sys
-reload(sys)
+py_version = sys.version_info[0]
-sys.setdefaultencoding('utf-8')
+if py_version == 2:
+    reload(sys)
+    sys.setdefaultencoding('utf-8')
 import os
 import io

--- a/python/paddle_serving_app/reader/test_image_reader.py
+++ b/python/paddle_serving_app/reader/test_image_reader.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from image_reader import File2Image
+from image_reader import URL2Image
+from image_reader import Sequential
+from image_reader import Normalize
+from image_reader import CenterCrop
+from image_reader import Resize
+seq = Sequential([
+    File2Image(), CenterCrop(30),
+    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Resize((5, 5))
+])
+url = "daisy.jpg"
+for x in range(100):
+    img = seq(url)
+    print(img.shape)
--- a/python/paddle_serving_app/version.py
+++ b/python/paddle_serving_app/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving App version string """
-serving_app_version = "0.0.1"
+serving_app_version = "0.0.3"
--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -112,7 +112,6 @@ class Client(object):
        self.feed_shapes_ = {}
        self.feed_types_ = {}
        self.feed_names_to_idx_ = {}
-        self.rpath()
        self.pid = os.getpid()
        self.predictor_sdk_ = None
        self.producers = []
@@ -121,12 +120,6 @@ class Client(object):
        self.all_numpy_input = True
        self.has_numpy_input = False
-    def rpath(self):
-        lib_path = os.path.dirname(paddle_serving_client.__file__)
-        client_path = os.path.join(lib_path, 'serving_client.so')
-        lib_path = os.path.join(lib_path, 'lib')
-        os.system('patchelf --set-rpath {} {}'.format(lib_path, client_path))
    def load_client_config(self, path):
        from .serving_client import PredictorClient
        from .serving_client import PredictorRes
@@ -267,10 +260,16 @@ class Client(object):
                    if i == 0:
                        int_feed_names.append(key)
                        if isinstance(feed_i[key], np.ndarray):
+                            if key in self.lod_tensor_set:
+                                raise ValueError(
+                                    "LodTensor var can not be ndarray type.")
                            int_shape.append(list(feed_i[key].shape))
                        else:
                            int_shape.append(self.feed_shapes_[key])
                    if isinstance(feed_i[key], np.ndarray):
+                        if key in self.lod_tensor_set:
+                            raise ValueError(
+                                "LodTensor var can not be ndarray type.")
                        #int_slot.append(np.reshape(feed_i[key], (-1)).tolist())
                        int_slot.append(feed_i[key])
                        self.has_numpy_input = True
@@ -281,10 +280,16 @@ class Client(object):
                    if i == 0:
                        float_feed_names.append(key)
                        if isinstance(feed_i[key], np.ndarray):
+                            if key in self.lod_tensor_set:
+                                raise ValueError(
+                                    "LodTensor var can not be ndarray type.")
                            float_shape.append(list(feed_i[key].shape))
                        else:
                            float_shape.append(self.feed_shapes_[key])
                    if isinstance(feed_i[key], np.ndarray):
+                        if key in self.lod_tensor_set:
+                            raise ValueError(
+                                "LodTensor var can not be ndarray type.")
                        #float_slot.append(np.reshape(feed_i[key], (-1)).tolist())
                        float_slot.append(feed_i[key])
                        self.has_numpy_input = True

--- a/python/paddle_serving_client/io/__init__.py
+++ b/python/paddle_serving_client/io/__init__.py
@@ -103,17 +103,21 @@ def save_model(server_model_folder,
        fout.write(config.SerializeToString())
-def inference_model_to_serving(infer_model, serving_client, serving_server):
+def inference_model_to_serving(dirname,
+                               model_filename=None,
+                               params_filename=None,
+                               serving_server="serving_server",
+                               serving_client="serving_client"):
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    inference_program, feed_target_names, fetch_targets = \
-            fluid.io.load_inference_model(dirname=infer_model, executor=exe)
+            fluid.io.load_inference_model(dirname=dirname, executor=exe, model_filename=model_filename, params_filename=params_filename)
    feed_dict = {
        x: inference_program.global_block().var(x)
        for x in feed_target_names
    }
    fetch_dict = {x.name: x for x in fetch_targets}
-    save_model(serving_client, serving_server, feed_dict, fetch_dict,
+    save_model(serving_server, serving_client, feed_dict, fetch_dict,
               inference_program)
    feed_names = feed_dict.keys()
    fetch_names = fetch_dict.keys()

--- a/python/paddle_serving_client/version.py
+++ b/python/paddle_serving_client/version.py
@@ -12,6 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving Client version string """
-serving_client_version = "0.2.0"
+serving_client_version = "0.2.2"
-serving_server_version = "0.2.0"
+serving_server_version = "0.2.2"
-module_proto_version = "0.2.0"
+module_proto_version = "0.2.2"
--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -127,6 +127,7 @@ class Server(object):
        self.model_toolkit_conf = None
        self.resource_conf = None
        self.memory_optimization = False
+        self.ir_optimization = False
        self.model_conf = None
        self.workflow_fn = "workflow.prototxt"
        self.resource_fn = "resource.prototxt"
@@ -175,6 +176,9 @@ class Server(object):
    def set_memory_optimize(self, flag=False):
        self.memory_optimization = flag
+    def set_ir_optimize(self, flag=False):
+        """Store the IR optimization switch on this server.
+
+        Args:
+            flag (bool): value saved to ``self.ir_optimization``.
+        """
+        self.ir_optimization = flag
    def check_local_bin(self):
        if "SERVING_BIN" in os.environ:
            self.use_local_bin = True
@@ -195,6 +199,7 @@ class Server(object):
            engine.enable_batch_align = 0
            engine.model_data_path = model_config_path
            engine.enable_memory_optimization = self.memory_optimization
+            engine.enable_ir_optimization = self.ir_optimization
            engine.static_optimization = False
            engine.force_update_static_cache = False
@@ -244,7 +249,7 @@ class Server(object):
        workflow_oi_config_path = None
        if isinstance(model_config_paths, str):
            # If there is only one model path, use the default infer_op.
-            # Because there are several infer_op type, we need to find 
+            # Because there are several infer_op type, we need to find
            # it from workflow_conf.
            default_engine_names = [
                'general_infer_0', 'general_dist_kv_infer_0',
@@ -284,8 +289,8 @@ class Server(object):
        # check config here
        # print config here
-    def use_mkl(self):
+    def use_mkl(self, flag):
-        self.mkl_flag = True
+        self.mkl_flag = flag
    def get_device_version(self):
        avx_flag = False
@@ -300,6 +305,10 @@ class Server(object):
            else:
                device_version = "serving-cpu-avx-openblas-"
        else:
+            if mkl_flag:
+                print(
+                    "Your CPU does not support AVX, server will running with noavx-openblas mode."
+                )
            device_version = "serving-cpu-noavx-openblas-"
        return device_version

--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -41,6 +41,9 @@ def parse_args():  # pylint: disable=doc-string-missing
        "--device", type=str, default="cpu", help="Type of device")
    parser.add_argument(
        "--mem_optim", type=bool, default=False, help="Memory optimize")
+    parser.add_argument(
+        "--ir_optim", type=bool, default=False, help="Graph optimize")
+    parser.add_argument("--use_mkl", type=bool, default=False, help="Use MKL")
    parser.add_argument(
        "--max_body_size",
        type=int,
@@ -57,7 +60,9 @@ def start_standard_model():  # pylint: disable=doc-string-missing
    workdir = args.workdir
    device = args.device
    mem_optim = args.mem_optim
+    ir_optim = args.ir_optim
    max_body_size = args.max_body_size
+    use_mkl = args.use_mkl
    if model == "":
        print("You must specify your serving model")
@@ -78,6 +83,8 @@ def start_standard_model():  # pylint: disable=doc-string-missing
    server.set_op_sequence(op_seq_maker.get_op_sequence())
    server.set_num_threads(thread_num)
    server.set_memory_optimize(mem_optim)
+    server.set_ir_optimize(ir_optim)
+    server.use_mkl(use_mkl)
    server.set_max_body_size(max_body_size)
    server.set_port(port)

--- a/python/paddle_serving_server/version.py
+++ b/python/paddle_serving_server/version.py
@@ -12,6 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving Client version string """
-serving_client_version = "0.2.0"
+serving_client_version = "0.2.2"
-serving_server_version = "0.2.0"
+serving_server_version = "0.2.2"
-module_proto_version = "0.2.0"
+module_proto_version = "0.2.2"
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -47,6 +47,8 @@ def serve_args():
        "--name", type=str, default="None", help="Default service name")
    parser.add_argument(
        "--mem_optim", type=bool, default=False, help="Memory optimize")
+    parser.add_argument(
+        "--ir_optim", type=bool, default=False, help="Graph optimize")
    parser.add_argument(
        "--max_body_size",
        type=int,
@@ -156,6 +158,7 @@ class Server(object):
        self.model_toolkit_conf = None
        self.resource_conf = None
        self.memory_optimization = False
+        self.ir_optimization = False
        self.model_conf = None
        self.workflow_fn = "workflow.prototxt"
        self.resource_fn = "resource.prototxt"
@@ -204,6 +207,9 @@ class Server(object):
    def set_memory_optimize(self, flag=False):
        self.memory_optimization = flag
+    def set_ir_optimize(self, flag=False):
+        """Store the IR optimization switch on this server.
+
+        Args:
+            flag (bool): value saved to ``self.ir_optimization``.
+        """
+        self.ir_optimization = flag
    def check_local_bin(self):
        if "SERVING_BIN" in os.environ:
            self.use_local_bin = True
@@ -240,6 +246,7 @@ class Server(object):
            engine.enable_batch_align = 0
            engine.model_data_path = model_config_path
            engine.enable_memory_optimization = self.memory_optimization
+            engine.enable_ir_optimization = self.ir_optimization
            engine.static_optimization = False
            engine.force_update_static_cache = False

--- a/python/paddle_serving_server_gpu/serve.py
+++ b/python/paddle_serving_server_gpu/serve.py
@@ -35,6 +35,7 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-miss
    thread_num = args.thread
    model = args.model
    mem_optim = args.mem_optim
+    ir_optim = args.ir_optim
    max_body_size = args.max_body_size
    workdir = "{}_{}".format(args.workdir, gpuid)
@@ -57,6 +58,7 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-miss
    server.set_op_sequence(op_seq_maker.get_op_sequence())
    server.set_num_threads(thread_num)
    server.set_memory_optimize(mem_optim)
+    server.set_ir_optimize(ir_optim)
    server.set_max_body_size(max_body_size)
    server.load_model_config(model)

--- a/python/paddle_serving_server_gpu/version.py
+++ b/python/paddle_serving_server_gpu/version.py
@@ -12,6 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving Client version string """
-serving_client_version = "0.2.0"
+serving_client_version = "0.2.2"
-serving_server_version = "0.2.0"
+serving_server_version = "0.2.2"
-module_proto_version = "0.2.0"
+module_proto_version = "0.2.2"
--- a/python/setup.py.app.in
+++ b/python/setup.py.app.in
@@ -42,10 +42,11 @@ if '${PACK}' == 'ON':
 REQUIRED_PACKAGES = [
-    'six >= 1.10.0', 'sentencepiece'
+    'six >= 1.10.0', 'sentencepiece', 'opencv-python', 'pillow'
 ]
 packages=['paddle_serving_app',
+	  'paddle_serving_app.proto',
          'paddle_serving_app.reader',
 	  'paddle_serving_app.utils',
 	  'paddle_serving_app.models',
@@ -54,6 +55,8 @@ packages=['paddle_serving_app',
 package_data={}
 package_dir={'paddle_serving_app':
             '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app',
+	     'paddle_serving_app.proto':
+	     '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto',
             'paddle_serving_app.reader':
             '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/reader',
 	     'paddle_serving_app.utils':

--- a/python/setup.py.client.in
+++ b/python/setup.py.client.in
@@ -26,7 +26,7 @@ from setuptools import setup
 from paddle_serving_client.version import serving_client_version
 from pkg_resources import DistributionNotFound, get_distribution
-py_version = sys.version_info[0]
+py_version = sys.version_info
 def python_version():
    return [int(v) for v in platform.python_version().split(".")]
@@ -39,7 +39,12 @@ def find_package(pkgname):
        return False
 def copy_lib():
-    lib_list = ['libpython2.7.so.1.0', 'libssl.so.10', 'libcrypto.so.10'] if py_version == 2 else ['libpython3.6m.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
+    if py_version[0] == 2:
+        lib_list = ['libpython2.7.so.1.0', 'libssl.so.10', 'libcrypto.so.10'] 
+    elif py_version[1] == 6:
+        lib_list = ['libpython3.6m.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
+    elif py_version[1] == 7:
+        lib_list = ['libpython3.7m.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
    os.popen('mkdir -p paddle_serving_client/lib')
    for lib in lib_list:
        r = os.popen('whereis {}'.format(lib))

--- a/tools/Dockerfile.centos6.devel
+++ b/tools/Dockerfile.centos6.devel
@@ -21,7 +21,7 @@ RUN yum -y install wget && \
    wget https://www.python.org/ftp/python/2.7.5/Python-2.7.5.tgz && \
    tar -zxf Python-2.7.5.tgz && \
    cd Python-2.7.5 && \
-    ./configure --prefix=/usr/local/python2.7 --enable-shared && \
+    ./configure --prefix=/usr/local/python2.7 --enable-shared --enable-unicode=ucs4 && \
    make all && make install && \
    make clean && \
    echo 'export PATH=/usr/local/python2.7/bin:$PATH' >> /root/.bashrc && \

--- a/tools/Dockerfile.centos6.gpu.devel
+++ b/tools/Dockerfile.centos6.gpu.devel
@@ -21,7 +21,7 @@ RUN yum -y install wget && \
    wget https://www.python.org/ftp/python/2.7.5/Python-2.7.5.tgz && \
    tar -zxf Python-2.7.5.tgz && \
    cd Python-2.7.5 && \
-    ./configure --prefix=/usr/local/python2.7 --enable-shared && \
+    ./configure --prefix=/usr/local/python2.7 --enable-shared --enable-unicode=ucs4 && \
    make all && make install && \
    make clean && \
    echo 'export PATH=/usr/local/python2.7/bin:$PATH' >> /root/.bashrc && \

--- a/tools/python_tag.py
+++ b/tools/python_tag.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
+import re
+with open("setup.cfg", "w") as f:
+    line = "[bdist_wheel]\npython-tag={0}{1}\nplat-name=linux_x86_64".format(
+        get_abbr_impl(), get_impl_ver())
+    f.write(line)