diff --git a/core/predictor/CMakeLists.txt b/core/predictor/CMakeLists.txt
index 1b9dc7b29845a2b8c7f958c1d8e836cb57e91d41..6b5013c3edadb4592df40db539fa75fb9364d02f 100644
--- a/core/predictor/CMakeLists.txt
+++ b/core/predictor/CMakeLists.txt
@@ -6,7 +6,7 @@ include(framework/CMakeLists.txt)
 include(tools/CMakeLists.txt)
 include(src/CMakeLists.txt)
 
-
+add_definitions(-D__STDC_FORMAT_MACROS)  # directory-wide define; lets C++ sources use the PRI*/SCN* macros of <inttypes.h>
 add_library(pdserving ${pdserving_srcs})
 set_source_files_properties(
         ${pdserving_srcs}
diff --git a/doc/COMPILE.md b/doc/COMPILE.md
index 466cef73a5f217cd2322fa5548c518a9004800c2..abb66084ac6f6c57c13c940eb10a87e2aba2daa2 100644
--- a/doc/COMPILE.md
+++ b/doc/COMPILE.md
@@ -63,6 +63,9 @@ If Python3 is used, replace `pip` with `pip3`.
 
 ## GOPATH Setting
 
+
+## Compile Arguments
+
 The default GOPATH is `$HOME/go`, which you can set to other values.
 ```shell
 export GOPATH=$HOME/go
@@ -78,13 +81,17 @@ go get -u github.com/golang/protobuf/protoc-gen-go
 go get -u google.golang.org/grpc
 ```
 
+
 ## Compile Server
 
 ### Integrated CPU version paddle inference library
 
 ``` shell
 mkdir server-build-cpu && cd server-build-cpu
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+      -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+      -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+      -DSERVER=ON ..
 make -j10
 ```
 
@@ -94,7 +101,11 @@ you can execute `make install` to put targets under directory `./output`, you ne
 
 ``` shell
 mkdir server-build-gpu && cd server-build-gpu
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON -DWITH_GPU=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+      -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+      -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+      -DSERVER=ON \
+      -DWITH_GPU=ON ..
 make -j10
 ```
 
@@ -108,7 +119,10 @@ execute `make install` to put targets under directory `./output`
 
 ``` shell
 mkdir client-build && cd client-build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+      -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+      -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+      -DCLIENT=ON ..
 make -j10
 ```
 
diff --git a/doc/FAQ.md b/doc/FAQ.md
index eb4f05a28594effcf59aac880cf4d81846a3a925..119c5a9dbc7237b5dadbddd79fbb4d2340940273 100644
--- a/doc/FAQ.md
+++ b/doc/FAQ.md
@@ -1,8 +1,8 @@
 # FAQ
 
-- Q：如何调整RPC服务的等待时间，避免超时？ 
+- Q: 如何调整RPC服务的等待时间，避免超时？ 
 
-  A：使用set_rpc_timeout_ms设置更长的等待时间，单位为毫秒，默认时间为20秒。
+  A: 使用set_rpc_timeout_ms设置更长的等待时间，单位为毫秒，默认时间为20秒。
   
   示例：
   ```
@@ -15,4 +15,13 @@
    ```
 
 - Q: 如何使用自己编译的Paddle Serving进行预测？
-  A：通过pip命令安装自己编译出的whl包，并设置SERVING_BIN环境变量为编译出的serving二进制文件路径。
+  A: 通过pip命令安装自己编译出的whl包，并设置SERVING_BIN环境变量为编译出的serving二进制文件路径。
+
+- Q: 执行GPU预测时遇到InvalidArgumentError: Device id must be less than GPU count, but received id is: 0. GPU count is: 0.
+  A: 将显卡驱动对应的libcuda.so的目录添加到LD_LIBRARY_PATH环境变量中
+
+- Q: 执行GPU预测时遇到ExternalError: Cudnn error, CUDNN_STATUS_BAD_PARAM at (/home/scmbuild/workspaces_cluster.dev/baidu.lib.paddlepaddle/baidu/lib/paddlepaddle/Paddle/paddle/fluid/operators/batch_norm_op.cu:198)
+  A: 将cudnn的lib64路径添加到LD_LIBRARY_PATH。安装自pypi的Paddle Serving中，post9版使用的是cudnn 7.3，post10版使用的是cudnn 7.5。如果是使用自己编译的Paddle Serving，可以在log/serving.INFO日志文件中查看对应的cudnn版本。
+
+- Q: 执行GPU预测时遇到Error: Failed to find dynamic library: libcublas.so
+  A: 将cuda的lib64路径添加到LD_LIBRARY_PATH。post9版本的Paddle Serving使用的是cuda 9.0，post10版本使用的是cuda 10.0。
diff --git a/doc/NEW_WEB_SERVICE.md b/doc/NEW_WEB_SERVICE.md
index 39bca98a3bdfbc1b2cadb5d2c3d60395b4592b34..86e53b843eb18d28057f69a39934682d797e4de5 100644
--- a/doc/NEW_WEB_SERVICE.md
+++ b/doc/NEW_WEB_SERVICE.md
@@ -1,56 +1,152 @@
 # How to develop a new Web service?
 
+
 ([简体中文](NEW_WEB_SERVICE_CN.md)|English)
 
-This document will take the image classification service based on the Imagenet data set as an example to introduce how to develop a new web service. The complete code can be visited at [here](../python/examples/imagenet/resnet50_web_service.py).
+This document takes the Uci housing price prediction service as an example to show how to develop a new Web Service. You can check out the complete code [here](../python/examples/pipeline/simple_web_service/web_service.py).
 
-## WebService base class
+## Op base class
+
+In some services, a single model may not meet business needs, so multiple models have to be chained in series or run in parallel to complete the whole service. We call a single model operation an Op, and provide a simple set of interfaces for implementing the complex logic of connecting Ops in series or in parallel.
 
-Paddle Serving implements the [WebService](https://github.com/PaddlePaddle/Serving/blob/develop/python/paddle_serving_server/web_service.py#L23) base class. You need to override its `preprocess` and `postprocess` method. The default implementation is as follows:
+Data is passed between Ops as dictionaries. Ops can be started as threads or as processes, and properties such as the concurrency of each Op can be configured.
+
+Typically, you need to inherit the Op base class and override its `init_op`,  `preprocess` and `postprocess` methods, which are implemented by default as follows:
 
 ```python
-class WebService(object):
-  
-    def preprocess(self, feed={}, fetch=[]):
-        return feed, fetch
-    def postprocess(self, feed={}, fetch=[], fetch_map=None):
-        return fetch_map
+class Op(object):
+  def init_op(self):
+    pass
+  def preprocess(self, input_dicts):
+    # multiple previous Op
+    if len(input_dicts) != 1:
+      _LOGGER.critical(
+        "Failed to run preprocess: this Op has multiple previous "
+        "inputs. Please override this func.")
+      os._exit(-1)
+    (_, input_dict), = input_dicts.items()
+    return input_dict
+  def postprocess(self, input_dicts, fetch_dict):
+    return fetch_dict
 ```
 
+### init_op
+
+This method is used to load user-defined resources such as dictionaries. A separator is loaded in the [UciOp](../python/examples/pipeline/simple_web_service/web_service.py).
+
+**Note**: If an Op is launched in thread mode and its concurrency is greater than one, the different threads of the same Op execute `init_op` only once and share the resources loaded by `init_op`.
+
 ### preprocess
 
-The preprocess method has two input parameters, `feed` and `fetch`. For an HTTP request `request`:
+This method is used to preprocess the data before model prediction. It takes one parameter, `input_dicts`, which is a dictionary: each key is the `name` of a previous Op, and each value is the data passed from that previous Op (the data is also in dictionary format).
 
-- The value of `feed` is the feed part `request.json["feed"]` in the request data 
-- The value of `fetch` is the fetch part `request.json["fetch"]` in the request data
+The `preprocess` method needs to process the data into a ndarray dictionary (key is the feed variable name, and value is the corresponding ndarray value). Op will take the return value as the input of the model prediction and pass the output to the `postprocess` method.
 
-The return values are the feed and fetch values used in the prediction.
+**Note**: if Op does not have a model configuration file, the return value of `preprocess` will be directly passed to `postprocess`.
 
 ### postprocess
 
-The postprocess method has three input parameters, `feed`, `fetch` and `fetch_map`:
+This method is used for data post-processing after model prediction. It has two parameters, `input_dicts` and `fetch_dict`.
+
+The `input_dicts` parameter is the same as the one in the `preprocess` method, and `fetch_dict` is the output of the model prediction (key is the name of the fetch variable, and value is the corresponding ndarray value). Op will take the return value of `postprocess` as the input to the `preprocess` of the subsequent Op.
 
-- The value of `feed` is the feed part `request.json["feed"]` in the request data 
-- The value of `fetch` is the fetch part `request.json["fetch"]` in the request data
-- The value of `fetch_map` is the model output value.
+**Note**: if Op does not have a model configuration file, `fetch_dict` will be the return value of `preprocess`.
 
-The return value will be processed as `{"reslut": fetch_map}` as the return of the HTTP request.
 
-## Develop ImageService class
+
+Here is the op of the UCI example:
+
+```python
+class UciOp(Op):
+    def init_op(self):
+        self.separator = ","
+
+    def preprocess(self, input_dicts):
+        (_, input_dict), = input_dicts.items()
+        x_value = input_dict["x"]
+        if isinstance(x_value, (str, unicode)):
+            input_dict["x"] = np.array(
+                [float(x.strip()) for x in x_value.split(self.separator)])
+        return input_dict
+
+    def postprocess(self, input_dicts, fetch_dict):
+        fetch_dict["price"] = str(fetch_dict["price"][0][0])
+        return fetch_dict
+```
+
+
+
+## WebService base class
+
+Paddle Serving implements the [WebService](https://github.com/PaddlePaddle/Serving/blob/develop/python/paddle_serving_server/web_service.py#L28) base class. You need to override its `get_pipeline_response` method to define the topological relationship between Ops and to return the Op that serves as the Response. The default implementation is as follows:
 
 ```python
-class ImageService(WebService):
-
-    def preprocess(self, feed={}, fetch=[]):
-        reader = ImageReader()
-        feed_batch = []
-        for ins in feed:
-            if "image" not in ins:
-                raise ("feed data error!")
-            sample = base64.b64decode(ins["image"])
-            img = reader.process_image(sample)
-            feed_batch.append({"image": img})
-        return feed_batch, fetch
+class WebService(object):
+  def get_pipeline_response(self, read_op):
+    return None
+```
+
+Here, `read_op` serves as the entry point of the topology graph of the whole service (that is, `read_op` is the predecessor of the first Op defined by the user).
+
+For single Op service (single model), take Uci service as an example (there is only one Uci prediction model in the whole service):
+
+```python
+class UciService(WebService):
+  def get_pipeline_response(self, read_op):
+    uci_op = UciOp(name="uci", input_ops=[read_op])
+    return uci_op
+```
+
+For multiple Op services (multiple models), take Ocr service as an example (the whole service is completed in series by Det model and Rec model):
+
+```python
+class OcrService(WebService):
+  def get_pipeline_response(self, read_op):
+    det_op = DetOp(name="det", input_ops=[read_op])
+    rec_op = RecOp(name="rec", input_ops=[det_op])
+    return rec_op
+```
+
+
+
+WebService objects need to load a yaml configuration file through the `prepare_pipeline_config` to configure each Op and the entire service. The simplest configuration file is as follows (Uci example):
+
+```yaml
+http_port: 18080
+op:
+    uci:
+        local_service_conf:
+            model_config: uci_housing_model # path
+```
+
+All field names of yaml file are as follows:
+
+```yaml
+rpc_port: 18080  # gRPC port
+build_dag_each_worker: false  # Whether to use the process version of Servicer. The default is false
+worker_num: 1  # gRPC thread pool size (the number of processes in the process version servicer). The default is 1
+http_port: 0 # HTTP service port. The HTTP service is not started when the value is less than or equal to 0. The default value is 0.
+dag:
+    is_thread_op: true  # Whether to use the thread version of OP. The default is true
+    client_type: brpc  # Use brpc or grpc client. The default is brpc
+    retry: 1  # The number of times DAG executor retries after failure. The default value is 1, that is, no retrying
+    use_profile: false  # Whether to print the log on the server side. The default is false
+    tracer:
+        interval_s: -1 # Monitoring time interval of Tracer (in seconds). Do not start monitoring when the value is less than 1. The default value is -1
+op:
+    <op_name>: # op name, corresponding to the one defined in the program
+        concurrency: 1 # op concurrency number, the default is 1
+        timeout: -1 # predict timeout in milliseconds. The default value is -1, that is, no timeout
+        retry: 1 # timeout retransmissions. The default value is 1, that is, do not try again
+        batch_size: 1 # If this field is set, Op will merge multiple request outputs into a single batch
+        auto_batching_timeout: -1 # auto-batching timeout in milliseconds. The default value is -1, that is, no timeout
+        local_service_conf:
+            model_config: # the path of the corresponding model file. There is no default value(None). If this item is not configured, the model file will not be loaded.
+            workdir: "" # working directory of corresponding model
+            thread_num: 2 # the corresponding model is started with thread_num threads
+            devices: "" # which device(s) the model is launched on. You can specify the GPU card numbers (such as "0,1,2"); the default is CPU
+            mem_optim: true # mem optimization option, the default is true
+            ir_optim: false # ir optimization option, the default is false
 ```
 
-For the above `ImageService`, only the `preprocess` method is rewritten to process the image data in Base64 format into the data format required by prediction.
+All fields of Op can be defined when Op is created in the program (which will override yaml fields).
diff --git a/doc/NEW_WEB_SERVICE_CN.md b/doc/NEW_WEB_SERVICE_CN.md
index 43ca7fb61f2c70f13019574a7984e3665bd1b6fa..af6730a89badd8214323ea08bbb799033f57f09b 100644
--- a/doc/NEW_WEB_SERVICE_CN.md
+++ b/doc/NEW_WEB_SERVICE_CN.md
@@ -1,56 +1,152 @@
 # 如何开发一个新的Web Service？
 
+
 (简体中文|[English](NEW_WEB_SERVICE.md))
 
-本文档将以Imagenet图像分类服务为例，来介绍如何开发一个新的Web Service。您可以在[这里](../python/examples/imagenet/resnet50_web_service.py)查阅完整的代码。
+本文档将以 Uci 房价预测服务为例，来介绍如何开发一个新的Web Service。您可以在[这里](../python/examples/pipeline/simple_web_service/web_service.py)查阅完整的代码。
+
+## Op 基类
+
+在一些服务中，单个模型可能无法满足需求，需要多个模型串联或并联来完成整个服务。我们将单个模型操作称为 Op，并提供了一套简单的接口来实现 Op 串联或并联的复杂逻辑。
 
-## WebService基类
+Op 间数据是以字典形式进行传递的，Op 可以以线程或进程方式启动，同时可以对 Op 的并发数等进行配置。
 
-Paddle Serving实现了[WebService](https://github.com/PaddlePaddle/Serving/blob/develop/python/paddle_serving_server/web_service.py#L23)基类，您需要重写它的`preprocess`方法和`postprocess`方法，默认实现如下：
+通常情况下，您需要继承 Op 基类，重写它的 `init_op`、`preprocess` 和 `postprocess` 方法，默认实现如下：
 
 ```python
-class WebService(object):
-  
-    def preprocess(self, feed={}, fetch=[]):
-        return feed, fetch
-    def postprocess(self, feed={}, fetch=[], fetch_map=None):
-        return fetch_map
+class Op(object):
+  def init_op(self):
+    pass
+  def preprocess(self, input_dicts):
+    # multiple previous Op
+    if len(input_dicts) != 1:
+      _LOGGER.critical(
+        "Failed to run preprocess: this Op has multiple previous "
+        "inputs. Please override this func.")
+      os._exit(-1)
+    (_, input_dict), = input_dicts.items()
+    return input_dict
+  def postprocess(self, input_dicts, fetch_dict):
+    return fetch_dict
 ```
 
-### preprocess方法
+### init_op 方法
+
+该方法用于加载用户自定义资源（如字典等），在 [UciOp](../python/examples/pipeline/simple_web_service/web_service.py) 中加载了一个分隔符。
+
+**注意**：如果 Op 是以线程模式加载的，那么在 Op 多并发时，同种 Op 的不同线程只执行一次 `init_op`，且共用 `init_op` 加载的资源。
+
+### preprocess 方法
+
+该方法用于模型预测前对数据的预处理，它有一个 `input_dicts` 参数，`input_dicts` 是一个字典，key 为前继 Op 的 `name`，value 为对应前继 Op 传递过来的数据（数据同样是字典格式）。
+
+`preprocess` 方法需要将数据处理成 ndarray 字典（key 为 feed 变量名，value 为对应的 ndarray 值），Op 会将该返回值作为模型预测的输入，并将输出传递给 `postprocess` 方法。
 
-preprocess方法有两个输入参数，`feed`和`fetch`。对于一个HTTP请求`request`：
+**注意**：如果 Op 没有配置模型，则 `preprocess` 的返回值会直接传递给 `postprocess`。
 
-- `feed`的值为请求数据中的feed部分`request.json["feed"]`
-- `fetch`的值为请求数据中的fetch部分`request.json["fetch"]`
+### postprocess 方法
 
-返回值分别是预测过程中用到的feed和fetch值。
+该方法用于模型预测后对数据的后处理，它有两个参数，`input_dicts` 和 `fetch_dict`。
 
-### postprocess方法
+其中，`input_dicts` 与 `preprocess` 的参数相同，`fetch_dict` 为模型预测的输出（key 为 fetch 变量名，value 为对应的 ndarray 值）。Op 会将 `postprocess` 的返回值作为后继 Op `preprocess` 的输入。
 
-postprocess方法有三个输入参数，`feed`、`fetch`和`fetch_map`：
+**注意**：如果 Op 没有配置模型，则 `fetch_dict` 将为 `preprocess` 的返回值。
 
-- `feed`的值为请求数据中的feed部分`request.json["feed"]`
-- `fetch`的值为请求数据中的fetch部分`request.json["fetch"]`
-- `fetch_map`的值为fetch到的模型输出值
 
-返回值将会被处理成`{"reslut": fetch_map}`作为HTTP请求的返回。
 
-## 开发ImageService类
+下面是 Uci 例子的 Op：
 
 ```python
-class ImageService(WebService):
-
-    def preprocess(self, feed={}, fetch=[]):
-        reader = ImageReader()
-        feed_batch = []
-        for ins in feed:
-            if "image" not in ins:
-                raise ("feed data error!")
-            sample = base64.b64decode(ins["image"])
-            img = reader.process_image(sample)
-            feed_batch.append({"image": img})
-        return feed_batch, fetch
+class UciOp(Op):
+    def init_op(self):
+        self.separator = ","
+
+    def preprocess(self, input_dicts):
+        (_, input_dict), = input_dicts.items()
+        x_value = input_dict["x"]
+        if isinstance(x_value, (str, unicode)):
+            input_dict["x"] = np.array(
+                [float(x.strip()) for x in x_value.split(self.separator)])
+        return input_dict
+
+    def postprocess(self, input_dicts, fetch_dict):
+        fetch_dict["price"] = str(fetch_dict["price"][0][0])
+        return fetch_dict
+```
+
+
+
+## WebService 基类
+
+Paddle Serving 实现了 [WebService](https://github.com/PaddlePaddle/Serving/blob/develop/python/paddle_serving_server/web_service.py#L28) 基类，您需要重写它的 `get_pipeline_response` 方法来定义 Op 间的拓扑关系，并返回作为 Response 的 Op，默认实现如下：
+
+```python
+class WebService(object):
+  def get_pipeline_response(self, read_op):
+    return None
+```
+
+其中，`read_op` 作为整个服务拓扑图的入口（即用户自定义的第一个 Op 的前继为 `read_op`）。
+
+对于单 Op 服务（单模型），以 Uci 服务为例（整个服务中只有一个 Uci 房价预测模型）：
+
+```python
+class UciService(WebService):
+  def get_pipeline_response(self, read_op):
+    uci_op = UciOp(name="uci", input_ops=[read_op])
+    return uci_op
+```
+
+对于多 Op 服务（多模型），以 Ocr 服务为例（整个服务由 Det 模型和 Rec 模型串联完成）：
+
+```python
+class OcrService(WebService):
+  def get_pipeline_response(self, read_op):
+    det_op = DetOp(name="det", input_ops=[read_op])
+    rec_op = RecOp(name="rec", input_ops=[det_op])
+    return rec_op
+```
+
+
+
+WebService 对象需要通过 `prepare_pipeline_config` 加载一个 yaml 配置文件，用来对各个 Op 以及整个服务进行配置，最简单的配置文件如下（Uci 例子）：
+
+```yaml
+http_port: 18080
+op:
+    uci:
+        local_service_conf:
+            model_config: uci_housing_model # 路径
+```
+
+yaml 文件的所有字段名详见下面：
+
+```yaml
+rpc_port: 18080  # gRPC端口号
+build_dag_each_worker: false  # 是否使用进程版 Servicer，默认为 false
+worker_num: 1  # gRPC线程池大小（进程版 Servicer 中为进程数），默认为 1
+http_port: 0 # HTTP 服务的端口号，若该值小于或等于 0 则不开启 HTTP 服务，默认为 0
+dag:
+    is_thread_op: true  # 是否使用线程版Op，默认为 true
+    client_type: brpc  # 使用 brpc 或 grpc client，默认为 brpc
+    retry: 1  # DAG Executor 在失败后重试次数，默认为 1，即不重试
+    use_profile: false  # 是否在 Server 端打印日志，默认为 false
+    tracer:
+        interval_s: -1 # Tracer 监控的时间间隔，单位为秒。当该值小于 1 时不启动监控，默认为 -1
+op:
+    <op_name>: # op 名，与程序中定义的相对应
+        concurrency: 1 # op 并发数，默认为 1
+        timeout: -1 # 预测超时时间，单位为毫秒。默认为 -1 即不超时
+        retry: 1 # 超时重发次数。默认为 1 即不重试
+        batch_size: 1 # auto-batching 中的 batch_size，若设置该字段则 Op 会将多个请求输出合并为一个 batch
+        auto_batching_timeout: -1 # auto-batching 超时时间，单位为毫秒。默认为 -1 即不超时
+        local_service_conf:
+            model_config: # 对应模型文件的路径，无默认值（None）。若不配置该项则不会加载模型文件。
+            workdir: "" # 对应模型的工作目录
+            thread_num: 2 # 对应模型用几个线程启动
+            devices: "" # 模型启动在哪个设备上，可以指定 gpu 卡号（如 "0,1,2"），默认为 cpu
+            mem_optim: true # mem 优化选项，默认为 true
+            ir_optim: false # ir 优化选项，默认为 false
 ```
 
-对于上述的`ImageService`，只重写了前处理方法，将base64格式的图片数据处理成模型预测需要的数据格式。
+其中，Op 的所有字段均可以在程序中创建 Op 时定义（会覆盖 yaml 的字段）。
diff --git a/doc/deprecated/NEW_WEB_SERVICE.md b/doc/deprecated/NEW_WEB_SERVICE.md
new file mode 100644
index 0000000000000000000000000000000000000000..39bca98a3bdfbc1b2cadb5d2c3d60395b4592b34
--- /dev/null
+++ b/doc/deprecated/NEW_WEB_SERVICE.md
@@ -0,0 +1,56 @@
+# How to develop a new Web service?
+
+([简体中文](NEW_WEB_SERVICE_CN.md)|English)
+
+This document will take the image classification service based on the Imagenet data set as an example to introduce how to develop a new web service. The complete code is available [here](../../python/examples/imagenet/resnet50_web_service.py).
+
+## WebService base class
+
+Paddle Serving implements the [WebService](https://github.com/PaddlePaddle/Serving/blob/develop/python/paddle_serving_server/web_service.py#L23) base class. You need to override its `preprocess` and `postprocess` method. The default implementation is as follows:
+
+```python
+class WebService(object):
+  
+    def preprocess(self, feed={}, fetch=[]):
+        return feed, fetch
+    def postprocess(self, feed={}, fetch=[], fetch_map=None):
+        return fetch_map
+```
+
+### preprocess
+
+The preprocess method has two input parameters, `feed` and `fetch`. For an HTTP request `request`:
+
+- The value of `feed` is the feed part `request.json["feed"]` in the request data 
+- The value of `fetch` is the fetch part `request.json["fetch"]` in the request data
+
+The return values are the feed and fetch values used in the prediction.
+
+### postprocess
+
+The postprocess method has three input parameters, `feed`, `fetch` and `fetch_map`:
+
+- The value of `feed` is the feed part `request.json["feed"]` in the request data 
+- The value of `fetch` is the fetch part `request.json["fetch"]` in the request data
+- The value of `fetch_map` is the model output value.
+
+The return value will be processed as `{"reslut": fetch_map}` as the return of the HTTP request.
+
+## Develop ImageService class
+
+```python
+class ImageService(WebService):
+
+    def preprocess(self, feed={}, fetch=[]):
+        reader = ImageReader()
+        feed_batch = []
+        for ins in feed:
+            if "image" not in ins:
+                raise ("feed data error!")
+            sample = base64.b64decode(ins["image"])
+            img = reader.process_image(sample)
+            feed_batch.append({"image": img})
+        return feed_batch, fetch
+```
+
+For the above `ImageService`, only the `preprocess` method is rewritten to process the image data in Base64 format into the data format required by prediction.
diff --git a/doc/deprecated/NEW_WEB_SERVICE_CN.md b/doc/deprecated/NEW_WEB_SERVICE_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..43ca7fb61f2c70f13019574a7984e3665bd1b6fa
--- /dev/null
+++ b/doc/deprecated/NEW_WEB_SERVICE_CN.md
@@ -0,0 +1,56 @@
+# 如何开发一个新的Web Service？
+
+(简体中文|[English](NEW_WEB_SERVICE.md))
+
+本文档将以Imagenet图像分类服务为例，来介绍如何开发一个新的Web Service。您可以在[这里](../../python/examples/imagenet/resnet50_web_service.py)查阅完整的代码。
+
+## WebService基类
+
+Paddle Serving实现了[WebService](https://github.com/PaddlePaddle/Serving/blob/develop/python/paddle_serving_server/web_service.py#L23)基类，您需要重写它的`preprocess`方法和`postprocess`方法，默认实现如下：
+
+```python
+class WebService(object):
+  
+    def preprocess(self, feed={}, fetch=[]):
+        return feed, fetch
+    def postprocess(self, feed={}, fetch=[], fetch_map=None):
+        return fetch_map
+```
+
+### preprocess方法
+
+preprocess方法有两个输入参数，`feed`和`fetch`。对于一个HTTP请求`request`：
+
+- `feed`的值为请求数据中的feed部分`request.json["feed"]`
+- `fetch`的值为请求数据中的fetch部分`request.json["fetch"]`
+
+返回值分别是预测过程中用到的feed和fetch值。
+
+### postprocess方法
+
+postprocess方法有三个输入参数，`feed`、`fetch`和`fetch_map`：
+
+- `feed`的值为请求数据中的feed部分`request.json["feed"]`
+- `fetch`的值为请求数据中的fetch部分`request.json["fetch"]`
+- `fetch_map`的值为fetch到的模型输出值
+
+返回值将会被处理成`{"reslut": fetch_map}`作为HTTP请求的返回。
+
+## 开发ImageService类
+
+```python
+class ImageService(WebService):
+
+    def preprocess(self, feed={}, fetch=[]):
+        reader = ImageReader()
+        feed_batch = []
+        for ins in feed:
+            if "image" not in ins:
+                raise ("feed data error!")
+            sample = base64.b64decode(ins["image"])
+            img = reader.process_image(sample)
+            feed_batch.append({"image": img})
+        return feed_batch, fetch
+```
+
+对于上述的`ImageService`，只重写了前处理方法，将base64格式的图片数据处理成模型预测需要的数据格式。
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 4d6b3ce35aac3bc288b869b23498a19269de3169..4b20cb2001ebb595601f22fa6e4aab8dd5df18f4 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -43,6 +43,9 @@ if (SERVER)
     endif()
 endif()
 
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/gen_version.py
+    ${CMAKE_CURRENT_BINARY_DIR}/gen_version.py COPYONLY)  # copy verbatim; do not expand ${VAR}/@VAR@ in the script
+
 set (SERVING_CLIENT_CORE ${PADDLE_SERVING_BINARY_DIR}/core/general-client/*.so)
 message("python env: " ${py_env})
 
@@ -50,6 +53,7 @@ if (APP)
 add_custom_command(
         OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
         COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_app/ ${PADDLE_SERVING_BINARY_DIR}/python/
+        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "app"  # stamps the git commit id into paddle_serving_app/version.py
         COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
         DEPENDS ${SERVING_APP_CORE} general_model_config_py_proto ${PY_FILES})
 add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
@@ -61,6 +65,7 @@ add_custom_command(
 	COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_client/ ${PADDLE_SERVING_BINARY_DIR}/python/
 	COMMAND ${CMAKE_COMMAND} -E copy ${SERVING_CLIENT_CORE} ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/serving_client.so
     COMMAND env ${py_env} ${PYTHON_EXECUTABLE} python_tag.py
+    COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "client"
 	COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
 	DEPENDS ${SERVING_CLIENT_CORE} sdk_configure_py_proto ${PY_FILES})
 add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
@@ -71,6 +76,7 @@ if (SERVER)
         add_custom_command(
             OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
             COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/
+            COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "server"  # stamps the git commit id into paddle_serving_server/version.py
             COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
             DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
         add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
@@ -79,7 +85,8 @@ if (SERVER)
             OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
             COMMAND cp -r
             ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
-            COMMAND env ${py_env} ${PYTHON_EXECUTABLE} paddle_serving_server_gpu/gen_cuda_version.py ${CUDA_VERSION_MAJOR}
+            COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
+            "server_gpu" ${CUDA_VERSION_MAJOR}
             COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
             DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
         add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
diff --git a/python/examples/yolov4/test_client.py b/python/examples/yolov4/test_client.py
index 92dcd06552ca1fdd3f2d54060e9de501f052e349..2616e55766192fca676e58efc4f0a2a3d634f1d3 100644
--- a/python/examples/yolov4/test_client.py
+++ b/python/examples/yolov4/test_client.py
@@ -30,7 +30,6 @@ client.load_client_config("yolov4_client/serving_client_conf.prototxt")
 client.connect(['127.0.0.1:9393'])
 
 im = preprocess(sys.argv[1])
-print(im.shape)
 fetch_map = client.predict(
     feed={
         "image": im,
diff --git a/python/gen_version.py b/python/gen_version.py
new file mode 100644
index 0000000000000000000000000000000000000000..258905f5815f6af01398479732b907c80cb9d739
--- /dev/null
+++ b/python/gen_version.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import re
+import os
+import subprocess
+
+
+def update_info(file_name, feature, info):
+    """Rewrite the `feature = ...` line of file_name as `feature = "<info>"`.
+
+    info may be str or UTF-8 bytes; surrounding whitespace is stripped.
+    """
+    if not isinstance(info, str):
+        info = info.decode('utf-8')  # e.g. bytes from subprocess.check_output
+    new_line = feature + " = \"" + info.strip() + "\"\n"
+    # Match the exact name followed by "=", not any line sharing the prefix.
+    pattern = re.compile(re.escape(feature) + r"\s*=")
+    with open(file_name, "r") as f:
+        lines = [new_line if pattern.match(line) else line for line in f]
+    with open(file_name, "w") as f:
+        f.write("".join(lines))
+
+
+package_dir = "paddle_serving_" + sys.argv[1]  # e.g. paddle_serving_server_gpu
+if len(sys.argv) > 2:  # optional CUDA major version, passed for GPU builds
+    gpu_version_file = "paddle_serving_server_gpu/version.py"
+    update_info(gpu_version_file, "cuda_version", sys.argv[2])
+
+head = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
+update_info(package_dir + "/version.py", "commit_id", head)
diff --git a/python/paddle_serving_app/version.py b/python/paddle_serving_app/version.py
index 332cba98dd692c4e33da68d4de7763e83e3729b5..554162f4f29a6c28e328c735a71512cd48e59962 100644
--- a/python/paddle_serving_app/version.py
+++ b/python/paddle_serving_app/version.py
@@ -13,3 +13,4 @@
 # limitations under the License.
 """ Paddle Serving App version string """
 serving_app_version = "0.1.2"
+commit_id = ""
diff --git a/python/paddle_serving_client/version.py b/python/paddle_serving_client/version.py
index f7fc14b2a7f0c25b471e8d3bb44e9d6db6839d01..015a73dca73360da228877cf5b41188dd396933c 100644
--- a/python/paddle_serving_client/version.py
+++ b/python/paddle_serving_client/version.py
@@ -15,3 +15,4 @@
 serving_client_version = "0.3.2"
 serving_server_version = "0.3.2"
 module_proto_version = "0.3.2"
+commit_id = ""
diff --git a/python/paddle_serving_server/version.py b/python/paddle_serving_server/version.py
index f7fc14b2a7f0c25b471e8d3bb44e9d6db6839d01..015a73dca73360da228877cf5b41188dd396933c 100644
--- a/python/paddle_serving_server/version.py
+++ b/python/paddle_serving_server/version.py
@@ -15,3 +15,4 @@
 serving_client_version = "0.3.2"
 serving_server_version = "0.3.2"
 module_proto_version = "0.3.2"
+commit_id = ""
diff --git a/python/paddle_serving_server_gpu/gen_cuda_version.py b/python/paddle_serving_server_gpu/gen_cuda_version.py
deleted file mode 100644
index 4a320a0e4dd9f9145a2c7682d5eecb7f582862b5..0000000000000000000000000000000000000000
--- a/python/paddle_serving_server_gpu/gen_cuda_version.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import sys
-import re
-import os
-
-new_str = ""
-with open("paddle_serving_server_gpu/version.py", "r") as f:
-    for line in f.readlines():
-        if re.match("cuda_version", line):
-            line = re.sub(r"\d+", sys.argv[1], line)
-        new_str = new_str + line
-
-with open("paddle_serving_server_gpu/version.py", "w") as f:
-    f.write(new_str)
diff --git a/python/paddle_serving_server_gpu/version.py b/python/paddle_serving_server_gpu/version.py
index 2272c3aa91f999697ea8ef3e2cdb585b01db8bed..3952f6e4058589e45de0618e5fc38e3d0aaf0c52 100644
--- a/python/paddle_serving_server_gpu/version.py
+++ b/python/paddle_serving_server_gpu/version.py
@@ -16,3 +16,4 @@ serving_client_version = "0.3.2"
 serving_server_version = "0.3.2"
 module_proto_version = "0.3.2"
 cuda_version = "9"
+commit_id = ""