diff --git a/README.md b/README.md
index 9d1ec854ba67d220a481816cda5eeebf2bc89739..17730e2a071facf7c939cb7fb686596b2b752aa6 100644
--- a/README.md
+++ b/README.md
@@ -264,8 +264,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa
### About Efficiency
- [How to profile Paddle Serving latency?](python/examples/util)
-- [How to optimize performance?(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
-- [Deploy multi-services on one GPU(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
+- [How to optimize performance?(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
+- [Deploy multi-services on one GPU(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [CPU Benchmarks(Chinese)](doc/BENCHMARKING.md)
- [GPU Benchmarks(Chinese)](doc/GPU_BENCHMARKING.md)
diff --git a/README_CN.md b/README_CN.md
index 0c30ef0cffea7d2940c544c55b641255108908fd..3302d4850e8255e8d2d6460c201892fd6035b260 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -270,8 +270,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa
### 关于Paddle Serving性能
- [如何测试Paddle Serving性能?](python/examples/util/)
-- [如何优化性能?](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
-- [在一张GPU上启动多个预测服务](doc/PERFORMANCE_OPTIM_CN.md)
+- [如何优化性能?](doc/PERFORMANCE_OPTIM_CN.md)
+- [在一张GPU上启动多个预测服务](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [CPU版Benchmarks](doc/BENCHMARKING.md)
- [GPU版Benchmarks](doc/GPU_BENCHMARKING.md)
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index cab050e732fb701120c7f1a5c80737fc75282324..d4e54c2ac04cf84b2a036f7abe0d426e6f186699 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -258,9 +258,10 @@ int PredictorClient::batch_predict(
ModelRes model;
model.set_engine_name(output.engine_name());
+ int idx = 0;
+
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
- int idx = 0;
int shape_size = output.insts(0).tensor_array(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
@@ -279,9 +280,9 @@ int PredictorClient::batch_predict(
idx += 1;
}
+ idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
- int idx = 0;
if (_fetch_name_to_type[name] == 0) {
VLOG(2) << "ferch var " << name << "type int";
model._int64_value_map[name].resize(
@@ -536,9 +537,9 @@ int PredictorClient::numpy_predict(
ModelRes model;
model.set_engine_name(output.engine_name());
+ int idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
- int idx = 0;
int shape_size = output.insts(0).tensor_array(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
@@ -557,9 +558,10 @@ int PredictorClient::numpy_predict(
idx += 1;
}
+ idx = 0;
+
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
- int idx = 0;
if (_fetch_name_to_type[name] == 0) {
VLOG(2) << "ferch var " << name << "type int";
model._int64_value_map[name].resize(
diff --git a/doc/BERT_10_MINS_CN.md b/doc/BERT_10_MINS_CN.md
index 17592000f016f1f1e939e8f3dc6dab6e05f35fe7..b7a5180da1bae2dafc431251f2b98c8a2041856a 100644
--- a/doc/BERT_10_MINS_CN.md
+++ b/doc/BERT_10_MINS_CN.md
@@ -13,10 +13,10 @@ import paddlehub as hub
model_name = "bert_chinese_L-12_H-768_A-12"
module = hub.Module(model_name)
inputs, outputs, program = module.context(trainable=True, max_seq_len=20)
-feed_keys = ["input_ids", "position_ids", "segment_ids", "input_mask", "pooled_output", "sequence_output"]
+feed_keys = ["input_ids", "position_ids", "segment_ids", "input_mask"]
fetch_keys = ["pooled_output", "sequence_output"]
feed_dict = dict(zip(feed_keys, [inputs[x] for x in feed_keys]))
-fetch_dict = dict(zip(fetch_keys, [outputs[x]] for x in fetch_keys))
+fetch_dict = dict(zip(fetch_keys, [outputs[x] for x in fetch_keys]))
import paddle_serving_client.io as serving_io
serving_io.save_model("bert_seq20_model", "bert_seq20_client", feed_dict, fetch_dict, program)
diff --git a/doc/SAVE.md b/doc/SAVE.md
index 3f7f97e12e1e309ff0933e150ea7bcd23298b60e..4fcdfa438574fac7de21c963f5bb173c69261210 100644
--- a/doc/SAVE.md
+++ b/doc/SAVE.md
@@ -10,8 +10,9 @@ serving_io.save_model("imdb_model", "imdb_client_conf",
{"words": data}, {"prediction": prediction},
fluid.default_main_program())
```
-`imdb_model` is the server side model with serving configurations. `imdb_client_conf` is the client rpc configurations. Serving has a
-dictionary for `Feed` and `Fetch` variables for client to assign. In the example, `{"words": data}` is the feed dict that specify the input of saved inference model. `{"prediction": prediction}` is the fetch dic that specify the output of saved inference model. An alias name can be defined for feed and fetch variables. An example of how to use alias name
+`imdb_model` is the server-side model with serving configurations. `imdb_client_conf` is the client RPC configuration.
+
+Serving has a dictionary for `Feed` and `Fetch` variables for the client to assign. In the example, `{"words": data}` is the feed dict that specifies the input of the saved inference model, and `{"prediction": prediction}` is the fetch dict that specifies its output. An alias name can be defined for feed and fetch variables. An example of how to use an alias name
is as follows:
``` python
from paddle_serving_client import Client
@@ -35,10 +36,14 @@ for line in sys.stdin:
If you have saved model files using Paddle's `save_inference_model` API, you can use Paddle Serving's` inference_model_to_serving` API to convert it into a model file that can be used for Paddle Serving.
```
import paddle_serving_client.io as serving_io
-serving_io.inference_model_to_serving(dirname, model_filename=None, params_filename=None, serving_server="serving_server", serving_client="serving_client")
+serving_io.inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client", model_filename=None, params_filename=None)
```
dirname (str) - Path of saved model files. Program file and parameter files are saved in this directory.
-model_filename (str, optional) - The name of file to load the inference program. If it is None, the default filename __model__ will be used. Default: None.
-paras_filename (str, optional) - The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. Default: None.
+
serving_server (str, optional) - The path of model files and configuration files for server. Default: "serving_server".
+
serving_client (str, optional) - The path of configuration files for client. Default: "serving_client".
+
+model_filename (str, optional) - The name of the file that stores the inference program. If it is None, the default filename `__model__` will be used. Default: None.
+
+params_filename (str, optional) - The name of the file that stores all parameters. It only needs to be specified if all parameters were saved in a single binary file. If parameters were saved in separate files, set it to None. Default: None.
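+
+For example, if the model was saved with all parameters in one single file (the file and directory names below are illustrative):
+
+```python
+import paddle_serving_client.io as serving_io
+
+# Convert an inference model saved by save_inference_model into a Serving
+# server-side model folder and a client-side configuration folder.
+serving_io.inference_model_to_serving(
+    "./inference_model",
+    serving_server="serving_server",
+    serving_client="serving_client",
+    model_filename="__model__",
+    params_filename="__params__")
+```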
diff --git a/doc/SAVE_CN.md b/doc/SAVE_CN.md
index fc75cd8d015a6d6f42a08f29e4035db20f450d91..3ca715c024a38b6fdce5c973844e7d023eebffcc 100644
--- a/doc/SAVE_CN.md
+++ b/doc/SAVE_CN.md
@@ -11,7 +11,9 @@ serving_io.save_model("imdb_model", "imdb_client_conf",
{"words": data}, {"prediction": prediction},
fluid.default_main_program())
```
-imdb_model是具有服务配置的服务器端模型。 imdb_client_conf是客户端rpc配置。 Serving有一个 提供给用户存放Feed和Fetch变量信息的字典。 在示例中,`{words”:data}` 是用于指定已保存推理模型输入的提要字典。`{"prediction":projection}`是指定保存的推理模型输出的字典。可以为feed和fetch变量定义一个别名。 如何使用别名的例子 示例如下:
+imdb_model是具有服务配置的服务器端模型。 imdb_client_conf是客户端rpc配置。
+
+Serving有一个提供给用户存放Feed和Fetch变量信息的字典。 在示例中,`{"words":data}` 是用于指定已保存推理模型输入的feed字典,`{"prediction":prediction}` 是用于指定已保存推理模型输出的fetch字典。可以为feed和fetch变量定义别名,使用别名的示例如下:
``` python
from paddle_serving_client import Client
@@ -35,10 +37,14 @@ for line in sys.stdin:
如果已使用Paddle 的`save_inference_model`接口保存出预测要使用的模型,则可以通过Paddle Serving的`inference_model_to_serving`接口转换成可用于Paddle Serving的模型文件。
```
import paddle_serving_client.io as serving_io
-serving_io.inference_model_to_serving(dirname, model_filename=None, params_filename=None, serving_server="serving_server", serving_client="serving_client")
+serving_io.inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client", model_filename=None, params_filename=None)
```
dirname (str) – 需要转换的模型文件存储路径,Program结构文件和参数文件均保存在此目录。
-model_filename (str,可选) – 存储需要转换的模型Inference Program结构的文件名称。如果设置为None,则使用 __model__ 作为默认的文件名。默认值为None。
+
+serving_server (str, 可选) - 转换后的模型文件和配置文件的存储路径。默认值为serving_server。
+
+serving_client (str, 可选) - 转换后的客户端配置文件存储路径。默认值为serving_client。
+
+model_filename (str,可选) – 存储需要转换的模型Inference Program结构的文件名称。如果设置为None,则使用 `__model__` 作为默认的文件名。默认值为None。
+
params_filename (str,可选) – 存储需要转换的模型所有参数的文件名称。当且仅当所有模型参数被保存在一个单独的二进制文件中,它才需要被指定。如果模型参数是存储在各自分离的文件中,设置它的值为None。默认值为None。
-serving_server (str, 可选) - 转换后的模型文件和配置文件的存储路径。默认值为"serving_server"。
-serving_client (str, 可选) - 转换后的客户端配置文件存储路径。默认值为"serving_client"。
diff --git a/doc/UWSGI_DEPLOY.md b/doc/UWSGI_DEPLOY.md
index 02c0488d1bc0c43e050421e0991125fb3a4d644e..cb3fb506bf6fd4461240ebe43234fa3bed3d4784 100644
--- a/doc/UWSGI_DEPLOY.md
+++ b/doc/UWSGI_DEPLOY.md
@@ -1,6 +1,8 @@
-# 使用uwsgi启动HTTP预测服务
+# Deploy HTTP service with uWSGI
-在提供的fit_a_line示例中,启动HTTP预测服务后会看到有以下信息:
+([简体中文](./UWSGI_DEPLOY_CN.md)|English)
+
+In the fit_a_line example, after starting the HTTP prediction service, you will see the following information:
```shell
web service address:
@@ -13,46 +15,31 @@ http://10.127.3.150:9393/uci/prediction
* Running on http://0.0.0.0:9393/ (Press CTRL+C to quit)
```
-这里会提示启动的HTTP服务是开发模式,并不能用于生产环境的部署。Flask启动的服务环境不够稳定也无法承受大量请求的并发,实际部署过程中配合需要WSGI(Web Server Gateway Interface)使用。
+This message indicates that the HTTP service is running in development mode and cannot be used for production deployment.
+The prediction service started by Flask is not stable enough to handle a large number of concurrent requests, so a WSGI (Web Server Gateway Interface) server should be used in actual deployments.
-下面我们展示一下如何使用[uWSGI](https://github.com/unbit/uwsgi)模块来部署HTTP预测服务用于生产环境。
+Next, we will show how to use the [uWSGI](https://github.com/unbit/uwsgi) module to deploy the HTTP prediction service for production environments.
-编写HTTP服务脚本
```python
#uwsgi_service.py
from paddle_serving_server.web_service import WebService
-from flask import Flask, request
-#配置预测服务
+#Define prediction service
uci_service = WebService(name = "uci")
uci_service.load_model_config("./uci_housing_model")
uci_service.prepare_server(workdir="./workdir", port=int(9500), device="cpu")
uci_service.run_server()
-
-#配置flask服务
-app_instance = Flask(__name__)
-@app_instance.before_first_request
-def init():
- global uci_service
- uci_service._launch_web_service()
-
-service_name = "/" + uci_service.name + "/prediction"
-@app_instance.route(service_name, methods=["POST"])
-def run():
- return uci_service.get_prediction(request)
-
-#run方法用于直接调试中直接启动服务
-if __name__ == "__main__":
- app_instance.run()
+#Get flask application
+app_instance = uci_service.get_app_instance()
```
-使用uwsgi启动HTTP服务
+Start the service with uWSGI
```bash
-uwsgi --http :9000 --wsgi-file uwsgi_service.py --callable app_instance --processes 4
+uwsgi --http :9393 --module uwsgi_service:app_instance
```
-使用--processes参数可以指定服务的进程数,请注意目前Serving HTTP 服务暂时不支持多线程的方式使用。
+Use the --processes parameter to specify the number of service processes, for example `uwsgi --http :9393 --module uwsgi_service:app_instance --processes 4`.
-更多uWSGI的信息请参考[uWSGI使用文档](https://uwsgi-docs.readthedocs.io/en/latest/)
+For more information about uWSGI, please refer to [uWSGI documentation](https://uwsgi-docs.readthedocs.io/en/latest/)
diff --git a/doc/UWSGI_DEPLOY_CN.md b/doc/UWSGI_DEPLOY_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..5bb87e26bbae729f8c21b4681413a4c9f5c4e7c8
--- /dev/null
+++ b/doc/UWSGI_DEPLOY_CN.md
@@ -0,0 +1,45 @@
+# 使用uwsgi启动HTTP预测服务
+
+(简体中文|[English](./UWSGI_DEPLOY.md))
+
+在提供的fit_a_line示例中,启动HTTP预测服务后会看到有以下信息:
+
+```shell
+web service address:
+http://10.127.3.150:9393/uci/prediction
+ * Serving Flask app "serve" (lazy loading)
+ * Environment: production
+ WARNING: This is a development server. Do not use it in a production deployment.
+ Use a production WSGI server instead.
+ * Debug mode: off
+ * Running on http://0.0.0.0:9393/ (Press CTRL+C to quit)
+```
+
+这里会提示启动的HTTP服务是开发模式,并不能用于生产环境的部署。Flask启动的服务环境不够稳定也无法承受大量请求的并发,实际部署过程中需要配合WSGI(Web Server Gateway Interface)使用。
+
+下面我们展示一下如何使用[uWSGI](https://github.com/unbit/uwsgi)模块来部署HTTP预测服务用于生产环境。
+
+编写HTTP服务脚本
+
+```python
+#uwsgi_service.py
+from paddle_serving_server.web_service import WebService
+
+#配置预测服务
+uci_service = WebService(name = "uci")
+uci_service.load_model_config("./uci_housing_model")
+uci_service.prepare_server(workdir="./workdir", port=int(9500), device="cpu")
+uci_service.run_server()
+#获取flask服务
+app_instance = uci_service.get_app_instance()
+```
+
+使用uwsgi启动HTTP服务
+
+```bash
+uwsgi --http :9393 --module uwsgi_service:app_instance
+```
+
+使用--processes参数可以指定服务的进程数。
+
+更多uWSGI的信息请参考[uWSGI使用文档](https://uwsgi-docs.readthedocs.io/en/latest/)
diff --git a/python/examples/bert/bert_web_service.py b/python/examples/bert/bert_web_service.py
index 8db64e5eb792a7365ed739bbfb05bf38fd8a0da1..6a5830ea179b033f9f761010d8cf9213d9b1e40b 100644
--- a/python/examples/bert/bert_web_service.py
+++ b/python/examples/bert/bert_web_service.py
@@ -23,10 +23,10 @@ class BertService(WebService):
def load(self):
self.reader = BertReader(vocab_file="vocab.txt", max_seq_len=128)
- def preprocess(self, feed={}, fetch=[]):
- feed_res = [{
- "words": self.reader.process(ins["words"].encode("utf-8"))
- } for ins in feed]
+ def preprocess(self, feed=[], fetch=[]):
+ feed_res = [
+ self.reader.process(ins["words"].encode("utf-8")) for ins in feed
+ ]
return feed_res, fetch
diff --git a/python/examples/cascade_rcnn/README.md b/python/examples/cascade_rcnn/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..87617a842fcdb78d039b71634521c9d370f755fa
--- /dev/null
+++ b/python/examples/cascade_rcnn/README.md
@@ -0,0 +1,21 @@
+# Cascade RCNN model on Paddle Serving
+
+([简体中文](./README_CN.md)|English)
+
+### Get The Cascade RCNN Model
+```
+sh get_data.sh
+```
+For more detection models, please refer to the [Paddle Detection Model Zoo](https://github.com/PaddlePaddle/PaddleDetection/blob/release/0.2/docs/MODEL_ZOO_cn.md)
+
+### Start the service
+```
+python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0
+```
+
+### Perform prediction
+```
+python test_client.py
+```
+
+The image with bounding boxes and the JSON result will be saved in the `output` folder.
diff --git a/python/examples/cascade_rcnn/README_CN.md b/python/examples/cascade_rcnn/README_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..a37cb47331ce516c15587c6b2d8b9072c4d878f1
--- /dev/null
+++ b/python/examples/cascade_rcnn/README_CN.md
@@ -0,0 +1,21 @@
+# 使用Paddle Serving部署Cascade RCNN模型
+
+(简体中文|[English](./README.md))
+
+## 获得Cascade RCNN模型
+```
+sh get_data.sh
+```
+如果你想要更多的检测模型,请参考[Paddle检测模型库](https://github.com/PaddlePaddle/PaddleDetection/blob/release/0.2/docs/MODEL_ZOO_cn.md)
+
+### 启动服务
+```
+python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0
+```
+
+### 执行预测
+```
+python test_client.py
+```
+
+客户端已经为图片做好了后处理,在`output`文件夹下存放各个框的json格式信息还有后处理结果图片。
diff --git a/python/examples/cascade_rcnn/get_data.sh b/python/examples/cascade_rcnn/get_data.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0aa9c7dc340367790eb52f5cc0074cb5d6fd0d05
--- /dev/null
+++ b/python/examples/cascade_rcnn/get_data.sh
@@ -0,0 +1,2 @@
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/cascade_rcnn_r50_fpx_1x_serving.tar.gz
+tar xf cascade_rcnn_r50_fpx_1x_serving.tar.gz
diff --git a/python/examples/imagenet/README.md b/python/examples/imagenet/README.md
index 536440e73ea43f55a4c93bf126d62e86aa3983e6..415818e715e22e97399c710a61f2463fd166bd19 100644
--- a/python/examples/imagenet/README.md
+++ b/python/examples/imagenet/README.md
@@ -44,6 +44,6 @@ python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696
client send inference request
```
-python image_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
+python resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
```
*the port of server side in this example is 9696
diff --git a/python/examples/imagenet/README_CN.md b/python/examples/imagenet/README_CN.md
index c34ccca32b737467e687dfd5e86c3229f4339075..77ade579ba17ad8247b2f118242642a1d3c79927 100644
--- a/python/examples/imagenet/README_CN.md
+++ b/python/examples/imagenet/README_CN.md
@@ -44,6 +44,6 @@ python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696
client端进行预测
```
-python image_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
+python resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
```
*server端示例中服务端口为9696端口
diff --git a/python/examples/imagenet/benchmark.py b/python/examples/imagenet/benchmark.py
index ece222f74c52614100a119e49c3754e22959b7c8..6b21719e7b665906e7abd02a7a3b8aef50136685 100644
--- a/python/examples/imagenet/benchmark.py
+++ b/python/examples/imagenet/benchmark.py
@@ -39,8 +39,8 @@ def single_func(idx, resource):
client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
start = time.time()
- for i in range(1000):
- img = reader.process_image(img_list[i]).reshape(-1)
+ for i in range(100):
+ img = reader.process_image(img_list[i])
fetch_map = client.predict(feed={"image": img}, fetch=["score"])
end = time.time()
return [[end - start]]
@@ -49,7 +49,7 @@ def single_func(idx, resource):
if __name__ == "__main__":
multi_thread_runner = MultiThreadRunner()
- endpoint_list = ["127.0.0.1:9393"]
+ endpoint_list = ["127.0.0.1:9292"]
#card_num = 4
#for i in range(args.thread):
# endpoint_list.append("127.0.0.1:{}".format(9295 + i % card_num))
diff --git a/python/examples/imagenet/benchmark_batch.py b/python/examples/imagenet/benchmark_batch.py
index e531425770cbf9102b7ebd2f5b082c5c4aa14e71..1646fb9a94d6953f90f9f4907aa74940f13c2730 100644
--- a/python/examples/imagenet/benchmark_batch.py
+++ b/python/examples/imagenet/benchmark_batch.py
@@ -24,6 +24,7 @@ from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
import requests
import json
+import base64
from image_reader import ImageReader
args = benchmark_args()
@@ -36,6 +37,10 @@ def single_func(idx, resource):
img_list = []
for i in range(1000):
img_list.append(open("./image_data/n01440764/" + file_list[i]).read())
+ profile_flags = False
+ if "FLAGS_profile_client" in os.environ and os.environ[
+ "FLAGS_profile_client"]:
+ profile_flags = True
if args.request == "rpc":
reader = ImageReader()
fetch = ["score"]
@@ -46,23 +51,43 @@ def single_func(idx, resource):
for i in range(1000):
if args.batch_size >= 1:
feed_batch = []
+ i_start = time.time()
for bi in range(args.batch_size):
img = reader.process_image(img_list[i])
- img = img.reshape(-1)
feed_batch.append({"image": img})
+ i_end = time.time()
+ if profile_flags:
+ print("PROFILE\tpid:{}\timage_pre_0:{} image_pre_1:{}".
+ format(os.getpid(),
+ int(round(i_start * 1000000)),
+ int(round(i_end * 1000000))))
+
result = client.predict(feed=feed_batch, fetch=fetch)
else:
print("unsupport batch size {}".format(args.batch_size))
elif args.request == "http":
- raise ("no batch predict for http")
+ py_version = 2
+ server = "http://" + resource["endpoint"][idx % len(resource[
+ "endpoint"])] + "/image/prediction"
+ start = time.time()
+ for i in range(1000):
+ if py_version == 2:
+ image = base64.b64encode(
+ open("./image_data/n01440764/" + file_list[i]).read())
+ else:
+                image = base64.b64encode(
+                    open("./image_data/n01440764/" + file_list[i],
+                         "rb").read()).decode("utf-8")
+ req = json.dumps({"feed": [{"image": image}], "fetch": ["score"]})
+ r = requests.post(
+ server, data=req, headers={"Content-Type": "application/json"})
end = time.time()
return [[end - start]]
if __name__ == '__main__':
multi_thread_runner = MultiThreadRunner()
- endpoint_list = ["127.0.0.1:9393"]
+ endpoint_list = ["127.0.0.1:9292"]
#endpoint_list = endpoint_list + endpoint_list + endpoint_list
result = multi_thread_runner.run(single_func, args.thread,
{"endpoint": endpoint_list})
diff --git a/python/examples/imdb/benchmark.py b/python/examples/imdb/benchmark.py
index a734e80ef78a7710ca09a211132e248580c5a48c..b8d7a70f30c5cf2d0ee985a8c30fada8fa9481b3 100644
--- a/python/examples/imdb/benchmark.py
+++ b/python/examples/imdb/benchmark.py
@@ -16,7 +16,7 @@
import sys
import time
import requests
-from imdb_reader import IMDBDataset
+from paddle_serving_app import IMDBDataset
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
@@ -37,26 +37,39 @@ def single_func(idx, resource):
client.load_client_config(args.model)
client.connect([args.endpoint])
for i in range(1000):
- if args.batch_size == 1:
- word_ids, label = imdb_dataset.get_words_and_label(line)
- fetch_map = client.predict(
- feed={"words": word_ids}, fetch=["prediction"])
+ if args.batch_size >= 1:
+ feed_batch = []
+ for bi in range(args.batch_size):
+ word_ids, label = imdb_dataset.get_words_and_label(dataset[
+ bi])
+ feed_batch.append({"words": word_ids})
+ result = client.predict(feed=feed_batch, fetch=["prediction"])
+ if result is None:
+                raise RuntimeError("predict failed.")
else:
print("unsupport batch size {}".format(args.batch_size))
elif args.request == "http":
- for fn in filelist:
- fin = open(fn)
- for line in fin:
- word_ids, label = imdb_dataset.get_words_and_label(line)
- r = requests.post(
- "http://{}/imdb/prediction".format(args.endpoint),
- data={"words": word_ids,
- "fetch": ["prediction"]})
+ if args.batch_size >= 1:
+ feed_batch = []
+ for bi in range(args.batch_size):
+ feed_batch.append({"words": dataset[bi]})
+ r = requests.post(
+ "http://{}/imdb/prediction".format(args.endpoint),
+ json={"feed": feed_batch,
+ "fetch": ["prediction"]})
+ if r.status_code != 200:
+ print('HTTP status code -ne 200')
+                raise RuntimeError("predict failed.")
+ else:
+ print("unsupport batch size {}".format(args.batch_size))
end = time.time()
return [[end - start]]
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(single_func, args.thread, {})
-print(result)
+avg_cost = 0
+for cost in result[0]:
+ avg_cost += cost
+print("total cost {} s of each thread".format(avg_cost / args.thread))
diff --git a/python/examples/imdb/benchmark.sh b/python/examples/imdb/benchmark.sh
index d77e184180d5c36de6cb865f6b9797511410a3ba..93dbf830c84bd38f72dd0d8a32139ad6098dc6f8 100644
--- a/python/examples/imdb/benchmark.sh
+++ b/python/examples/imdb/benchmark.sh
@@ -1,9 +1,12 @@
rm profile_log
for thread_num in 1 2 4 8 16
do
- $PYTHONROOT/bin/python benchmark.py --thread $thread_num --model imdbo_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
+for batch_size in 1 2 4 8 16 32 64 128 256 512
+do
+ $PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
echo "========================================"
echo "batch size : $batch_size" >> profile_log
$PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
tail -n 1 profile >> profile_log
done
+done
diff --git a/python/examples/imdb/benchmark_batch.py b/python/examples/imdb/benchmark_batch.py
deleted file mode 100644
index 5891970b5decc34f35723187e44b166e0482c6e9..0000000000000000000000000000000000000000
--- a/python/examples/imdb/benchmark_batch.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# pylint: disable=doc-string-missing
-
-import sys
-import time
-import requests
-from imdb_reader import IMDBDataset
-from paddle_serving_client import Client
-from paddle_serving_client.utils import MultiThreadRunner
-from paddle_serving_client.utils import benchmark_args
-
-args = benchmark_args()
-
-
-def single_func(idx, resource):
- imdb_dataset = IMDBDataset()
- imdb_dataset.load_resource("./imdb.vocab")
- dataset = []
- with open("./test_data/part-0") as fin:
- for line in fin:
- dataset.append(line.strip())
- start = time.time()
- if args.request == "rpc":
- client = Client()
- client.load_client_config(args.model)
- client.connect([args.endpoint])
- for i in range(1000):
- if args.batch_size >= 1:
- feed_batch = []
- for bi in range(args.batch_size):
- word_ids, label = imdb_dataset.get_words_and_label(dataset[
- bi])
- feed_batch.append({"words": word_ids})
- result = client.predict(feed=feed_batch, fetch=["prediction"])
- if result is None:
- raise ("predict failed.")
- else:
- print("unsupport batch size {}".format(args.batch_size))
-
- elif args.request == "http":
- if args.batch_size >= 1:
- feed_batch = []
- for bi in range(args.batch_size):
- feed_batch.append({"words": dataset[bi]})
- r = requests.post(
- "http://{}/imdb/prediction".format(args.endpoint),
- json={"feed": feed_batch,
- "fetch": ["prediction"]})
- if r.status_code != 200:
- print('HTTP status code -ne 200')
- raise ("predict failed.")
- else:
- print("unsupport batch size {}".format(args.batch_size))
- end = time.time()
- return [[end - start]]
-
-
-multi_thread_runner = MultiThreadRunner()
-result = multi_thread_runner.run(single_func, args.thread, {})
-avg_cost = 0
-for cost in result[0]:
- avg_cost += cost
-print("total cost {} s of each thread".format(avg_cost / args.thread))
diff --git a/python/examples/imdb/benchmark_batch.sh b/python/examples/imdb/benchmark_batch.sh
deleted file mode 100644
index 15b65338b21675fd89056cf32f9a247b385a6a36..0000000000000000000000000000000000000000
--- a/python/examples/imdb/benchmark_batch.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-rm profile_log
-for thread_num in 1 2 4 8 16
-do
-for batch_size in 1 2 4 8 16 32 64 128 256 512
-do
- $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
- echo "========================================"
- echo "batch size : $batch_size" >> profile_log
- $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
- tail -n 1 profile >> profile_log
-done
-done
diff --git a/python/examples/imdb/test_client.py b/python/examples/imdb/test_client.py
index fdc3ced25377487a2844d57c4e6121801e9fa7fa..74364e5854d223e380cb386f9a8bc68b8517305a 100644
--- a/python/examples/imdb/test_client.py
+++ b/python/examples/imdb/test_client.py
@@ -13,7 +13,7 @@
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import Client
-from imdb_reader import IMDBDataset
+from paddle_serving_app import IMDBDataset
import sys
client = Client()
diff --git a/python/examples/imdb/test_client_batch.py b/python/examples/imdb/test_client_batch.py
deleted file mode 100644
index 972b2c9609ca690542fa802f187fb30ed0467a04..0000000000000000000000000000000000000000
--- a/python/examples/imdb/test_client_batch.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# pylint: disable=doc-string-missing
-
-from paddle_serving_client import Client
-import sys
-import subprocess
-from multiprocessing import Pool
-import time
-
-
-def batch_predict(batch_size=4):
- client = Client()
- client.load_client_config(conf_file)
- client.connect(["127.0.0.1:9292"])
- fetch = ["acc", "cost", "prediction"]
- feed_batch = []
- for line in sys.stdin:
- group = line.strip().split()
- words = [int(x) for x in group[1:int(group[0])]]
- label = [int(group[-1])]
- feed = {"words": words, "label": label}
- feed_batch.append(feed)
- if len(feed_batch) == batch_size:
- fetch_batch = client.batch_predict(
- feed_batch=feed_batch, fetch=fetch)
- for i in range(batch_size):
- print("{} {}".format(fetch_batch[i]["prediction"][1],
- feed_batch[i]["label"][0]))
- feed_batch = []
- if len(feed_batch) > 0:
- fetch_batch = client.batch_predict(feed_batch=feed_batch, fetch=fetch)
- for i in range(len(feed_batch)):
- print("{} {}".format(fetch_batch[i]["prediction"][1], feed_batch[i][
- "label"][0]))
-
-
-if __name__ == '__main__':
- conf_file = sys.argv[1]
- batch_size = int(sys.argv[2])
- batch_predict(batch_size)
diff --git a/python/examples/imdb/text_classify_service.py b/python/examples/imdb/text_classify_service.py
index 4420a99facc7bd3db1c8bf1df0c58765467517de..ae54b99030ee777ad127242d26c13cdbc05645e9 100755
--- a/python/examples/imdb/text_classify_service.py
+++ b/python/examples/imdb/text_classify_service.py
@@ -14,7 +14,7 @@
# pylint: disable=doc-string-missing
from paddle_serving_server.web_service import WebService
-from imdb_reader import IMDBDataset
+from paddle_serving_app import IMDBDataset
import sys
diff --git a/python/examples/senta/senta_web_service.py b/python/examples/senta/senta_web_service.py
index 0c0205e73cdd26231a94b2f0c9c41da84aaca961..5d20020c46d3b5ed23914cb9813ac889e232a2b3 100644
--- a/python/examples/senta/senta_web_service.py
+++ b/python/examples/senta/senta_web_service.py
@@ -51,13 +51,11 @@ class SentaService(WebService):
def init_lac_service(self):
ps = Process(target=self.start_lac_service())
ps.start()
- #self.init_lac_client()
+ self.init_lac_client()
def lac_predict(self, feed_data):
- self.init_lac_client()
lac_result = self.lac_client.predict(
feed={"words": feed_data}, fetch=["crf_decode"])
- self.lac_client.release()
return lac_result
def init_lac_client(self):
diff --git a/python/paddle_serving_app/README.md b/python/paddle_serving_app/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1756b83993e67dcbc66b6809631c5e953eef08d7
--- /dev/null
+++ b/python/paddle_serving_app/README.md
@@ -0,0 +1,169 @@
+([简体中文](./README_CN.md)|English)
+
+paddle_serving_app is a tool component of the Paddle Serving framework, and includes functions such as pre-trained model download and data pre-processing methods.
+It makes it convenient for users to quickly test and deploy model examples, analyze the performance of prediction services, and debug model prediction services.
+
+## Install
+
+```shell
+pip install paddle_serving_app
+```
+
+## Get model list
+
+```shell
+python -m paddle_serving_app.package --model_list
+```
+
+## Download pre-trained models
+
+```shell
+python -m paddle_serving_app.package --get_model senta_bilstm
+```
+
+11 pre-trained models are built into paddle_serving_app, covering 6 kinds of prediction tasks.
+The model files can be directly used for deployment, and the `--tutorial` argument can be added to obtain the deployment method.
+
+| Prediction task | Model name |
+| ------------ | ------------------------------------------------ |
+| SentimentAnalysis | 'senta_bilstm', 'senta_bow', 'senta_cnn' |
+| SemanticRepresentation | 'ernie_base' |
+| ChineseWordSegmentation | 'lac' |
+| ObjectDetection | 'faster_rcnn', 'yolov3' |
+| ImageSegmentation | 'unet', 'deeplabv3' |
+| ImageClassification | 'resnet_v2_50_imagenet', 'mobilenet_v2_imagenet' |
+
+## Data preprocess API
+
+paddle_serving_app provides a variety of data preprocessing methods for prediction tasks in the field of CV and NLP.
+
+- class ChineseBertReader
+
+Preprocessing for Chinese semantic representation task.
+
+ - `__init__(vocab_file, max_seq_len=20)`
+
+  - vocab_file(str): Path of the dictionary file.
+
+  - max_seq_len(int, optional): The length of each sample after processing. The excess part will be truncated, and the insufficient part will be padded with 0. Default: 20.
+
+ - `process(line)`
+
+  - line(str): Input text.
+
+ [example](../examples/bert/bert_client.py)
+
+- class LACReader
+
+Preprocessing for Chinese word segmentation task.
+
+ - `__init__(dict_floder)`
+    - dict_floder(str): Path of the dictionary folder.
+  - `process(sent)`
+    - sent(str): Input text.
+  - `parse_result`
+    - words(str): Original input text.
+    - crf_decode(np.array): CRF codes predicted by the model.
+
+  [example](../examples/lac/lac_web_service.py)
+
+- class SentaReader
+
+ - `__init__(vocab_path)`
+    - vocab_path(str): Path of the dictionary file.
+  - `process(cols)`
+    - cols(str): Word segmentation result.
+
+ [example](../examples/senta/senta_web_service.py)
+
+- The image preprocessing methods are more flexible than the methods above and can be composed by combining the following classes (see the sketch after this list). [example](../examples/imagenet/image_rpc_client.py)
+
+- class Sequential
+
+ - `__init__(transforms)`
+ - transforms(list):List of image preprocessing classes
+ - `__call__(img)`
+    - img: The input to the image preprocessing pipeline. Its data type is related to the first preprocessing method in transforms.
+
+- class File2Image
+
+ - `__call__(img_path)`
+ - img_path(str):Path of image file.
+
+- class URL2Image
+
+ - `__call__(img_url)`
+ - img_url(str):url of image file.
+
+- class Normalize
+
+ - `__init__(mean,std)`
+ - mean(float):Mean
+ - std(float):Variance
+ - `__call__(img)`
+ - img(np.array):Image data in (C,H,W) channels.
+
+- class CenterCrop
+
+ - `__init__(size)`
+    - size(list/int): The expected size after cropping. When a list, it should contain the expected height and width; when an int, a square crop with side length size is returned.
+ - `__call__(img)`
+ - img(np.array):Image data.
+
+- class Resize
+
+ - `__init__(size, max_size=2147483647, interpolation=None)`
+    - size(list/int): The expected image size. When the input is a list, it needs to contain the expected height and width. When the input is an int, the short side will be set to size and the long side will be scaled proportionally.
+ - `__call__(img)`
+ - img(numpy array):Image data.
+
+
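+A minimal sketch of composing these classes into a preprocessing pipeline (the image path, the exact stages and the parameter values are illustrative and depend on the target model):
+
+```python
+from paddle_serving_app.reader import (Sequential, File2Image, Resize,
+                                       CenterCrop, Transpose, Div, Normalize)
+
+# read an image file, resize the short side to 256, center-crop to 224x224,
+# switch to CHW layout, scale pixel values to [0, 1] and normalize per channel
+preprocess = Sequential([
+    File2Image(),
+    Resize(256),
+    CenterCrop(224),
+    Transpose((2, 0, 1)),
+    Div(255.0),
+    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+])
+
+img = preprocess("daisy.jpg")  # illustrative image path
+```
+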
+## Timeline tools
+
+The Timeline tool can be used to visualize the start and end times of stages such as data preparation in the prediction service, client-side waiting, and server-side ops.
+This makes it convenient to analyze how time is spent in the prediction service, so that it can be optimized in a targeted manner.
+
+### How to use
+
+1. Before making predictions on the client side, turn on the timeline function for each stage in the Paddle Serving framework through environment variables. Timeline information will then be printed to the log.
+
+ ```shell
+ export FLAGS_profile_client=1 # Turn on timeline function of client
+ export FLAGS_profile_server=1 # Turn on timeline function of server
+ ```
+2. Run predictions and redirect the client-side log to a file, for example one named profile.
+
+3. Export the information in the log file into a trace file.
+ ```shell
+ python -m paddle_serving_app.trace --profile_file profile --trace_file trace
+ ```
+
+4. Open `chrome://tracing/` in the Chrome browser.
+Load the trace file generated in the previous step through the load button, and the time information
+of each stage of the prediction service will be visualized.
+
+The following figure shows the timeline of a GPU prediction service using the [bert example](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/bert).
+The server side starts the service with 4 GPU cards, the client side starts 4 processes to send requests, and the batch size is 1.
+In the figure, bert_pre represents the client-side data pre-processing stage, and client_infer represents the stage from sending the prediction request to receiving the result.
+The process labels in the figure are the client process numbers, and the second line of each process shows the timeline of each server-side op.
+
+![timeline](../../doc/timeline-example.png)
+
+## Debug tools
+
+The inference op of Paddle Serving is implemented based on the Paddle inference library.
+Before deploying a prediction service, you may need to check its input and output or check its resource consumption.
+Therefore, a local prediction tool is built into paddle_serving_app, which is used in the same way as sending a request to the server through the client.
+
+Taking [fit_a_line prediction service](../examples/fit_a_line) as an example, the following code can be used to run local prediction.
+
+```python
+from paddle_serving_app import Debugger
+import numpy as np
+
+debugger = Debugger()
+debugger.load_model_config("./uci_housing_model", gpu=False)
+data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
+ -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
+fetch_map = debugger.predict(feed={"x":data}, fetch = ["price"])
+```
diff --git a/python/paddle_serving_app/README_CN.md b/python/paddle_serving_app/README_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..75dcf9ae78bec0c00b7662f7427d3816feaeca3d
--- /dev/null
+++ b/python/paddle_serving_app/README_CN.md
@@ -0,0 +1,158 @@
+(简体中文|[English](./README.md))
+
+paddle_serving_app是Paddle Serving框架的工具组件,包含了预训练模型下载、数据预处理方法等功能。方便用户快速体验和部署模型示例、分析预测服务性能、调试模型预测服务等。
+
+## 安装
+
+```shell
+pip install paddle_serving_app
+```
+
+## 获取模型列表
+
+```shell
+python -m paddle_serving_app.package --model_list
+```
+
+## 下载预训练模型
+
+```shell
+python -m paddle_serving_app.package --get_model senta_bilstm
+```
+
+paddle_serving_app中内置了11种预训练模型,涵盖了6种预测任务。获取到的模型文件可以直接用于部署,添加`--tutorial`参数可以获取对应的部署方式。
+
+| 预测服务类型 | 模型名称 |
+| ------------ | ------------------------------------------------ |
+| 中文情感分析 | 'senta_bilstm', 'senta_bow', 'senta_cnn' |
+| 语义理解 | 'ernie_base' |
+| 中文分词 | 'lac' |
+| 图像检测 | 'faster_rcnn', 'yolov3' |
+| 图像分割 | 'unet', 'deeplabv3' |
+| 图像分类 | 'resnet_v2_50_imagenet', 'mobilenet_v2_imagenet' |
+
+## 数据预处理API
+
+paddle_serving_app针对CV和NLP领域的模型任务,提供了多种常见的数据预处理方法。
+
+- class ChineseBertReader
+
+ 中文语义理解模型预处理
+
+ - `__init__(vocab_file, max_seq_len=20)`
+
+ - vocab_file(str):词典文件路径。
+
+ - max_seq_len(int,可选):处理后的样本长度,超出的部分会截断,不足的部分会padding 0。默认值20。
+
+ - `process(line)`
+ - line(str):输入文本
+
+ [参考示例](../examples/bert/bert_client.py)
+
+- class LACReader 中文分词预处理
+
+ - `__init__(dict_floder)`
+    - dict_floder(str):词典文件目录
+ - `process(sent)`
+ - sent(str):输入文本
+ - `parse_result`
+ - words(str):原始文本
+ - crf_decode(np.array):模型预测结果中的CRF编码
+
+ [参考示例](../examples/lac/lac_web_service.py)
+
+- class SentaReader
+
+ - `__init__(vocab_path)`
+ - vocab_path(str):词典文件目录
+ - `process(cols)`
+ - cols(str):分词后的文本
+
+ [参考示例](../examples/senta/senta_web_service.py)
+
+- 图像的预处理方法相比于上述的方法更加灵活多变,可以通过以下的多个类进行组合,[参考示例](../examples/imagenet/image_rpc_client.py)
+
+- class Sequential
+
+ - `__init__(transforms)`
+ - transforms(list):图像预处理方法类的列表
+ - `__call__(img)`
+ - img:图像处理的输入,具体类型与transforms中的第一个预处理方法有关
+
+- class File2Image
+
+ - `__call__(img_path)`
+ - img_path(str):图像文件路径
+
+- class URL2Image
+
+ - `__call__(img_url)`
+ - img_url(str):图像url
+
+- class Normalize
+
+ - `__init__(mean,std)`
+ - mean(float):均值
+ - std(float):方差
+ - `__call__(img)`
+ - img(np.array):(C,H,W)排列的图像数据
+
+- class CenterCrop
+
+ - `__init__(size)`
+ - size(list/int):预期的裁剪后的大小,list类型时需要包含预期的长和宽,int类型时会返回边长为size的正方形图片
+ - `__call__(img)`
+ - img(np.array):输入图像
+
+- class Resize
+
+ - `__init__(size, max_size=2147483647, interpolation=None)`
+ - size(list/int):预期的图像大小,list类型时需要包含预期的长和宽,int类型时,短边会设置为size的长度,长边按比例缩放
+ - `__call__(img)`
+ - img(numpy array):输入图像
+
+## Timeline 工具
+
+通过Timeline工具可以将预测服务的准备数据、client等待、server端op等各阶段起止时间可视化,方便分析预测服务中的时间占用比重,在此基础上有针对性地优化预测服务。
+
+### 使用方式
+
+1. client端在进行预测之前,通过环境变量打开Paddle Serving框架中的各阶段日志打点功能
+
+ ```shell
+ export FLAGS_profile_client=1 #开启client端各阶段时间打点
+ export FLAGS_profile_server=1 #开启server端各阶段时间打点
+ ```
+
+2. 执行预测,并将client端的日志重定向到文件中,例如profile文件。
+
+3. 将日志文件中的信息导出成为trace文件
+
+ ```shell
+ python -m paddle_serving_app.trace --profile_file profile --trace_file trace
+ ```
+
+4. 使用chrome浏览器,打开`chrome://tracing/`网址,通过load按钮加载上一步产生的trace文件,即可将预测服务的各阶段时间信息可视化。
+
+ 效果如下图,图中展示了使用[bert示例](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/bert)的GPU预测服务,server端开启4卡预测,client端启动4进程,batch size为1时的各阶段timeline。
+其中bert_pre代表client端的数据预处理阶段,client_infer代表client完成预测请求的发送到接收结果的阶段,图中的process代表的是client的进程号,每个进程的第二行展示的是server各个op的timeline。
+
+ ![timeline](../../doc/timeline-example.png)
+
+## Debug工具
+
+Paddle Serving框架的server预测op使用了Paddle 的预测框架,在部署预测服务之前可能需要对预测服务的输入输出进行检验或者查看资源占用等。因此在paddle_serving_app中内置了本地预测工具,使用方式与通过client向服务端发送请求一致。
+
+以[fit_a_line预测服务](../examples/fit_a_line)为例,使用以下代码即可执行本地预测。
+
+```python
+from paddle_serving_app import Debugger
+import numpy as np
+
+debugger = Debugger()
+debugger.load_model_config("./uci_housing_model", gpu=False)
+data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
+ -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
+fetch_map = debugger.predict(feed={"x":data}, fetch = ["price"])
+```
diff --git a/python/paddle_serving_app/__init__.py b/python/paddle_serving_app/__init__.py
index fd9260284b4103f00ca8b9cda8b99173591d23eb..2a6225570c3de61ba6e0a0587f81175816cd0f8d 100644
--- a/python/paddle_serving_app/__init__.py
+++ b/python/paddle_serving_app/__init__.py
@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .reader.chinese_bert_reader import ChineseBertReader
-from .reader.image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize
+from .reader.image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize, PadStride
from .reader.lac_reader import LACReader
from .reader.senta_reader import SentaReader
+from .reader.imdb_reader import IMDBDataset
from .models import ServingModels
from .local_predict import Debugger
diff --git a/python/paddle_serving_app/local_predict.py b/python/paddle_serving_app/local_predict.py
index 6620994165306a550204498e5185bb3aacca8ffd..93039c6fdd467357b589bbb2889f3c2d3208b538 100644
--- a/python/paddle_serving_app/local_predict.py
+++ b/python/paddle_serving_app/local_predict.py
@@ -115,6 +115,13 @@ class Debugger(object):
inputs = []
for name in self.feed_names_:
+ if isinstance(feed[name], list):
+ feed[name] = np.array(feed[name]).reshape(self.feed_shapes_[
+ name])
+ if self.feed_types_[name] == 0:
+ feed[name] = feed[name].astype("int64")
+ else:
+ feed[name] = feed[name].astype("float32")
inputs.append(PaddleTensor(feed[name][np.newaxis, :]))
outputs = self.predictor.run(inputs)
diff --git a/python/paddle_serving_app/package.py b/python/paddle_serving_app/package.py
index e27914931d4f64c98627cd54025fcf87ac0f241d..250ee99f5130736945a6b77eb4d0bf5a2074a703 100644
--- a/python/paddle_serving_app/package.py
+++ b/python/paddle_serving_app/package.py
@@ -72,7 +72,7 @@ if __name__ == "__main__":
Usage:
Download a package for serving directly
Example:
- python -m paddle_serving_app.models --get senta_bilstm
+ python -m paddle_serving_app.models --get_model senta_bilstm
python -m paddle_serving_app.models --list_model
""")
pass
diff --git a/python/paddle_serving_app/reader/__init__.py b/python/paddle_serving_app/reader/__init__.py
index 01cad9e6bbdbe11191e3bc44ec2c63f2db3939bc..9b556a119d47ec693a667cf7c5ab10c0e56ace53 100644
--- a/python/paddle_serving_app/reader/__init__.py
+++ b/python/paddle_serving_app/reader/__init__.py
@@ -11,4 +11,4 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, RCNNPostprocess, SegPostprocess
+from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, RCNNPostprocess, SegPostprocess, PadStride
diff --git a/python/paddle_serving_app/reader/image_reader.py b/python/paddle_serving_app/reader/image_reader.py
index 8791e94ba8456f25deed1cbd5a2262218327c44e..7988bf447b5a0a075171d93d22dd1933aa8532b8 100644
--- a/python/paddle_serving_app/reader/image_reader.py
+++ b/python/paddle_serving_app/reader/image_reader.py
@@ -13,14 +13,19 @@
# limitations under the License.
import cv2
import os
-import urllib
import numpy as np
import base64
+import sys
from . import functional as F
from PIL import Image, ImageDraw
import json
_cv2_interpolation_to_str = {cv2.INTER_LINEAR: "cv2.INTER_LINEAR", None: "None"}
+py_version = sys.version_info[0]
+if py_version == 2:
+ import urllib
+else:
+ import urllib.request as urllib
def generate_colormap(num_classes):
@@ -393,7 +398,7 @@ class Normalize(object):
class Lambda(object):
"""Apply a user-defined lambda as a transform.
- Very shame to just copy from
+ Very shame to just copy from
https://github.com/pytorch/vision/blob/master/torchvision/transforms/transforms.py#L301
Args:
@@ -465,6 +470,24 @@ class Resize(object):
_cv2_interpolation_to_str[self.interpolation])
+class PadStride(object):
+ def __init__(self, stride):
+ self.coarsest_stride = stride
+
+ def __call__(self, img):
+ coarsest_stride = self.coarsest_stride
+ if coarsest_stride == 0:
+ return img
+ im_c, im_h, im_w = img.shape
+ pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
+ pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
+ padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
+ padding_im[:, :im_h, :im_w] = img
+ im_info = {}
+ im_info['resize_shape'] = padding_im.shape[1:]
+ return padding_im
+
+
class Transpose(object):
def __init__(self, transpose_target):
self.transpose_target = transpose_target
diff --git a/python/paddle_serving_app/reader/imdb_reader.py b/python/paddle_serving_app/reader/imdb_reader.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4ef3e163a50b0dc244ac2653df1e38d7f91699b
--- /dev/null
+++ b/python/paddle_serving_app/reader/imdb_reader.py
@@ -0,0 +1,92 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
+
+import sys
+import os
+import paddle
+import re
+import paddle.fluid.incubate.data_generator as dg
+
+py_version = sys.version_info[0]
+
+
+class IMDBDataset(dg.MultiSlotDataGenerator):
+ def load_resource(self, dictfile):
+ self._vocab = {}
+ wid = 0
+ if py_version == 2:
+ with open(dictfile) as f:
+ for line in f:
+ self._vocab[line.strip()] = wid
+ wid += 1
+ else:
+ with open(dictfile, encoding="utf-8") as f:
+ for line in f:
+ self._vocab[line.strip()] = wid
+ wid += 1
+ self._unk_id = len(self._vocab)
+ self._pattern = re.compile(r'(;|,|\.|\?|!|\s|\(|\))')
+ self.return_value = ("words", [1, 2, 3, 4, 5, 6]), ("label", [0])
+
+ def get_words_only(self, line):
+        sent = line.lower().replace("<br />", " ").strip()
+ words = [x for x in self._pattern.split(sent) if x and x != " "]
+ feas = [
+ self._vocab[x] if x in self._vocab else self._unk_id for x in words
+ ]
+ return feas
+
+ def get_words_and_label(self, line):
+        send = '|'.join(line.split('|')[:-1]).lower().replace("<br />",
+                                                              " ").strip()
+ label = [int(line.split('|')[-1])]
+
+ words = [x for x in self._pattern.split(send) if x and x != " "]
+ feas = [
+ self._vocab[x] if x in self._vocab else self._unk_id for x in words
+ ]
+ return feas, label
+
+ def infer_reader(self, infer_filelist, batch, buf_size):
+ def local_iter():
+ for fname in infer_filelist:
+ with open(fname, "r") as fin:
+ for line in fin:
+ feas, label = self.get_words_and_label(line)
+ yield feas, label
+
+ import paddle
+ batch_iter = paddle.batch(
+ paddle.reader.shuffle(
+ local_iter, buf_size=buf_size),
+ batch_size=batch)
+ return batch_iter
+
+ def generate_sample(self, line):
+ def memory_iter():
+ for i in range(1000):
+ yield self.return_value
+
+ def data_iter():
+ feas, label = self.get_words_and_label(line)
+ yield ("words", feas), ("label", label)
+
+ return data_iter
+
+
+if __name__ == "__main__":
+ imdb = IMDBDataset()
+ imdb.load_resource("imdb.vocab")
+ imdb.run_from_stdin()
diff --git a/python/paddle_serving_app/trace.py b/python/paddle_serving_app/trace.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a7f35b672d8d9bd7e9b8c64c5004eca7b9f6795
--- /dev/null
+++ b/python/paddle_serving_app/trace.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+"""
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import json
+import sys
+import argparse
+
+
+def parse_args():
+ parser = argparse.ArgumentParser("Convert profile log to trace")
+ parser.add_argument(
+ "--profile_file",
+ type=str,
+ default="",
+ required=True,
+ help="Profile log")
+ parser.add_argument(
+ "--trace_file", type=str, default="trace", help="Trace file")
+ return parser.parse_args()
+
+
+def prase(pid_str, time_str, counter):
+ pid = pid_str.split(":")[1]
+ event_list = time_str.split(" ")
+ trace_list = []
+ for event in event_list:
+ name, ts = event.split(":")
+ name_list = name.split("_")
+ ph = "B" if (name_list[-1] == "0") else "E"
+ if len(name_list) == 2:
+ name = name_list[0]
+ else:
+ name = name_list[0] + "_" + name_list[1]
+ event_dict = {}
+ event_dict["name"] = name
+ event_dict["tid"] = 0
+ event_dict["pid"] = pid
+ event_dict["ts"] = ts
+ event_dict["ph"] = ph
+
+ trace_list.append(event_dict)
+ return trace_list
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ profile_file = args.profile_file
+ trace_file = args.trace_file
+ all_list = []
+ counter = 0
+ with open(profile_file) as f:
+ for line in f.readlines():
+ line = line.strip().split("\t")
+ if line[0] == "PROFILE":
+ trace_list = prase(line[1], line[2], counter)
+ counter += 1
+ for trace in trace_list:
+ all_list.append(trace)
+
+ trace = json.dumps(all_list, indent=2, separators=(',', ':'))
+ with open(trace_file, "w") as f:
+ f.write(trace)
diff --git a/python/paddle_serving_client/io/__init__.py b/python/paddle_serving_client/io/__init__.py
index 4f174866e5521577ba35f39216f7dd0793879a6c..93ae37056320c2c7d779c5bbfc4d004a1be4f639 100644
--- a/python/paddle_serving_client/io/__init__.py
+++ b/python/paddle_serving_client/io/__init__.py
@@ -104,10 +104,10 @@ def save_model(server_model_folder,
def inference_model_to_serving(dirname,
- model_filename=None,
- params_filename=None,
serving_server="serving_server",
- serving_client="serving_client"):
+ serving_client="serving_client",
+ model_filename=None,
+ params_filename=None):
place = fluid.CPUPlace()
exe = fluid.Executor(place)
inference_program, feed_target_names, fetch_targets = \
diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py
index 971359fca0df3a122b28889e0711c86364a1c45d..3cb96a8f04922362fdb4b4c497f7679355e3879f 100644
--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -274,7 +274,8 @@ class Server(object):
self.model_config_paths[node.name] = path
print("You have specified multiple model paths, please ensure "
"that the input and output of multiple models are the same.")
- workflow_oi_config_path = self.model_config_paths.items()[0][1]
+ workflow_oi_config_path = list(self.model_config_paths.items())[0][
+ 1]
else:
raise Exception("The type of model_config_paths must be str or "
"dict({op: model_path}), not {}.".format(
diff --git a/python/paddle_serving_server/web_service.py b/python/paddle_serving_server/web_service.py
index 7e69b241f50255aa69d34c1405b72eacb675be04..f8c43707660e08e1bc44fdd62e40e20523f6cb6d 100755
--- a/python/paddle_serving_server/web_service.py
+++ b/python/paddle_serving_server/web_service.py
@@ -101,7 +101,6 @@ class WebService(object):
p_rpc = Process(target=self._launch_rpc_service)
p_rpc.start()
- def run_flask(self):
app_instance = Flask(__name__)
@app_instance.before_first_request
@@ -114,10 +113,16 @@ class WebService(object):
def run():
return self.get_prediction(request)
- app_instance.run(host="0.0.0.0",
- port=self.port,
- threaded=False,
- processes=4)
+ self.app_instance = app_instance
+
+ def run_flask(self):
+ self.app_instance.run(host="0.0.0.0",
+ port=self.port,
+ threaded=False,
+ processes=1)
+
+ def get_app_instance(self):
+ return self.app_instance
def preprocess(self, feed=[], fetch=[]):
return feed, fetch
diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py
index 5a06bd712a836617047b0cc947956fc5d2213daa..7acc926c7f7fc465da20a7609bc767a5289d2e61 100644
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -320,7 +320,8 @@ class Server(object):
self.model_config_paths[node.name] = path
print("You have specified multiple model paths, please ensure "
"that the input and output of multiple models are the same.")
- workflow_oi_config_path = self.model_config_paths.items()[0][1]
+ workflow_oi_config_path = list(self.model_config_paths.items())[0][
+ 1]
else:
raise Exception("The type of model_config_paths must be str or "
"dict({op: model_path}), not {}.".format(
diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py
index 2ec996b1db89bdff3c4550caa566bec5af2d9506..e64e73197d02a80e43bbc77a7589ab43efe2f244 100644
--- a/python/paddle_serving_server_gpu/web_service.py
+++ b/python/paddle_serving_server_gpu/web_service.py
@@ -151,7 +151,6 @@ class WebService(object):
for p in server_pros:
p.start()
- def run_flask(self):
app_instance = Flask(__name__)
@app_instance.before_first_request
@@ -164,10 +163,16 @@ class WebService(object):
def run():
return self.get_prediction(request)
- app_instance.run(host="0.0.0.0",
- port=self.port,
- threaded=False,
- processes=4)
+ self.app_instance = app_instance
+
+ def run_flask(self):
+ self.app_instance.run(host="0.0.0.0",
+ port=self.port,
+ threaded=False,
+ processes=1)
+
+ def get_app_instance(self):
+        return self.app_instance
def preprocess(self, feed=[], fetch=[]):
return feed, fetch
diff --git a/tools/Dockerfile.centos6.devel b/tools/Dockerfile.centos6.devel
index dd5a2ef786ed8a9c239a99cabbcfe2d482e6341c..5223693d846bdbc90bdefe58c26db29d6a81359d 100644
--- a/tools/Dockerfile.centos6.devel
+++ b/tools/Dockerfile.centos6.devel
@@ -43,5 +43,5 @@ RUN yum -y install wget && \
source /root/.bashrc && \
cd .. && rm -rf Python-3.6.8* && \
pip3 install google protobuf setuptools wheel flask numpy==1.16.4 && \
- yum -y install epel-release && yum -y install patchelf && \
+ yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
yum clean all
diff --git a/tools/Dockerfile.centos6.gpu.devel b/tools/Dockerfile.centos6.gpu.devel
index c34780c151e960134af5f8b448e0465b8285e8b2..1432d49abe9a4aec3b558d855c9cfcf30efef461 100644
--- a/tools/Dockerfile.centos6.gpu.devel
+++ b/tools/Dockerfile.centos6.gpu.devel
@@ -43,5 +43,5 @@ RUN yum -y install wget && \
source /root/.bashrc && \
cd .. && rm -rf Python-3.6.8* && \
pip3 install google protobuf setuptools wheel flask numpy==1.16.4 && \
- yum -y install epel-release && yum -y install patchelf && \
+ yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
yum clean all
diff --git a/tools/Dockerfile.devel b/tools/Dockerfile.devel
index 6cb228f587054d5b579df0d85109d41c15c128e9..385e568273eab54f7dfa51a20bb7dcd89cfa98a8 100644
--- a/tools/Dockerfile.devel
+++ b/tools/Dockerfile.devel
@@ -20,5 +20,5 @@ RUN yum -y install wget >/dev/null \
&& rm get-pip.py \
&& yum install -y python3 python3-devel \
&& pip3 install google protobuf setuptools wheel flask \
- && yum -y install epel-release && yum -y install patchelf \
+    && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender \
&& yum clean all
diff --git a/tools/Dockerfile.gpu.devel b/tools/Dockerfile.gpu.devel
index 8cd7a6dbbddd5e1b60b7833086aa25cd849da519..2ffbe4601e1f7e9b05c87f9562b3e0ffc4b967ff 100644
--- a/tools/Dockerfile.gpu.devel
+++ b/tools/Dockerfile.gpu.devel
@@ -21,5 +21,5 @@ RUN yum -y install wget >/dev/null \
&& rm get-pip.py \
&& yum install -y python3 python3-devel \
&& pip3 install google protobuf setuptools wheel flask \
- && yum -y install epel-release && yum -y install patchelf \
+    && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender \
&& yum clean all
diff --git a/tools/serving_build.sh b/tools/serving_build.sh
index a522efe19cb9f4170341f291d8c30db0e6749ad1..43e55174ab30374d853ed1bb25aa4a9cc637afd5 100644
--- a/tools/serving_build.sh
+++ b/tools/serving_build.sh
@@ -343,7 +343,7 @@ function python_test_imdb() {
sleep 5
check_cmd "head test_data/part-0 | python test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab"
# test batch predict
- check_cmd "python benchmark_batch.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc --endpoint 127.0.0.1:9292"
+ check_cmd "python benchmark.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc --endpoint 127.0.0.1:9292"
echo "imdb CPU RPC inference pass"
kill_server_process
rm -rf work_dir1
@@ -359,7 +359,7 @@ function python_test_imdb() {
exit 1
fi
# test batch predict
- check_cmd "python benchmark_batch.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request http --endpoint 127.0.0.1:9292"
+ check_cmd "python benchmark.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request http --endpoint 127.0.0.1:9292"
setproxy # recover proxy state
kill_server_process
ps -ef | grep "text_classify_service.py" | grep -v grep | awk '{print $2}' | xargs kill