Merge branch 'develop' of https://github.com/PaddlePaddle/Serving into pyserving

e507aed1 · barrierye · 2f6a74f2 · ffeb9903 · e507aed1 · e507aed1
43 changed file
--- a/README.md
+++ b/README.md
@@ -264,8 +264,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa

 ### About Efficiency
 - [How to profile Paddle Serving latency?](python/examples/util)
- [How to optimize performance?(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [Deploy multi-services on one GPU(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
+- [How to optimize performance?(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
+- [Deploy multi-services on one GPU(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
 - [CPU Benchmarks(Chinese)](doc/BENCHMARKING.md)
 - [GPU Benchmarks(Chinese)](doc/GPU_BENCHMARKING.md)


--- a/README_CN.md
+++ b/README_CN.md
@@ -270,8 +270,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa

 ### 关于Paddle Serving性能
 - [如何测试Paddle Serving性能？](python/examples/util/)
- [如何优化性能?](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [在一张GPU上启动多个预测服务](doc/PERFORMANCE_OPTIM_CN.md)
+- [如何优化性能?](doc/PERFORMANCE_OPTIM_CN.md)
+- [在一张GPU上启动多个预测服务](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
 - [CPU版Benchmarks](doc/BENCHMARKING.md)
 - [GPU版Benchmarks](doc/GPU_BENCHMARKING.md)


--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -258,9 +258,10 @@ int PredictorClient::batch_predict(
      ModelRes model;
      model.set_engine_name(output.engine_name());

+      int idx = 0;
+
      for (auto &name : fetch_name) {
        // int idx = _fetch_name_to_idx[name];
-        int idx = 0;
        int shape_size = output.insts(0).tensor_array(idx).shape_size();
        VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
                << shape_size;
@@ -279,9 +280,9 @@ int PredictorClient::batch_predict(
        idx += 1;
      }

+      idx = 0;
      for (auto &name : fetch_name) {
        // int idx = _fetch_name_to_idx[name];
-        int idx = 0;
        if (_fetch_name_to_type[name] == 0) {
          VLOG(2) << "ferch var " << name << "type int";
          model._int64_value_map[name].resize(
@@ -536,9 +537,9 @@ int PredictorClient::numpy_predict(
      ModelRes model;
      model.set_engine_name(output.engine_name());

+      int idx = 0;
      for (auto &name : fetch_name) {
        // int idx = _fetch_name_to_idx[name];
-        int idx = 0;
        int shape_size = output.insts(0).tensor_array(idx).shape_size();
        VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
                << shape_size;
@@ -557,9 +558,10 @@ int PredictorClient::numpy_predict(
        idx += 1;
      }

+      idx = 0;
+
      for (auto &name : fetch_name) {
        // int idx = _fetch_name_to_idx[name];
-        int idx = 0;
        if (_fetch_name_to_type[name] == 0) {
          VLOG(2) << "ferch var " << name << "type int";
          model._int64_value_map[name].resize(

--- a/doc/BERT_10_MINS_CN.md
+++ b/doc/BERT_10_MINS_CN.md
@@ -13,10 +13,10 @@ import paddlehub as hub
 model_name = "bert_chinese_L-12_H-768_A-12"
 module = hub.Module(model_name)
 inputs, outputs, program = module.context(trainable=True, max_seq_len=20)
-feed_keys = ["input_ids", "position_ids", "segment_ids", "input_mask", "pooled_output", "sequence_output"]
+feed_keys = ["input_ids", "position_ids", "segment_ids", "input_mask"]
 fetch_keys = ["pooled_output", "sequence_output"]
 feed_dict = dict(zip(feed_keys, [inputs[x] for x in feed_keys]))
-fetch_dict = dict(zip(fetch_keys, [outputs[x]] for x in fetch_keys))
+fetch_dict = dict(zip(fetch_keys, [outputs[x] for x in fetch_keys]))

 import paddle_serving_client.io as serving_io
 serving_io.save_model("bert_seq20_model", "bert_seq20_client", feed_dict, fetch_dict, program)

--- a/doc/SAVE.md
+++ b/doc/SAVE.md
@@ -10,8 +10,9 @@ serving_io.save_model("imdb_model", "imdb_client_conf",
                      {"words": data}, {"prediction": prediction},
                      fluid.default_main_program())
 ```
-`imdb_model` is the server side model with serving configurations. `imdb_client_conf` is the client rpc configurations. Serving has a 
-dictionary for `Feed` and `Fetch` variables for client to assign. In the example, `{"words": data}` is the feed dict that specify the input of saved inference model. `{"prediction": prediction}` is the fetch dic that specify the output of saved inference model. An alias name can be defined for feed and fetch variables. An example of how to use alias name
+`imdb_model` is the server side model with serving configurations. `imdb_client_conf` is the client rpc configurations. 
+
+Serving has a dictionary for `Feed` and `Fetch` variables for client to assign. In the example, `{"words": data}` is the feed dict that specifies the input of the saved inference model. `{"prediction": prediction}` is the fetch dict that specifies the output of the saved inference model. An alias name can be defined for feed and fetch variables. An example of how to use alias names
 is as follows:
 ``` python
 from paddle_serving_client import Client
@@ -35,10 +36,14 @@ for line in sys.stdin:
 If you have saved model files using Paddle's `save_inference_model` API, you can use Paddle Serving's `inference_model_to_serving` API to convert it into a model file that can be used for Paddle Serving.
 ```
 import paddle_serving_client.io as serving_io
-serving_io.inference_model_to_serving(dirname, model_filename=None, params_filename=None, serving_server="serving_server", serving_client="serving_client")
+serving_io.inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client", model_filename=None, params_filename=None )
 ```
 dirname (str) - Path of saved model files. Program file and parameter files are saved in this directory.
-model_filename (str, optional) - The name of file to load the inference program. If it is None, the default filename __model__ will be used. Default: None.
-paras_filename (str, optional) - The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. Default: None.
+
 serving_server (str, optional) - The path of model files and configuration files for server. Default: "serving_server".
+
 serving_client (str, optional) - The path of configuration files for client. Default: "serving_client".
+
+model_filename (str, optional) - The name of file to load the inference program. If it is None, the default filename `__model__` will be used. Default: None.
+
+params_filename (str, optional) - The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. Default: None.
--- a/doc/SAVE_CN.md
+++ b/doc/SAVE_CN.md
@@ -11,7 +11,9 @@ serving_io.save_model("imdb_model", "imdb_client_conf",
                      {"words": data}, {"prediction": prediction},
                      fluid.default_main_program())
 ```
-imdb_model是具有服务配置的服务器端模型。 imdb_client_conf是客户端rpc配置。 Serving有一个 提供给用户存放Feed和Fetch变量信息的字典。 在示例中，`{words”：data}` 是用于指定已保存推理模型输入的提要字典。`{"prediction"：projection}`是指定保存的推理模型输出的字典。可以为feed和fetch变量定义一个别名。 如何使用别名的例子 示例如下：
+imdb_model是具有服务配置的服务器端模型。 imdb_client_conf是客户端rpc配置。
+
+Serving有一个提供给用户存放Feed和Fetch变量信息的字典。 在示例中，`{"words": data}` 是用于指定已保存推理模型输入的feed字典。`{"prediction": prediction}`是指定保存的推理模型输出的fetch字典。可以为feed和fetch变量定义一个别名。 使用别名的示例如下：

 ``` python
 from paddle_serving_client import Client
@@ -35,10 +37,14 @@ for line in sys.stdin:
 如果已使用Paddle 的`save_inference_model`接口保存出预测要使用的模型，则可以通过Paddle Serving的`inference_model_to_serving`接口转换成可用于Paddle Serving的模型文件。
 ```
 import paddle_serving_client.io as serving_io
-serving_io.inference_model_to_serving(dirname, model_filename=None, params_filename=None, serving_server="serving_server", serving_client="serving_client")
+serving_io.inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client",  model_filename=None, params_filename=None)
 ```
 dirname (str) – 需要转换的模型文件存储路径，Program结构文件和参数文件均保存在此目录。
-model_filename (str，可选) – 存储需要转换的模型Inference Program结构的文件名称。如果设置为None，则使用 __model__ 作为默认的文件名。默认值为None。
+
+serving_server (str, 可选) - 转换后的模型文件和配置文件的存储路径。默认值为serving_server。
+
+serving_client (str, 可选) - 转换后的客户端配置文件存储路径。默认值为serving_client。
+
+model_filename (str，可选) – 存储需要转换的模型Inference Program结构的文件名称。如果设置为None，则使用 `__model__` 作为默认的文件名。默认值为None。
+
 params_filename (str，可选) – 存储需要转换的模型所有参数的文件名称。当且仅当所有模型参数被保存在一个单独的二进制文件中，它才需要被指定。如果模型参数是存储在各自分离的文件中，设置它的值为None。默认值为None。
-serving_server (str, 可选) - 转换后的模型文件和配置文件的存储路径。默认值为"serving_server"。
-serving_client (str, 可选) - 转换后的客户端配置文件存储路径。默认值为"serving_client"。
--- a/doc/UWSGI_DEPLOY.md
+++ b/doc/UWSGI_DEPLOY.md
-# 使用uwsgi启动HTTP预测服务
+# Deploy HTTP service with uWSGI

-在提供的fit_a_line示例中，启动HTTP预测服务后会看到有以下信息：
+([简体中文](./UWSGI_DEPLOY_CN.md)|English)
+
+In fit_a_line example, after starting the HTTP prediction service, you will see the following information:

 ```shell
 web service address:
@@ -13,46 +15,31 @@ http://10.127.3.150:9393/uci/prediction
 * Running on http://0.0.0.0:9393/ (Press CTRL+C to quit)
 ```

-这里会提示启动的HTTP服务是开发模式，并不能用于生产环境的部署。Flask启动的服务环境不够稳定也无法承受大量请求的并发，实际部署过程中配合需要WSGI（Web Server Gateway Interface）使用。
+The output above warns that the HTTP service was started in development mode and cannot be used for production deployment.
+The prediction service started by Flask is not stable enough to withstand a large number of concurrent requests. In an actual deployment it needs to be used together with a WSGI (Web Server Gateway Interface) server.

-下面我们展示一下如何使用[uWSGI](https://github.com/unbit/uwsgi)模块来部署HTTP预测服务用于生产环境。
+Next, we will show how to use the [uWSGI](https://github.com/unbit/uwsgi) module to deploy HTTP prediction services for production environments.

-编写HTTP服务脚本

 ```python
 #uwsgi_service.py
 from paddle_serving_server.web_service import WebService
-from flask import Flask, request

-#配置预测服务
+#Define prediction service
 uci_service = WebService(name = "uci")
 uci_service.load_model_config("./uci_housing_model")
 uci_service.prepare_server(workdir="./workdir", port=int(9500), device="cpu")
 uci_service.run_server()
-
-#配置flask服务
-app_instance = Flask(__name__)
-@app_instance.before_first_request
-def init():
-    global uci_service
-    uci_service._launch_web_service()
-
-service_name = "/" + uci_service.name + "/prediction"
-@app_instance.route(service_name, methods=["POST"])
-def run():
-    return uci_service.get_prediction(request)
-
-#run方法用于直接调试中直接启动服务
-if __name__ == "__main__":
-    app_instance.run()
+#Get flask application
+app_instance = uci_service.get_app_instance()
 ```

-使用uwsgi启动HTTP服务
+Start service with uWSGI

 ```bash
-uwsgi --http :9000 --wsgi-file uwsgi_service.py --callable app_instance --processes 4
+uwsgi --http :9393 --module uwsgi_service:app_instance
 ```

-使用--processes参数可以指定服务的进程数，请注意目前Serving HTTP 服务暂时不支持多线程的方式使用。
+Use the `--processes` parameter to specify the number of service processes (for example, `--processes 4`).

-更多uWSGI的信息请参考[uWSGI使用文档](https://uwsgi-docs.readthedocs.io/en/latest/)
+For more information about uWSGI, please refer to [uWSGI documentation](https://uwsgi-docs.readthedocs.io/en/latest/)
--- a/doc/UWSGI_DEPLOY_CN.md
+++ b/doc/UWSGI_DEPLOY_CN.md
+# 使用uwsgi启动HTTP预测服务
+
+(简体中文|[English](./UWSGI_DEPLOY.md))
+
+在提供的fit_a_line示例中，启动HTTP预测服务后会看到有以下信息：
+
+```shell
+web service address:
+http://10.127.3.150:9393/uci/prediction
+ * Serving Flask app "serve" (lazy loading)
+ * Environment: production
+   WARNING: This is a development server. Do not use it in a production deployment.
+   Use a production WSGI server instead.
+ * Debug mode: off
+ * Running on http://0.0.0.0:9393/ (Press CTRL+C to quit)
+```
+
+这里会提示启动的HTTP服务是开发模式，并不能用于生产环境的部署。Flask启动的服务环境不够稳定也无法承受大量请求的并发，实际部署过程中配合需要WSGI（Web Server Gateway Interface）使用。
+
+下面我们展示一下如何使用[uWSGI](https://github.com/unbit/uwsgi)模块来部署HTTP预测服务用于生产环境。
+
+编写HTTP服务脚本
+
+```python
+#uwsgi_service.py
+from paddle_serving_server.web_service import WebService
+
+#配置预测服务
+uci_service = WebService(name = "uci")
+uci_service.load_model_config("./uci_housing_model")
+uci_service.prepare_server(workdir="./workdir", port=int(9500), device="cpu")
+uci_service.run_server()
+#获取flask服务
+app_instance = uci_service.get_app_instance()
+```
+
+使用uwsgi启动HTTP服务
+
+```bash
+uwsgi --http :9393 --module uwsgi_service:app_instance
+```
+
+使用--processes参数可以指定服务的进程数。
+
+更多uWSGI的信息请参考[uWSGI使用文档](https://uwsgi-docs.readthedocs.io/en/latest/)
--- a/python/examples/bert/bert_web_service.py
+++ b/python/examples/bert/bert_web_service.py
@@ -23,10 +23,10 @@ class BertService(WebService):
    def load(self):
        self.reader = BertReader(vocab_file="vocab.txt", max_seq_len=128)

-    def preprocess(self, feed={}, fetch=[]):
-        feed_res = [{
-            "words": self.reader.process(ins["words"].encode("utf-8"))
-        } for ins in feed]
+    def preprocess(self, feed=[], fetch=[]):
+        feed_res = [
+            self.reader.process(ins["words"].encode("utf-8")) for ins in feed
+        ]
        return feed_res, fetch



--- a/python/examples/cascade_rcnn/README.md
+++ b/python/examples/cascade_rcnn/README.md
+# Cascade RCNN model on Paddle Serving
+
+([简体中文](./README_CN.md)|English)
+
+### Get The Cascade RCNN Model
+```
+sh get_data.sh
+```
+If you want to have more detection models, please refer to [Paddle Detection Model Zoo](https://github.com/PaddlePaddle/PaddleDetection/blob/release/0.2/docs/MODEL_ZOO_cn.md)
+
+### Start the service
+```
+python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0
+```
+
+### Perform prediction
+```
+python test_client.py 
+```
+
+The image with bounding boxes and the JSON results are saved in the `output` folder.
--- a/python/examples/cascade_rcnn/README_CN.md
+++ b/python/examples/cascade_rcnn/README_CN.md
+# 使用Paddle Serving部署Cascade RCNN模型
+
+(简体中文|[English](./README.md))
+
+### 获得Cascade RCNN模型
+```
+sh get_data.sh
+```
+如果你想要更多的检测模型，请参考[Paddle检测模型库](https://github.com/PaddlePaddle/PaddleDetection/blob/release/0.2/docs/MODEL_ZOO_cn.md)
+
+### 启动服务
+```
+python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0
+```
+
+### 执行预测
+```
+python test_client.py
+```
+
+客户端已经为图片做好了后处理，在`output`文件夹下存放各个框的json格式信息还有后处理结果图片。
--- a/python/examples/cascade_rcnn/get_data.sh
+++ b/python/examples/cascade_rcnn/get_data.sh
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/cascade_rcnn_r50_fpx_1x_serving.tar.gz
+tar xf cascade_rcnn_r50_fpx_1x_serving.tar.gz
--- a/python/examples/imagenet/README.md
+++ b/python/examples/imagenet/README.md
@@ -44,6 +44,6 @@ python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696

 client send inference request
 ```
-python image_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
+python resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
 ```
 *the port of server side in this example is 9696
--- a/python/examples/imagenet/README_CN.md
+++ b/python/examples/imagenet/README_CN.md
@@ -44,6 +44,6 @@ python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696

 client端进行预测
 ```
-python image_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
+python resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
 ```
 *server端示例中服务端口为9696端口
--- a/python/examples/imagenet/benchmark.py
+++ b/python/examples/imagenet/benchmark.py
@@ -39,8 +39,8 @@ def single_func(idx, resource):
        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])

        start = time.time()
-        for i in range(1000):
-            img = reader.process_image(img_list[i]).reshape(-1)
+        for i in range(100):
+            img = reader.process_image(img_list[i])
            fetch_map = client.predict(feed={"image": img}, fetch=["score"])
        end = time.time()
        return [[end - start]]
@@ -49,7 +49,7 @@ def single_func(idx, resource):

 if __name__ == "__main__":
    multi_thread_runner = MultiThreadRunner()
-    endpoint_list = ["127.0.0.1:9393"]
+    endpoint_list = ["127.0.0.1:9292"]
    #card_num = 4
    #for i in range(args.thread):
    #    endpoint_list.append("127.0.0.1:{}".format(9295 + i % card_num))

--- a/python/examples/imagenet/benchmark_batch.py
+++ b/python/examples/imagenet/benchmark_batch.py
@@ -24,6 +24,7 @@ from paddle_serving_client.utils import MultiThreadRunner
 from paddle_serving_client.utils import benchmark_args
 import requests
 import json
+import base64
 from image_reader import ImageReader

 args = benchmark_args()
@@ -36,6 +37,10 @@ def single_func(idx, resource):
    img_list = []
    for i in range(1000):
        img_list.append(open("./image_data/n01440764/" + file_list[i]).read())
+    profile_flags = False
+    if "FLAGS_profile_client" in os.environ and os.environ[
+            "FLAGS_profile_client"]:
+        profile_flags = True
    if args.request == "rpc":
        reader = ImageReader()
        fetch = ["score"]
@@ -46,23 +51,43 @@ def single_func(idx, resource):
        for i in range(1000):
            if args.batch_size >= 1:
                feed_batch = []
+                i_start = time.time()
                for bi in range(args.batch_size):
                    img = reader.process_image(img_list[i])
-                    img = img.reshape(-1)
                    feed_batch.append({"image": img})
+                i_end = time.time()
+                if profile_flags:
+                    print("PROFILE\tpid:{}\timage_pre_0:{} image_pre_1:{}".
+                          format(os.getpid(),
+                                 int(round(i_start * 1000000)),
+                                 int(round(i_end * 1000000))))
+
                result = client.predict(feed=feed_batch, fetch=fetch)
            else:
                print("unsupport batch size {}".format(args.batch_size))

    elif args.request == "http":
-        raise ("no batch predict for http")
+        py_version = 2
+        server = "http://" + resource["endpoint"][idx % len(resource[
+            "endpoint"])] + "/image/prediction"
+        start = time.time()
+        for i in range(1000):
+            if py_version == 2:
+                image = base64.b64encode(
+                    open("./image_data/n01440764/" + file_list[i]).read())
+            else:
+                image = base64.b64encode(open(image_path, "rb").read()).decode(
+                    "utf-8")
+            req = json.dumps({"feed": [{"image": image}], "fetch": ["score"]})
+            r = requests.post(
+                server, data=req, headers={"Content-Type": "application/json"})
    end = time.time()
    return [[end - start]]


 if __name__ == '__main__':
    multi_thread_runner = MultiThreadRunner()
-    endpoint_list = ["127.0.0.1:9393"]
+    endpoint_list = ["127.0.0.1:9292"]
    #endpoint_list = endpoint_list + endpoint_list + endpoint_list
    result = multi_thread_runner.run(single_func, args.thread,
                                     {"endpoint": endpoint_list})

--- a/python/examples/imdb/benchmark.py
+++ b/python/examples/imdb/benchmark.py
@@ -16,7 +16,7 @@
 import sys
 import time
 import requests
-from imdb_reader import IMDBDataset
+from paddle_serving_app import IMDBDataset
 from paddle_serving_client import Client
 from paddle_serving_client.utils import MultiThreadRunner
 from paddle_serving_client.utils import benchmark_args
@@ -37,26 +37,39 @@ def single_func(idx, resource):
        client.load_client_config(args.model)
        client.connect([args.endpoint])
        for i in range(1000):
-            if args.batch_size == 1:
-                word_ids, label = imdb_dataset.get_words_and_label(line)
-                fetch_map = client.predict(
-                    feed={"words": word_ids}, fetch=["prediction"])
+            if args.batch_size >= 1:
+                feed_batch = []
+                for bi in range(args.batch_size):
+                    word_ids, label = imdb_dataset.get_words_and_label(dataset[
+                        bi])
+                    feed_batch.append({"words": word_ids})
+                result = client.predict(feed=feed_batch, fetch=["prediction"])
+                if result is None:
+                    raise ("predict failed.")
            else:
                print("unsupport batch size {}".format(args.batch_size))

    elif args.request == "http":
-        for fn in filelist:
-            fin = open(fn)
-            for line in fin:
-                word_ids, label = imdb_dataset.get_words_and_label(line)
+        if args.batch_size >= 1:
+            feed_batch = []
+            for bi in range(args.batch_size):
+                feed_batch.append({"words": dataset[bi]})
            r = requests.post(
                "http://{}/imdb/prediction".format(args.endpoint),
-                    data={"words": word_ids,
+                json={"feed": feed_batch,
                      "fetch": ["prediction"]})
+            if r.status_code != 200:
+                print('HTTP status code -ne 200')
+                raise ("predict failed.")
+        else:
+            print("unsupport batch size {}".format(args.batch_size))
    end = time.time()
    return [[end - start]]


 multi_thread_runner = MultiThreadRunner()
 result = multi_thread_runner.run(single_func, args.thread, {})
-print(result)
+avg_cost = 0
+for cost in result[0]:
+    avg_cost += cost
+print("total cost {} s of each thread".format(avg_cost / args.thread))
--- a/python/examples/imdb/benchmark.sh
+++ b/python/examples/imdb/benchmark.sh
 rm profile_log
 for thread_num in 1 2 4 8 16
 do
-    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --model imdbo_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
+for batch_size in 1 2 4 8 16 32 64 128 256 512
+do
+    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
    echo "========================================"
    echo "batch size : $batch_size" >> profile_log
    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
    tail -n 1 profile >> profile_log
 done
+done
--- a/python/examples/imdb/benchmark_batch.py
+++ b/python/examples/imdb/benchmark_batch.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# pylint: disable=doc-string-missing
-
-import sys
-import time
-import requests
-from imdb_reader import IMDBDataset
-from paddle_serving_client import Client
-from paddle_serving_client.utils import MultiThreadRunner
-from paddle_serving_client.utils import benchmark_args
-
-args = benchmark_args()
-
-
-def single_func(idx, resource):
-    imdb_dataset = IMDBDataset()
-    imdb_dataset.load_resource("./imdb.vocab")
-    dataset = []
-    with open("./test_data/part-0") as fin:
-        for line in fin:
-            dataset.append(line.strip())
-    start = time.time()
-    if args.request == "rpc":
-        client = Client()
-        client.load_client_config(args.model)
-        client.connect([args.endpoint])
-        for i in range(1000):
-            if args.batch_size >= 1:
-                feed_batch = []
-                for bi in range(args.batch_size):
-                    word_ids, label = imdb_dataset.get_words_and_label(dataset[
-                        bi])
-                    feed_batch.append({"words": word_ids})
-                result = client.predict(feed=feed_batch, fetch=["prediction"])
-                if result is None:
-                    raise ("predict failed.")
-            else:
-                print("unsupport batch size {}".format(args.batch_size))
-
-    elif args.request == "http":
-        if args.batch_size >= 1:
-            feed_batch = []
-            for bi in range(args.batch_size):
-                feed_batch.append({"words": dataset[bi]})
-            r = requests.post(
-                "http://{}/imdb/prediction".format(args.endpoint),
-                json={"feed": feed_batch,
-                      "fetch": ["prediction"]})
-            if r.status_code != 200:
-                print('HTTP status code -ne 200')
-                raise ("predict failed.")
-        else:
-            print("unsupport batch size {}".format(args.batch_size))
-    end = time.time()
-    return [[end - start]]
-
-
-multi_thread_runner = MultiThreadRunner()
-result = multi_thread_runner.run(single_func, args.thread, {})
-avg_cost = 0
-for cost in result[0]:
-    avg_cost += cost
-print("total cost {} s of each thread".format(avg_cost / args.thread))
--- a/python/examples/imdb/benchmark_batch.sh
+++ b/python/examples/imdb/benchmark_batch.sh
-rm profile_log
-for thread_num in 1 2 4 8 16
-do
-for batch_size in 1 2 4 8 16 32 64 128 256 512
-do
-    $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
-    echo "========================================"
-    echo "batch size : $batch_size" >> profile_log
-    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
-    tail -n 1 profile >> profile_log
-done
-done
--- a/python/examples/imdb/test_client.py
+++ b/python/examples/imdb/test_client.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 # pylint: disable=doc-string-missing
 from paddle_serving_client import Client
-from imdb_reader import IMDBDataset
+from paddle_serving_app import IMDBDataset
 import sys

 client = Client()

--- a/python/examples/imdb/test_client_batch.py
+++ b/python/examples/imdb/test_client_batch.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# pylint: disable=doc-string-missing
-
-from paddle_serving_client import Client
-import sys
-import subprocess
-from multiprocessing import Pool
-import time
-
-
-def batch_predict(batch_size=4):
-    client = Client()
-    client.load_client_config(conf_file)
-    client.connect(["127.0.0.1:9292"])
-    fetch = ["acc", "cost", "prediction"]
-    feed_batch = []
-    for line in sys.stdin:
-        group = line.strip().split()
-        words = [int(x) for x in group[1:int(group[0])]]
-        label = [int(group[-1])]
-        feed = {"words": words, "label": label}
-        feed_batch.append(feed)
-        if len(feed_batch) == batch_size:
-            fetch_batch = client.batch_predict(
-                feed_batch=feed_batch, fetch=fetch)
-            for i in range(batch_size):
-                print("{} {}".format(fetch_batch[i]["prediction"][1],
-                                     feed_batch[i]["label"][0]))
-            feed_batch = []
-    if len(feed_batch) > 0:
-        fetch_batch = client.batch_predict(feed_batch=feed_batch, fetch=fetch)
-        for i in range(len(feed_batch)):
-            print("{} {}".format(fetch_batch[i]["prediction"][1], feed_batch[i][
-                "label"][0]))
-
-
-if __name__ == '__main__':
-    conf_file = sys.argv[1]
-    batch_size = int(sys.argv[2])
-    batch_predict(batch_size)
--- a/python/examples/imdb/text_classify_service.py
+++ b/python/examples/imdb/text_classify_service.py
@@ -14,7 +14,7 @@
 # pylint: disable=doc-string-missing

 from paddle_serving_server.web_service import WebService
-from imdb_reader import IMDBDataset
+from paddle_serving_app import IMDBDataset
 import sys



--- a/python/examples/senta/senta_web_service.py
+++ b/python/examples/senta/senta_web_service.py
@@ -51,13 +51,11 @@ class SentaService(WebService):
    def init_lac_service(self):
        ps = Process(target=self.start_lac_service())
        ps.start()
-        #self.init_lac_client()
+        self.init_lac_client()

    def lac_predict(self, feed_data):
-        self.init_lac_client()
        lac_result = self.lac_client.predict(
            feed={"words": feed_data}, fetch=["crf_decode"])
-        self.lac_client.release()
        return lac_result

    def init_lac_client(self):

--- a/python/paddle_serving_app/README.md
+++ b/python/paddle_serving_app/README.md
+([简体中文](./README_CN.md)|English)
+
+paddle_serving_app is a tool component of the Paddle Serving framework, and includes functions such as pre-training model download and data pre-processing methods.
+It is convenient for users to quickly test and deploy model examples, analyze the performance of prediction services, and debug model prediction services.
+
+## Install
+
+```shell
+pip install paddle_serving_app
+```
+
+## Get model list
+
+```shell
+python -m paddle_serving_app.package --model_list
+```
+
+## Download pre-training model
+
+```shell
+python -m paddle_serving_app.package --get_model senta_bilstm
+```
+
+11 pre-trained models are built into paddle_serving_app, covering 6 kinds of prediction tasks.
+The model files can be directly used for deployment, and the `--tutorial` argument can be added to obtain the deployment method.
+
+| Prediction task | Model name                                         |
+| ------------ | ------------------------------------------------ |
+| SentimentAnalysis | 'senta_bilstm', 'senta_bow', 'senta_cnn'         |
+| SemanticRepresentation | 'ernie_base'                                     |
+| ChineseWordSegmentation     | 'lac'                                            |
+| ObjectDetection     | 'faster_rcnn', 'yolov3'                          |
+| ImageSegmentation     | 'unet', 'deeplabv3'                              |
+| ImageClassification     | 'resnet_v2_50_imagenet', 'mobilenet_v2_imagenet' |
+
+## Data preprocess API
+
+paddle_serving_app provides a variety of data preprocessing methods for prediction tasks in the field of CV and NLP.
+
+- class ChineseBertReader 
+    
+Preprocessing for Chinese semantic representation task.
+
+  - `__init__(vocab_file, max_seq_len=20)`
+
+    - vocab_file（str）：Path of dictionary file.
+
+    - max_seq_len（int，optional）：The length of sample after processing. The excess part will be truncated, and the insufficient part will be padded with 0. Default 20.
+
+  - `process(line)`
+
+    - line（str）：Text input.
+
+  [example](../examples/bert/bert_client.py)
+
+- class LACReader 
+    
+Preprocessing for Chinese word segmentation task.
+
+  - `__init__(dict_floder)`
+    - dict_floder（str）：Path of dictionary file.
+  - `process(sent)`
+    - sent（str）：Text input.
+  - `parse_result`
+    - words（str）：Original text input.
+    - crf_decode（np.array）：CRF code predicted by model.
+
+  [example](../examples/lac/lac_web_service.py)
+
+- class SentaReader
+
+  - `__init__(vocab_path)`
+    - vocab_path（str）：Path of dictionary file.
+  - `process(cols)`
+    - cols（str）：Word segmentation result.
+
+  [example](../examples/senta/senta_web_service.py)
+
+- The image preprocessing method is more flexible than the above method, and can be combined by the following multiple classes，[example](../examples/imagenet/image_rpc_client.py)
+
+- class Sequential
+
+  - `__init__(transforms)`
+    - transforms（list）：List of image preprocessing classes
+  - `__call__(img)`
+    - img：The input of image preprocessing. The data type is related to the first preprocessing method in transforms.
+
+- class File2Image
+
+  - `__call__(img_path)`
+    - img_path（str）：Path of image file.
+
+- class URL2Image
+
+  - `__call__(img_url)`
+    - img_url（str）：url of image file.
+
+- class Normalize
+
+  - `__init__(mean,std)`
+    - mean（float）：Mean
+    - std（float）：Standard deviation
+  - `__call__(img)`
+    - img（np.array）：Image data in (C,H,W) channels.
+
+- class CenterCrop
+
+  - `__init__(size)`
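+    - size（list/int）：The expected size after cropping. When the input is a list type, it needs to contain the expected length and width. When the input is int type, a square image whose side length is size will be returned.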
+  - `__call__(img)`
+    - img（np.array）：Image data.
+
+- class Resize
+
+  - `__init__(size, max_size=2147483647, interpolation=None)`
+    - size（list/int）：The expected image size, when the input is a list type, it needs to contain the expected length and width. When the input is int type, the short side will be set to the length of size, and the long side will be scaled proportionally.
+  - `__call__(img)`
+    - img（numpy array）：Image data.
+
+
+## Timeline tools
+
+The Timeline tool can be used to visualize the start and end time of various stages such as the preparation data of the prediction service, client wait and server op.
+This tool is convenient to analyze the proportion of time occupancy in the prediction service. On this basis, prediction services can be optimized in a targeted manner.
+
+### How to use
+
+1. Before making predictions on the client side, turn on the timeline function of each stage in the Paddle Serving framework by environment variables. It will print timeline information in log.
+
+   ```shell
+   export FLAGS_profile_client=1 # Turn on timeline function of client
+   export FLAGS_profile_server=1 # Turn on timeline function of server
+   ```
+2. Perform predictions and redirect client-side logs to files, for example, named as profile.
+
+3. Export the information in the log file into a trace file.
+   ```shell
+   python -m paddle_serving_app.trace --profile_file profile --trace_file trace
+   ```
+
+4. Open the `chrome://tracing/` URL in the Chrome browser.
+Load the trace file generated in the previous step through the load button, and you can
+visualize the time information of each stage of the prediction service.
+
+As shown in next figure, the figure shows the timeline of GPU prediction service using [bert example](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/bert).
+The server side starts service with 4 GPU cards, the client side starts 4 processes to request, and the batch size is 1.
+In the figure, bert_pre represents the data pre-processing stage of the client, and client_infer represents the stage where the client completes the sending of the prediction request to the receiving result.
+The process in the figure represents the process number of the client, and the second line of each process shows the timeline of each op of the server.
+
+![timeline](../../doc/timeline-example.png)
+
+## Debug tools
+
+The inference op of Paddle Serving is implemented based on Paddle inference lib.
+Before deploying the prediction service, you may need to check the input and output of the prediction service or check the resource consumption.
+Therefore, a local prediction tool is built into the paddle_serving_app, which is used in the same way as sending a request to the server through the client.
+
+Taking [fit_a_line prediction service](../examples/fit_a_line) as an example, the following code can be used to run local prediction.
+
+```python
+from paddle_serving_app import Debugger
+import numpy as np
+
+debugger = Debugger()
+debugger.load_model_config("./uci_housing_model", gpu=False)
+data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
+        -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
+fetch_map = debugger.predict(feed={"x":data}, fetch = ["price"])
+```
--- a/python/paddle_serving_app/README_CN.md
+++ b/python/paddle_serving_app/README_CN.md
+(简体中文|[English](./README.md))
+
+paddle_serving_app是Paddle Serving框架的工具组件，包含了预训练模型下载、数据预处理方法等功能。方便用户快速体验和部署模型示例、分析预测服务性能、调试模型预测服务等。
+
+## 安装
+
+```shell
+pip install paddle_serving_app
+```
+
+## 获取模型列表
+
+```shell
+python -m paddle_serving_app.package --model_list
+```
+
+## 下载预训练模型
+
+```shell
+python -m paddle_serving_app.package --get_model senta_bilstm
+```
+
+paddle_serving_app中内置了11种预训练模型，涵盖了6种预测任务。获取到的模型文件可以直接用于部署，添加`--tutorial`参数可以获取对应的部署方式。
+
+| 预测服务类型 | 模型名称                                         |
+| ------------ | ------------------------------------------------ |
+| 中文情感分析 | 'senta_bilstm', 'senta_bow', 'senta_cnn'         |
+| 语义理解     | 'ernie_base'                                     |
+| 中文分词     | 'lac'                                            |
+| 图像检测     | 'faster_rcnn', 'yolov3'                          |
+| 图像分割     | 'unet', 'deeplabv3'                              |
+| 图像分类     | 'resnet_v2_50_imagenet', 'mobilenet_v2_imagenet' |
+
+## 数据预处理API
+
+paddle_serving_app针对CV和NLP领域的模型任务，提供了多种常见的数据预处理方法。
+
+- class ChineseBertReader 
+    
+    中文语义理解模型预处理
+
+  - `__init__(vocab_file, max_seq_len=20)`
+
+    - vocab_file（str）：词典文件路径。
+
+    - max_seq_len（int，可选）：处理后的样本长度，超出的部分会截断，不足的部分会padding 0。默认值20。
+
+  - `process(line)`
+    - line（str）：输入文本
+
+  [参考示例](../examples/bert/bert_client.py)
+
+- class LACReader 中文分词预处理
+
+  - `__init__(dict_floder)`
+    - dict_floder（str）：词典文件目录
+  - `process(sent)`
+    - sent（str）：输入文本
+  - `parse_result`
+    - words（str）：原始文本
+    - crf_decode（np.array）：模型预测结果中的CRF编码
+
+  [参考示例](../examples/lac/lac_web_service.py)
+
+- class SentaReader
+
+  - `__init__(vocab_path)`
+    - vocab_path（str）：词典文件目录
+  - `process(cols)`
+    - cols（str）：分词后的文本
+
+  [参考示例](../examples/senta/senta_web_service.py)
+
+- 图像的预处理方法相比于上述的方法更加灵活多变，可以通过以下的多个类进行组合，[参考示例](../examples/imagenet/image_rpc_client.py)
+
+- class Sequential
+
+  - `__init__(transforms)`
+    - transforms（list）：图像预处理方法类的列表
+  - `__call__(img)`
+    - img：图像处理的输入，具体类型与transforms中的第一个预处理方法有关
+
+- class File2Image
+
+  - `__call__(img_path)`
+    - img_path（str）：图像文件路径
+
+- class URL2Image
+
+  - `__call__(img_url)`
+    - img_url（str）：图像url
+
+- class Normalize
+
+  - `__init__(mean,std)`
+    - mean（float）：均值
+    - std（float）：标准差
+  - `__call__(img)`
+    - img（np.array）：（C,H,W）排列的图像数据
+
+- class CenterCrop
+
+  - `__init__(size)`
+    - size（list/int）：预期的裁剪后的大小，list类型时需要包含预期的长和宽，int类型时会返回边长为size的正方形图片
+  - `__call__(img)`
+    - img（np.array）：输入图像
+
+- class Resize
+
+  - `__init__(size, max_size=2147483647, interpolation=None)`
+    - size（list/int）：预期的图像大小，list类型时需要包含预期的长和宽，int类型时，短边会设置为size的长度，长边按比例缩放
+  - `__call__(img)`
+    - img（numpy array）：输入图像
+
+## Timeline 工具
+
+通过Timeline工具可以将预测服务的准备数据、client等待、server端op等各阶段起止时间可视化，方便分析预测服务中的时间占用比重，在此基础上有针对性地优化预测服务。
+
+### 使用方式
+
+1. client端在进行预测之前，通过环境变量打开Paddle Serving框架中的各阶段日志打点功能
+
+   ```shell
+   export FLAGS_profile_client=1 #开启client端各阶段时间打点
+   export FLAGS_profile_server=1 #开启server端各阶段时间打点
+   ```
+
+2. 执行预测，并将client端的日志重定向到文件中，例如profile文件。
+
+3. 将日志文件中的信息导出成为trace文件
+
+   ```shell
+   python -m paddle_serving_app.trace --profile_file profile --trace_file trace
+   ```
+
+4. 使用chrome浏览器，打开`chrome://tracing/`网址，通过load按钮加载上一步产生的trace文件，即可将预测服务的各阶段时间信息可视化。
+
+   效果如下图，图中展示了使用[bert示例](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/bert)的GPU预测服务，server端开启4卡预测，client端启动4进程，batch size为1时的各阶段timeline。
+其中bert_pre代表client端的数据预处理阶段，client_infer代表client完成预测请求的发送到接收结果的阶段，图中的process代表的是client的进程号，每个进程的第二行展示的是server各个op的timeline。
+
+   ![timeline](../../doc/timeline-example.png)
+
+## Debug工具
+
+Paddle Serving框架的server预测op使用了Paddle 的预测框架，在部署预测服务之前可能需要对预测服务的输入输出进行检验或者查看资源占用等。因此在paddle_serving_app中内置了本地预测工具，使用方式与通过client向服务端发送请求一致。
+
+以[fit_a_line预测服务](../examples/fit_a_line)为例，使用以下代码即可执行本地预测。
+
+```python
+from paddle_serving_app import Debugger
+import numpy as np
+
+debugger = Debugger()
+debugger.load_model_config("./uci_housing_model", gpu=False)
+data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
+        -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
+fetch_map = debugger.predict(feed={"x":data}, fetch = ["price"])
+```
--- a/python/paddle_serving_app/__init__.py
+++ b/python/paddle_serving_app/__init__.py
@@ -12,8 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from .reader.chinese_bert_reader import ChineseBertReader
-from .reader.image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize
+from .reader.image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize, PadStride
 from .reader.lac_reader import LACReader
 from .reader.senta_reader import SentaReader
+from .reader.imdb_reader import IMDBDataset
 from .models import ServingModels
 from .local_predict import Debugger
--- a/python/paddle_serving_app/local_predict.py
+++ b/python/paddle_serving_app/local_predict.py
@@ -115,6 +115,13 @@ class Debugger(object):

        inputs = []
        for name in self.feed_names_:
+            if isinstance(feed[name], list):
+                feed[name] = np.array(feed[name]).reshape(self.feed_shapes_[
+                    name])
+                if self.feed_types_[name] == 0:
+                    feed[name] = feed[name].astype("int64")
+                else:
+                    feed[name] = feed[name].astype("float32")
            inputs.append(PaddleTensor(feed[name][np.newaxis, :]))

        outputs = self.predictor.run(inputs)

--- a/python/paddle_serving_app/package.py
+++ b/python/paddle_serving_app/package.py
@@ -72,7 +72,7 @@ if __name__ == "__main__":
              Usage:
              Download a package for serving directly
              Example:
-                   python -m paddle_serving_app.models --get senta_bilstm
+                   python -m paddle_serving_app.models --get_model senta_bilstm
                   python -m paddle_serving_app.models --list_model
              """)
        pass
--- a/python/paddle_serving_app/reader/__init__.py
+++ b/python/paddle_serving_app/reader/__init__.py
@@ -11,4 +11,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, RCNNPostprocess, SegPostprocess
+from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, RCNNPostprocess, SegPostprocess, PadStride
--- a/python/paddle_serving_app/reader/image_reader.py
+++ b/python/paddle_serving_app/reader/image_reader.py
@@ -13,14 +13,19 @@
 # limitations under the License.
 import cv2
 import os
-import urllib
 import numpy as np
 import base64
+import sys
 from . import functional as F
 from PIL import Image, ImageDraw
 import json

 _cv2_interpolation_to_str = {cv2.INTER_LINEAR: "cv2.INTER_LINEAR", None: "None"}
+py_version = sys.version_info[0]
+if py_version == 2:
+    import urllib
+else:
+    import urllib.request as urllib


 def generate_colormap(num_classes):
@@ -465,6 +470,24 @@ class Resize(object):
            _cv2_interpolation_to_str[self.interpolation])


+class PadStride(object):
+    """Zero-pad a (C, H, W) image so H and W become multiples of `stride`."""
+    def __init__(self, stride):
+        self.coarsest_stride = stride
+
+    def __call__(self, img):
+        coarsest_stride = self.coarsest_stride
+        if coarsest_stride == 0:
+            return img
+        im_c, im_h, im_w = img.shape
+        pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
+        pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
+        # The padded canvas is float32; the input occupies its top-left corner.
+        padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
+        padding_im[:, :im_h, :im_w] = img
+        return padding_im
+
+
 class Transpose(object):
    def __init__(self, transpose_target):
        self.transpose_target = transpose_target

--- a/python/paddle_serving_app/reader/imdb_reader.py
+++ b/python/paddle_serving_app/reader/imdb_reader.py
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
+
+import sys
+import os
+import paddle
+import re
+import paddle.fluid.incubate.data_generator as dg
+
+py_version = sys.version_info[0]
+
+
+class IMDBDataset(dg.MultiSlotDataGenerator):
+    """Turn IMDB review text into vocabulary-id features.
+
+    Call ``load_resource`` first: the other methods use the vocabulary,
+    the unknown-word id and the tokenising pattern that it sets up.
+    """
+
+    @staticmethod
+    def _open_text(path):
+        """Open ``path`` for reading text, as UTF-8 when on Python 3."""
+        # Python 2's built-in open() does not accept ``encoding``.
+        if py_version == 2:
+            return open(path)
+        return open(path, encoding="utf-8")
+
+    def _encode(self, text):
+        """Split ``text`` on the punctuation pattern and map tokens to ids."""
+        tokens = [x for x in self._pattern.split(text) if x and x != " "]
+        return [self._vocab.get(tok, self._unk_id) for tok in tokens]
+
+    def load_resource(self, dictfile):
+        """Load the vocabulary from ``dictfile``, one token per line.
+
+        A token's id is its zero-based line number. Tokens missing from
+        the vocabulary are later mapped to ``len(vocabulary)``.
+        """
+        self._vocab = {}
+        with self._open_text(dictfile) as f:
+            for wid, line in enumerate(f):
+                self._vocab[line.strip()] = wid
+        self._unk_id = len(self._vocab)
+        self._pattern = re.compile(r'(;|,|\.|\?|!|\s|\(|\))')
+        self.return_value = ("words", [1, 2, 3, 4, 5, 6]), ("label", [0])
+
+    def get_words_only(self, line):
+        """Return the word ids of an unlabelled review ``line``."""
+        sent = line.lower().replace("<br />", " ").strip()
+        return self._encode(sent)
+
+    def get_words_and_label(self, line):
+        """Return ``(word_ids, [label])`` for a ``text|label`` line.
+
+        The label is the integer after the last ``|``; everything before
+        that separator is treated as the review text.
+        """
+        text, _, label_field = line.rpartition('|')
+        sent = text.lower().replace("<br />", " ").strip()
+        label = [int(label_field)]
+        return self._encode(sent), label
+
+    def infer_reader(self, infer_filelist, batch, buf_size):
+        """Build a shuffled, batched reader over labelled files.
+
+        Args:
+            infer_filelist: iterable of paths whose lines are ``text|label``.
+            batch: batch size passed to ``paddle.batch``.
+            buf_size: buffer size passed to ``paddle.reader.shuffle``.
+        """
+
+        def local_iter():
+            for fname in infer_filelist:
+                with self._open_text(fname) as fin:
+                    for line in fin:
+                        feas, label = self.get_words_and_label(line)
+                        yield feas, label
+
+        batch_iter = paddle.batch(
+            paddle.reader.shuffle(
+                local_iter, buf_size=buf_size),
+            batch_size=batch)
+        return batch_iter
+
+    def generate_sample(self, line):
+        """Return a generator function yielding the sample parsed from ``line``."""
+
+        def data_iter():
+            feas, label = self.get_words_and_label(line)
+            yield ("words", feas), ("label", label)
+
+        return data_iter
+
+
+if __name__ == "__main__":
+    # Script mode: load the vocabulary from ./imdb.vocab, then hand control
+    # to the data generator's run_from_stdin().
+    imdb = IMDBDataset()
+    imdb.load_resource("imdb.vocab")
+    imdb.run_from_stdin()
--- a/python/paddle_serving_app/trace.py
+++ b/python/paddle_serving_app/trace.py
+# -*- coding: utf-8 -*-
+"""
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import json
+import sys
+import argparse
+
+
+def parse_args():
+    """Parse the command-line options of the profile-to-trace converter.
+
+    Returns:
+        argparse.Namespace with ``profile_file`` (required) and
+        ``trace_file`` (defaults to "trace").
+    """
+    arg_parser = argparse.ArgumentParser("Convert profile log to trace")
+    options = [
+        ("--profile_file",
+         dict(type=str, default="", required=True, help="Profile log")),
+        ("--trace_file", dict(type=str, default="trace", help="Trace file")),
+    ]
+    for flag, spec in options:
+        arg_parser.add_argument(flag, **spec)
+    return arg_parser.parse_args()
+
+
+def prase(pid_str, time_str, counter):
+    """Convert one PROFILE record into a list of trace-event dicts.
+
+    Args:
+        pid_str: text of the form ``<label>:<pid>``; the field after the
+            first colon becomes the ``pid`` of every event.
+        time_str: blank-separated ``<name>_<flag>:<timestamp>`` items. A
+            flag of ``0`` marks a begin event ("B"), anything else an end
+            event ("E").
+        counter: unused; kept so existing callers keep working.
+
+    Returns:
+        A list of dicts with the keys ``name``, ``tid``, ``pid``, ``ts``
+        and ``ph``. ``pid`` and ``ts`` stay strings, as read from the log.
+    """
+    pid = pid_str.split(":")[1]
+    trace_list = []
+    # split() without a separator skips repeated blanks and returns [] for
+    # an empty string, so stray spaces no longer produce an unparsable
+    # empty event.
+    for event in time_str.split():
+        name, ts = event.split(":")
+        name_list = name.split("_")
+        ph = "B" if (name_list[-1] == "0") else "E"
+        # Drop the begin/end flag and keep at most the first two name parts.
+        # A name without "_" is used as-is instead of raising IndexError.
+        if len(name_list) <= 2:
+            name = name_list[0]
+        else:
+            name = name_list[0] + "_" + name_list[1]
+        trace_list.append({
+            "name": name,
+            "tid": 0,
+            "pid": pid,
+            "ts": ts,
+            "ph": ph,
+        })
+    return trace_list
+
+
+if __name__ == "__main__":
+    # Collect the trace events of every PROFILE record in the log, then
+    # write them out as a single JSON array.
+    cli = parse_args()
+    events = []
+    record_index = 0
+    with open(cli.profile_file) as log:
+        for raw in log.readlines():
+            fields = raw.strip().split("\t")
+            if fields[0] != "PROFILE":
+                continue
+            events.extend(prase(fields[1], fields[2], record_index))
+            record_index += 1
+
+    with open(cli.trace_file, "w") as out:
+        out.write(json.dumps(events, indent=2, separators=(',', ':')))
--- a/python/paddle_serving_client/io/__init__.py
+++ b/python/paddle_serving_client/io/__init__.py
@@ -104,10 +104,10 @@ def save_model(server_model_folder,


 def inference_model_to_serving(dirname,
-                               model_filename=None,
-                               params_filename=None,
                               serving_server="serving_server",
-                               serving_client="serving_client"):
+                               serving_client="serving_client",
+                               model_filename=None,
+                               params_filename=None):
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    inference_program, feed_target_names, fetch_targets = \

--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -274,7 +274,8 @@ class Server(object):
                self.model_config_paths[node.name] = path
            print("You have specified multiple model paths, please ensure "
                  "that the input and output of multiple models are the same.")
-            workflow_oi_config_path = self.model_config_paths.items()[0][1]
+            workflow_oi_config_path = list(self.model_config_paths.items())[0][
+                1]
        else:
            raise Exception("The type of model_config_paths must be str or "
                            "dict({op: model_path}), not {}.".format(

--- a/python/paddle_serving_server/web_service.py
+++ b/python/paddle_serving_server/web_service.py
@@ -101,7 +101,6 @@ class WebService(object):
        p_rpc = Process(target=self._launch_rpc_service)
        p_rpc.start()

-    def run_flask(self):
        app_instance = Flask(__name__)

        @app_instance.before_first_request
@@ -114,10 +113,16 @@ class WebService(object):
        def run():
            return self.get_prediction(request)

-        app_instance.run(host="0.0.0.0",
+        self.app_instance = app_instance
+
+    def run_flask(self):
+        self.app_instance.run(host="0.0.0.0",
                              port=self.port,
                              threaded=False,
-                         processes=4)
+                              processes=1)
+
+    def get_app_instance(self):
+        return self.app_instance

    def preprocess(self, feed=[], fetch=[]):
        return feed, fetch

--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -320,7 +320,8 @@ class Server(object):
                self.model_config_paths[node.name] = path
            print("You have specified multiple model paths, please ensure "
                  "that the input and output of multiple models are the same.")
-            workflow_oi_config_path = self.model_config_paths.items()[0][1]
+            workflow_oi_config_path = list(self.model_config_paths.items())[0][
+                1]
        else:
            raise Exception("The type of model_config_paths must be str or "
                            "dict({op: model_path}), not {}.".format(

--- a/python/paddle_serving_server_gpu/web_service.py
+++ b/python/paddle_serving_server_gpu/web_service.py
@@ -151,7 +151,6 @@ class WebService(object):
        for p in server_pros:
            p.start()

-    def run_flask(self):
        app_instance = Flask(__name__)

        @app_instance.before_first_request
@@ -164,10 +163,16 @@ class WebService(object):
        def run():
            return self.get_prediction(request)

-        app_instance.run(host="0.0.0.0",
+        self.app_instance = app_instance
+
+    def run_flask(self):
+        self.app_instance.run(host="0.0.0.0",
                              port=self.port,
                              threaded=False,
-                         processes=4)
+                              processes=1)
+
+    def get_app_instance(self):
+        return self.app_instance  # the Flask app stored on this instance

    def preprocess(self, feed=[], fetch=[]):
        return feed, fetch

--- a/tools/Dockerfile.centos6.devel
+++ b/tools/Dockerfile.centos6.devel
@@ -43,5 +43,5 @@ RUN yum -y install wget && \
    source /root/.bashrc && \
    cd .. && rm -rf Python-3.6.8* && \
    pip3 install google protobuf setuptools wheel flask numpy==1.16.4 && \
-    yum -y install epel-release && yum -y install patchelf && \
+    yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
    yum clean all
--- a/tools/Dockerfile.centos6.gpu.devel
+++ b/tools/Dockerfile.centos6.gpu.devel
@@ -43,5 +43,5 @@ RUN yum -y install wget && \
    source /root/.bashrc && \
    cd .. && rm -rf Python-3.6.8* && \
    pip3 install google protobuf setuptools wheel flask numpy==1.16.4 && \
-    yum -y install epel-release && yum -y install patchelf && \
+    yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
    yum clean all
--- a/tools/Dockerfile.devel
+++ b/tools/Dockerfile.devel
@@ -20,5 +20,5 @@ RUN yum -y install wget >/dev/null \
    && rm get-pip.py \
    && yum install -y python3 python3-devel \
    && pip3 install google protobuf setuptools wheel flask \
-    && yum -y install epel-release && yum -y install patchelf \
+    && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
    && yum clean all
--- a/tools/Dockerfile.gpu.devel
+++ b/tools/Dockerfile.gpu.devel
@@ -21,5 +21,5 @@ RUN yum -y install wget >/dev/null \
    && rm get-pip.py \
    && yum install -y python3 python3-devel \
    && pip3 install google protobuf setuptools wheel flask \
-    && yum -y install epel-release && yum -y install patchelf \
+    && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
    && yum clean all
--- a/tools/serving_build.sh
+++ b/tools/serving_build.sh
@@ -343,7 +343,7 @@ function python_test_imdb() {
            sleep 5
            check_cmd "head test_data/part-0 | python test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab"
            # test batch predict
-            check_cmd "python benchmark_batch.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc --endpoint 127.0.0.1:9292"
+            check_cmd "python benchmark.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc --endpoint 127.0.0.1:9292"
            echo "imdb CPU RPC inference pass"
            kill_server_process
            rm -rf work_dir1
@@ -359,7 +359,7 @@ function python_test_imdb() {
                exit 1
            fi
            # test batch predict
-            check_cmd "python benchmark_batch.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request http --endpoint 127.0.0.1:9292"
+            check_cmd "python benchmark.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request http --endpoint 127.0.0.1:9292"
            setproxy # recover proxy state
            kill_server_process
            ps -ef | grep "text_classify_service.py" | grep -v grep | awk '{print $2}' | xargs kill