Merge remote-tracking branch 'upstream/develop' into 0.2.2-dev

sync

Merge remote-tracking branch 'upstream/develop' into 0.2.2-dev
sync
3b0ae5f4 · MRXLT · 3e3524d3 · 9c70412b · 3b0ae5f4 · 3b0ae5f4
49 changed file
--- a/README.md
+++ b/README.md
@@ -264,8 +264,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa

 ### About Efficiency
 - [How to profile Paddle Serving latency?](python/examples/util)
- [How to optimize performance?(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [Deploy multi-services on one GPU(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
+- [How to optimize performance?(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
+- [Deploy multi-services on one GPU(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
 - [CPU Benchmarks(Chinese)](doc/BENCHMARKING.md)
 - [GPU Benchmarks(Chinese)](doc/GPU_BENCHMARKING.md)


--- a/README_CN.md
+++ b/README_CN.md
@@ -270,8 +270,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa

 ### 关于Paddle Serving性能
 - [如何测试Paddle Serving性能？](python/examples/util/)
- [如何优化性能?](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [在一张GPU上启动多个预测服务](doc/PERFORMANCE_OPTIM_CN.md)
+- [如何优化性能?](doc/PERFORMANCE_OPTIM_CN.md)
+- [在一张GPU上启动多个预测服务](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
 - [CPU版Benchmarks](doc/BENCHMARKING.md)
 - [GPU版Benchmarks](doc/GPU_BENCHMARKING.md)


--- a/core/cube/cube-agent/src/agent/util.go
+++ b/core/cube/cube-agent/src/agent/util.go
@@ -83,9 +83,6 @@ func JsonReq(method, requrl string, timeout int, kv *map[string]string,
 }

 func GetHdfsMeta(src string) (master, ugi, path string, err error) {
-	//src = "hdfs://root:rootpasst@st1-inf-platform0.st01.baidu.com:54310/user/mis_user/news_dnn_ctr_cube_1/1501836820/news_dnn_ctr_cube_1_part54.tar"
-	//src = "hdfs://st1-inf-platform0.st01.baidu.com:54310/user/mis_user/news_dnn_ctr_cube_1/1501836820/news_dnn_ctr_cube_1_part54.tar"
-
 	ugiBegin := strings.Index(src, "//")
 	ugiPos := strings.LastIndex(src, "@")
 	if ugiPos != -1 && ugiBegin != -1 {

--- a/core/general-client/include/general_model.h
+++ b/core/general-client/include/general_model.h
@@ -69,9 +69,15 @@ class ModelRes {
  const std::vector<int64_t>& get_int64_by_name(const std::string& name) {
    return _int64_value_map[name];
  }
+  std::vector<int64_t>&& get_int64_by_name_with_rv(const std::string& name) {
+    return std::move(_int64_value_map[name]);  // rvalue ref to the stored vector, so a caller can move it out instead of copying
+  }
  const std::vector<float>& get_float_by_name(const std::string& name) {
    return _float_value_map[name];
  }
+  std::vector<float>&& get_float_by_name_with_rv(const std::string& name) {
+    return std::move(_float_value_map[name]);  // rvalue ref to the stored vector, so a caller can move it out instead of copying
+  }
  const std::vector<int>& get_shape(const std::string& name) {
    return _shape_map[name];
  }
@@ -121,10 +127,18 @@ class PredictorRes {
                                                const std::string& name) {
    return _models[model_idx].get_int64_by_name(name);
  }
+  std::vector<int64_t>&& get_int64_by_name_with_rv(const int model_idx,
+                                                   const std::string& name) {
+    return std::move(_models[model_idx].get_int64_by_name_with_rv(name));  // forwards the rvalue ref returned by the model at model_idx
+  }
  const std::vector<float>& get_float_by_name(const int model_idx,
                                              const std::string& name) {
    return _models[model_idx].get_float_by_name(name);
  }
+  std::vector<float>&& get_float_by_name_with_rv(const int model_idx,
+                                                 const std::string& name) {
+    return std::move(_models[model_idx].get_float_by_name_with_rv(name));  // forwards the rvalue ref returned by the model at model_idx
+  }
  const std::vector<int>& get_shape(const int model_idx,
                                    const std::string& name) {
    return _models[model_idx].get_shape(name);

--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -258,9 +258,10 @@ int PredictorClient::batch_predict(
      ModelRes model;
      model.set_engine_name(output.engine_name());

+      int idx = 0;
+
      for (auto &name : fetch_name) {
        // int idx = _fetch_name_to_idx[name];
-        int idx = 0;
        int shape_size = output.insts(0).tensor_array(idx).shape_size();
        VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
                << shape_size;
@@ -279,9 +280,9 @@ int PredictorClient::batch_predict(
        idx += 1;
      }

+      idx = 0;
      for (auto &name : fetch_name) {
        // int idx = _fetch_name_to_idx[name];
-        int idx = 0;
        if (_fetch_name_to_type[name] == 0) {
          VLOG(2) << "ferch var " << name << "type int";
          model._int64_value_map[name].resize(
@@ -536,9 +537,9 @@ int PredictorClient::numpy_predict(
      ModelRes model;
      model.set_engine_name(output.engine_name());

+      int idx = 0;
      for (auto &name : fetch_name) {
        // int idx = _fetch_name_to_idx[name];
-        int idx = 0;
        int shape_size = output.insts(0).tensor_array(idx).shape_size();
        VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
                << shape_size;
@@ -557,9 +558,10 @@ int PredictorClient::numpy_predict(
        idx += 1;
      }

+      idx = 0;
+
      for (auto &name : fetch_name) {
        // int idx = _fetch_name_to_idx[name];
-        int idx = 0;
        if (_fetch_name_to_type[name] == 0) {
          VLOG(2) << "ferch var " << name << "type int";
          model._int64_value_map[name].resize(

--- a/core/general-client/src/pybind_general_model.cpp
+++ b/core/general-client/src/pybind_general_model.cpp
@@ -32,14 +32,23 @@ PYBIND11_MODULE(serving_client, m) {
      .def(py::init())
      .def("get_int64_by_name",
           [](PredictorRes &self, int model_idx, std::string &name) {
-             return self.get_int64_by_name(model_idx, name);
-           },
-           py::return_value_policy::reference)
+             // see more: https://github.com/pybind/pybind11/issues/1042
+             std::vector<int64_t> *ptr = new std::vector<int64_t>(
+                 std::move(self.get_int64_by_name_with_rv(model_idx, name)));
+             auto capsule = py::capsule(ptr, [](void *p) {
+               delete reinterpret_cast<std::vector<int64_t> *>(p);
+             });
+             return py::array(ptr->size(), ptr->data(), capsule);
+           })
      .def("get_float_by_name",
           [](PredictorRes &self, int model_idx, std::string &name) {
-             return self.get_float_by_name(model_idx, name);
-           },
-           py::return_value_policy::reference)
+             std::vector<float> *ptr = new std::vector<float>(
+                 std::move(self.get_float_by_name_with_rv(model_idx, name)));
+             auto capsule = py::capsule(ptr, [](void *p) {
+               delete reinterpret_cast<std::vector<float> *>(p);
+             });
+             return py::array(ptr->size(), ptr->data(), capsule);
+           })
      .def("get_shape",
           [](PredictorRes &self, int model_idx, std::string &name) {
             return self.get_shape(model_idx, name);

--- a/doc/BERT_10_MINS_CN.md
+++ b/doc/BERT_10_MINS_CN.md
@@ -13,10 +13,10 @@ import paddlehub as hub
 model_name = "bert_chinese_L-12_H-768_A-12"
 module = hub.Module(model_name)
 inputs, outputs, program = module.context(trainable=True, max_seq_len=20)
-feed_keys = ["input_ids", "position_ids", "segment_ids", "input_mask", "pooled_output", "sequence_output"]
+feed_keys = ["input_ids", "position_ids", "segment_ids", "input_mask"]
 fetch_keys = ["pooled_output", "sequence_output"]
 feed_dict = dict(zip(feed_keys, [inputs[x] for x in feed_keys]))
-fetch_dict = dict(zip(fetch_keys, [outputs[x]] for x in fetch_keys))
+fetch_dict = dict(zip(fetch_keys, [outputs[x] for x in fetch_keys]))

 import paddle_serving_client.io as serving_io
 serving_io.save_model("bert_seq20_model", "bert_seq20_client", feed_dict, fetch_dict, program)

--- a/doc/SAVE.md
+++ b/doc/SAVE.md
@@ -10,8 +10,9 @@ serving_io.save_model("imdb_model", "imdb_client_conf",
                      {"words": data}, {"prediction": prediction},
                      fluid.default_main_program())
 ```
-`imdb_model` is the server side model with serving configurations. `imdb_client_conf` is the client rpc configurations. Serving has a 
-dictionary for `Feed` and `Fetch` variables for client to assign. In the example, `{"words": data}` is the feed dict that specify the input of saved inference model. `{"prediction": prediction}` is the fetch dic that specify the output of saved inference model. An alias name can be defined for feed and fetch variables. An example of how to use alias name
+`imdb_model` is the server side model with serving configurations. `imdb_client_conf` is the client rpc configurations. 
+
+Serving has a dictionary for `Feed` and `Fetch` variables for the client to assign. In the example, `{"words": data}` is the feed dict that specifies the input of the saved inference model. `{"prediction": prediction}` is the fetch dict that specifies the output of the saved inference model. An alias name can be defined for feed and fetch variables. An example of how to use alias names
 is as follows:
 ``` python
 from paddle_serving_client import Client
@@ -35,10 +36,14 @@ for line in sys.stdin:
 If you have saved model files using Paddle's `save_inference_model` API, you can use Paddle Serving's` inference_model_to_serving` API to convert it into a model file that can be used for Paddle Serving.
 ```
 import paddle_serving_client.io as serving_io
-serving_io.inference_model_to_serving(dirname, model_filename=None, params_filename=None, serving_server="serving_server", serving_client="serving_client")
+serving_io.inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client", model_filename=None, params_filename=None)
 ```
 dirname (str) - Path of saved model files. Program file and parameter files are saved in this directory.
-model_filename (str, optional) - The name of file to load the inference program. If it is None, the default filename __model__ will be used. Default: None.
-paras_filename (str, optional) - The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. Default: None.
+
 serving_server (str, optional) - The path of model files and configuration files for server. Default: "serving_server".
+
 serving_client (str, optional) - The path of configuration files for client. Default: "serving_client".
+
+model_filename (str, optional) - The name of file to load the inference program. If it is None, the default filename `__model__` will be used. Default: None.
+
+params_filename (str, optional) - The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. Default: None.
--- a/doc/SAVE_CN.md
+++ b/doc/SAVE_CN.md
@@ -11,7 +11,9 @@ serving_io.save_model("imdb_model", "imdb_client_conf",
                      {"words": data}, {"prediction": prediction},
                      fluid.default_main_program())
 ```
-imdb_model是具有服务配置的服务器端模型。 imdb_client_conf是客户端rpc配置。 Serving有一个 提供给用户存放Feed和Fetch变量信息的字典。 在示例中，`{words”：data}` 是用于指定已保存推理模型输入的提要字典。`{"prediction"：projection}`是指定保存的推理模型输出的字典。可以为feed和fetch变量定义一个别名。 如何使用别名的例子 示例如下：
+imdb_model是具有服务配置的服务器端模型。 imdb_client_conf是客户端rpc配置。
+
+Serving有一个提供给用户存放Feed和Fetch变量信息的字典。 在示例中，`{"words": data}` 是用于指定已保存推理模型输入的提要字典。`{"prediction": prediction}`是指定保存的推理模型输出的字典。可以为feed和fetch变量定义一个别名。 如何使用别名的例子 示例如下：

 ``` python
 from paddle_serving_client import Client
@@ -35,10 +37,14 @@ for line in sys.stdin:
 如果已使用Paddle 的`save_inference_model`接口保存出预测要使用的模型，则可以通过Paddle Serving的`inference_model_to_serving`接口转换成可用于Paddle Serving的模型文件。
 ```
 import paddle_serving_client.io as serving_io
-serving_io.inference_model_to_serving(dirname, model_filename=None, params_filename=None, serving_server="serving_server", serving_client="serving_client")
+serving_io.inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client",  model_filename=None, params_filename=None)
 ```
 dirname (str) – 需要转换的模型文件存储路径，Program结构文件和参数文件均保存在此目录。
-model_filename (str，可选) – 存储需要转换的模型Inference Program结构的文件名称。如果设置为None，则使用 __model__ 作为默认的文件名。默认值为None。
+
+serving_server (str, 可选) - 转换后的模型文件和配置文件的存储路径。默认值为serving_server。
+
+serving_client (str, 可选) - 转换后的客户端配置文件存储路径。默认值为serving_client。
+
+model_filename (str，可选) – 存储需要转换的模型Inference Program结构的文件名称。如果设置为None，则使用 `__model__` 作为默认的文件名。默认值为None。
+
 params_filename (str，可选) – 存储需要转换的模型所有参数的文件名称。当且仅当所有模型参数被保存在一个单独的二进制文件中，它才需要被指定。如果模型参数是存储在各自分离的文件中，设置它的值为None。默认值为None。
-serving_server (str, 可选) - 转换后的模型文件和配置文件的存储路径。默认值为"serving_server"。
-serving_client (str, 可选) - 转换后的客户端配置文件存储路径。默认值为"serving_client"。
--- a/doc/UWSGI_DEPLOY.md
+++ b/doc/UWSGI_DEPLOY.md
-# 使用uwsgi启动HTTP预测服务
+# Deploy HTTP service with uWSGI

-在提供的fit_a_line示例中，启动HTTP预测服务后会看到有以下信息：
+([简体中文](./UWSGI_DEPLOY_CN.md)|English)
+
+In the fit_a_line example, after starting the HTTP prediction service, you will see the following information:

 ```shell
 web service address:
@@ -13,46 +15,31 @@ http://10.127.3.150:9393/uci/prediction
 * Running on http://0.0.0.0:9393/ (Press CTRL+C to quit)
 ```

-这里会提示启动的HTTP服务是开发模式，并不能用于生产环境的部署。Flask启动的服务环境不够稳定也无法承受大量请求的并发，实际部署过程中配合需要WSGI（Web Server Gateway Interface）使用。
+Here you will be prompted that the HTTP service started is in development mode and cannot be used for production deployment. 
+The prediction service started by Flask is not stable enough to withstand the concurrency of a large number of requests. In an actual deployment, it needs to be used together with a WSGI (Web Server Gateway Interface) server.

-下面我们展示一下如何使用[uWSGI](https://github.com/unbit/uwsgi)模块来部署HTTP预测服务用于生产环境。
+Next, we will show how to use the [uWSGI](https://github.com/unbit/uwsgi) module to deploy HTTP prediction services for production environments.

-编写HTTP服务脚本

 ```python
 #uwsgi_service.py
 from paddle_serving_server.web_service import WebService
-from flask import Flask, request

-#配置预测服务
+#Define prediction service
 uci_service = WebService(name = "uci")
 uci_service.load_model_config("./uci_housing_model")
 uci_service.prepare_server(workdir="./workdir", port=int(9500), device="cpu")
 uci_service.run_server()
-
-#配置flask服务
-app_instance = Flask(__name__)
-@app_instance.before_first_request
-def init():
-    global uci_service
-    uci_service._launch_web_service()
-
-service_name = "/" + uci_service.name + "/prediction"
-@app_instance.route(service_name, methods=["POST"])
-def run():
-    return uci_service.get_prediction(request)
-
-#run方法用于直接调试中直接启动服务
-if __name__ == "__main__":
-    app_instance.run()
+#Get flask application
+app_instance = uci_service.get_app_instance()
 ```

-使用uwsgi启动HTTP服务
+Start service with uWSGI

 ```bash
-uwsgi --http :9000 --wsgi-file uwsgi_service.py --callable app_instance --processes 4
+uwsgi --http :9393 --module uwsgi_service:app_instance
 ```

-使用--processes参数可以指定服务的进程数，请注意目前Serving HTTP 服务暂时不支持多线程的方式使用。
+Use the `--processes` parameter to specify the number of service processes.

-更多uWSGI的信息请参考[uWSGI使用文档](https://uwsgi-docs.readthedocs.io/en/latest/)
+For more information about uWSGI, please refer to [uWSGI documentation](https://uwsgi-docs.readthedocs.io/en/latest/)
--- a/doc/UWSGI_DEPLOY_CN.md
+++ b/doc/UWSGI_DEPLOY_CN.md
+# 使用uwsgi启动HTTP预测服务
+
+(简体中文|[English](./UWSGI_DEPLOY.md))
+
+在提供的fit_a_line示例中，启动HTTP预测服务后会看到有以下信息：
+
+```shell
+web service address:
+http://10.127.3.150:9393/uci/prediction
+ * Serving Flask app "serve" (lazy loading)
+ * Environment: production
+   WARNING: This is a development server. Do not use it in a production deployment.
+   Use a production WSGI server instead.
+ * Debug mode: off
+ * Running on http://0.0.0.0:9393/ (Press CTRL+C to quit)
+```
+
+这里会提示启动的HTTP服务是开发模式，并不能用于生产环境的部署。Flask启动的服务环境不够稳定也无法承受大量请求的并发，实际部署过程中配合需要WSGI（Web Server Gateway Interface）使用。
+
+下面我们展示一下如何使用[uWSGI](https://github.com/unbit/uwsgi)模块来部署HTTP预测服务用于生产环境。
+
+编写HTTP服务脚本
+
+```python
+#uwsgi_service.py
+from paddle_serving_server.web_service import WebService
+
+#配置预测服务
+uci_service = WebService(name = "uci")
+uci_service.load_model_config("./uci_housing_model")
+uci_service.prepare_server(workdir="./workdir", port=int(9500), device="cpu")
+uci_service.run_server()
+#获取flask服务
+app_instance = uci_service.get_app_instance()
+```
+
+使用uwsgi启动HTTP服务
+
+```bash
+uwsgi --http :9393 --module uwsgi_service:app_instance
+```
+
+使用--processes参数可以指定服务的进程数。
+
+更多uWSGI的信息请参考[uWSGI使用文档](https://uwsgi-docs.readthedocs.io/en/latest/)
--- a/python/examples/cascade_rcnn/README.md
+++ b/python/examples/cascade_rcnn/README.md
+# Cascade RCNN model on Paddle Serving
+
+([简体中文](./README_CN.md)|English)
+
+### Get The Cascade RCNN Model
+```
+sh get_data.sh
+```
+If you want to have more detection models, please refer to [Paddle Detection Model Zoo](https://github.com/PaddlePaddle/PaddleDetection/blob/release/0.2/docs/MODEL_ZOO_cn.md)
+
+### Start the service
+```
+python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0
+```
+
+### Perform prediction
+```
+python test_client.py 
+```
+
+Images with bounding boxes and the JSON results will be saved in the `output` folder.
--- a/python/examples/cascade_rcnn/README_CN.md
+++ b/python/examples/cascade_rcnn/README_CN.md
+# 使用Paddle Serving部署Cascade RCNN模型
+
+(简体中文|[English](./README.md))
+
+### 获得Cascade RCNN模型
+```
+sh get_data.sh
+```
+如果你想要更多的检测模型，请参考[Paddle检测模型库](https://github.com/PaddlePaddle/PaddleDetection/blob/release/0.2/docs/MODEL_ZOO_cn.md)
+
+### 启动服务
+```
+python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0
+```
+
+### 执行预测
+```
+python test_client.py
+```
+
+客户端已经为图片做好了后处理，在`output`文件夹下存放各个框的json格式信息还有后处理结果图片。
--- a/python/examples/cascade_rcnn/get_data.sh
+++ b/python/examples/cascade_rcnn/get_data.sh
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/cascade_rcnn_r50_fpx_1x_serving.tar.gz
+tar xf cascade_rcnn_r50_fpx_1x_serving.tar.gz
--- a/python/examples/imagenet/README.md
+++ b/python/examples/imagenet/README.md
@@ -15,34 +15,35 @@ sh get_model.sh
 pip install paddle_serving_app
 ```

-### HTTP Infer
+### HTTP Service

 launch server side
 ```
-python image_classification_service.py ResNet50_vd_model workdir 9393 #cpu inference service
+python resnet50_web_service.py ResNet50_vd_model cpu 9696 #cpu inference service
 ```
 ```
-python image_classification_service_gpu.py ResNet50_vd_model workdir 9393 #gpu inference service
+python resnet50_web_service.py ResNet50_vd_model gpu 9696 #gpu inference service
 ```


 client send inference request
 ```
-python image_http_client.py
+curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9696/image/prediction
 ```
-### RPC Infer
+
+### RPC Service

 launch server side
 ```
-python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9393 #cpu inference service
+python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu inference service
 ```

 ```
-python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9393 --gpu_ids 0 #gpu inference service
+python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service
 ```

 client send inference request
 ```
 python image_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
 ```
-*the port of server side in this example is 9393, the sample data used by client side is in the folder ./data. These parameter can be modified in practice*
+*the port of server side in this example is 9696*
--- a/python/examples/imagenet/README_CN.md
+++ b/python/examples/imagenet/README_CN.md
@@ -15,34 +15,35 @@ sh get_model.sh
 pip install paddle_serving_app
 ```

-### 执行HTTP预测服务
+### HTTP服务

 启动server端
 ```
-python image_classification_service.py ResNet50_vd_model workdir 9393 #cpu预测服务
+python image_classification_service.py ResNet50_vd_model cpu 9696 #cpu预测服务
 ```
 ```
-python image_classification_service_gpu.py ResNet50_vd_model workdir 9393 #gpu预测服务
+python image_classification_service.py ResNet50_vd_model gpu 9696 #gpu预测服务
 ```


-client端进行预测
+发送HTTP POST请求
 ```
-python image_http_client.py
+curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9696/image/prediction
 ```
-### 执行RPC预测服务
+
+### RPC服务

 启动server端
 ```
-python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9393 #cpu预测服务
+python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu预测服务
 ```

 ```
-python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9393 --gpu_ids 0 #gpu预测服务
+python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu预测服务
 ```

 client端进行预测
 ```
 python image_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
 ```
-*server端示例中服务端口为9393端口，client端示例中数据来自./data文件夹，server端地址为本地9393端口，可根据实际情况更改脚本。*
+*server端示例中服务端口为9696端口*
--- a/python/examples/imagenet/benchmark.py
+++ b/python/examples/imagenet/benchmark.py
@@ -39,8 +39,8 @@ def single_func(idx, resource):
        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])

        start = time.time()
-        for i in range(1000):
-            img = reader.process_image(img_list[i]).reshape(-1)
+        for i in range(100):
+            img = reader.process_image(img_list[i])
            fetch_map = client.predict(feed={"image": img}, fetch=["score"])
        end = time.time()
        return [[end - start]]
@@ -49,7 +49,7 @@ def single_func(idx, resource):

 if __name__ == "__main__":
    multi_thread_runner = MultiThreadRunner()
-    endpoint_list = ["127.0.0.1:9393"]
+    endpoint_list = ["127.0.0.1:9292"]
    #card_num = 4
    #for i in range(args.thread):
    #    endpoint_list.append("127.0.0.1:{}".format(9295 + i % card_num))

--- a/python/examples/imagenet/benchmark_batch.py
+++ b/python/examples/imagenet/benchmark_batch.py
@@ -24,6 +24,7 @@ from paddle_serving_client.utils import MultiThreadRunner
 from paddle_serving_client.utils import benchmark_args
 import requests
 import json
+import base64
 from image_reader import ImageReader

 args = benchmark_args()
@@ -36,6 +37,10 @@ def single_func(idx, resource):
    img_list = []
    for i in range(1000):
        img_list.append(open("./image_data/n01440764/" + file_list[i]).read())
+    profile_flags = False
+    if "FLAGS_profile_client" in os.environ and os.environ[
+            "FLAGS_profile_client"]:
+        profile_flags = True
    if args.request == "rpc":
        reader = ImageReader()
        fetch = ["score"]
@@ -46,23 +51,43 @@ def single_func(idx, resource):
        for i in range(1000):
            if args.batch_size >= 1:
                feed_batch = []
+                i_start = time.time()
                for bi in range(args.batch_size):
                    img = reader.process_image(img_list[i])
-                    img = img.reshape(-1)
                    feed_batch.append({"image": img})
+                i_end = time.time()
+                if profile_flags:
+                    print("PROFILE\tpid:{}\timage_pre_0:{} image_pre_1:{}".
+                          format(os.getpid(),
+                                 int(round(i_start * 1000000)),
+                                 int(round(i_end * 1000000))))
+
                result = client.predict(feed=feed_batch, fetch=fetch)
            else:
                print("unsupport batch size {}".format(args.batch_size))

    elif args.request == "http":
-        raise ("no batch predict for http")
+        py_version = 2
+        server = "http://" + resource["endpoint"][idx % len(resource[
+            "endpoint"])] + "/image/prediction"
+        start = time.time()
+        for i in range(1000):
+            if py_version == 2:
+                image = base64.b64encode(
+                    open("./image_data/n01440764/" + file_list[i]).read())
+            else:
+                image = base64.b64encode(open(image_path, "rb").read()).decode(
+                    "utf-8")
+            req = json.dumps({"feed": [{"image": image}], "fetch": ["score"]})
+            r = requests.post(
+                server, data=req, headers={"Content-Type": "application/json"})
    end = time.time()
    return [[end - start]]


 if __name__ == '__main__':
    multi_thread_runner = MultiThreadRunner()
-    endpoint_list = ["127.0.0.1:9393"]
+    endpoint_list = ["127.0.0.1:9292"]
    #endpoint_list = endpoint_list + endpoint_list + endpoint_list
    result = multi_thread_runner.run(single_func, args.thread,
                                     {"endpoint": endpoint_list})

--- a/python/examples/imagenet/daisy.jpg
+++ b/python/examples/imagenet/daisy.jpg
--- a/python/examples/imagenet/flower.jpg
+++ b/python/examples/imagenet/flower.jpg
--- a/python/examples/imagenet/image_classification_service_gpu.py
+++ b/python/examples/imagenet/image_classification_service_gpu.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import sys
-import cv2
-import base64
-import numpy as np
-from paddle_serving_app import ImageReader
-from paddle_serving_server_gpu.web_service import WebService
-
-
-class ImageService(WebService):
-    def preprocess(self, feed={}, fetch=[]):
-        reader = ImageReader()
-        feed_batch = []
-        for ins in feed:
-            if "image" not in ins:
-                raise ("feed data error!")
-            sample = base64.b64decode(ins["image"])
-            img = reader.process_image(sample)
-            feed_batch.append({"image": img})
-        return feed_batch, fetch
-
-
-image_service = ImageService(name="image")
-image_service.load_model_config(sys.argv[1])
-image_service.set_gpus("0,1")
-image_service.prepare_server(
-    workdir=sys.argv[2], port=int(sys.argv[3]), device="gpu")
-image_service.run_server()
-image_service.run_flask()
--- a/python/examples/imagenet/image_http_client.py
+++ b/python/examples/imagenet/image_http_client.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import requests
-import base64
-import json
-import time
-import os
-import sys
-
-py_version = sys.version_info[0]
-
-
-def predict(image_path, server):
-    if py_version == 2:
-        image = base64.b64encode(open(image_path).read())
-    else:
-        image = base64.b64encode(open(image_path, "rb").read()).decode("utf-8")
-    req = json.dumps({"feed": [{"image": image}], "fetch": ["score"]})
-    r = requests.post(
-        server, data=req, headers={"Content-Type": "application/json"})
-    try:
-        print(r.json()["result"]["score"])
-    except ValueError:
-        print(r.text)
-    return r
-
-
-if __name__ == "__main__":
-    server = "http://127.0.0.1:9393/image/prediction"
-    image_list = os.listdir("./image_data/n01440764/")
-    start = time.time()
-    for img in image_list:
-        image_file = "./image_data/n01440764/" + img
-        res = predict(image_file, server)
-    end = time.time()
-    print(end - start)
--- a/python/examples/imagenet/image_reader.py
+++ b/python/examples/imagenet/image_reader.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import cv2
-import numpy as np
-
-
-class ImageReader():
-    def __init__(self):
-        self.image_mean = [0.485, 0.456, 0.406]
-        self.image_std = [0.229, 0.224, 0.225]
-        self.image_shape = [3, 224, 224]
-        self.resize_short_size = 256
-        self.interpolation = None
-
-    def resize_short(self, img, target_size, interpolation=None):
-        """resize image
-
-        Args:
-            img: image data
-            target_size: resize short target size
-            interpolation: interpolation mode
-
-        Returns:
-            resized image data
-        """
-        percent = float(target_size) / min(img.shape[0], img.shape[1])
-        resized_width = int(round(img.shape[1] * percent))
-        resized_height = int(round(img.shape[0] * percent))
-        if interpolation:
-            resized = cv2.resize(
-                img, (resized_width, resized_height),
-                interpolation=interpolation)
-        else:
-            resized = cv2.resize(img, (resized_width, resized_height))
-        return resized
-
-    def crop_image(self, img, target_size, center):
-        """crop image
-
-        Args:
-            img: images data
-            target_size: crop target size
-            center: crop mode
-
-        Returns:
-            img: cropped image data
-        """
-        height, width = img.shape[:2]
-        size = target_size
-        if center == True:
-            w_start = (width - size) // 2
-            h_start = (height - size) // 2
-        else:
-            w_start = np.random.randint(0, width - size + 1)
-            h_start = np.random.randint(0, height - size + 1)
-        w_end = w_start + size
-        h_end = h_start + size
-        img = img[h_start:h_end, w_start:w_end, :]
-        return img
-
-    def process_image(self, sample):
-        """ process_image """
-        mean = self.image_mean
-        std = self.image_std
-        crop_size = self.image_shape[1]
-
-        data = np.fromstring(sample, np.uint8)
-        img = cv2.imdecode(data, cv2.IMREAD_COLOR)
-
-        if img is None:
-            print("img is None, pass it.")
-            return None
-
-        if crop_size > 0:
-            target_size = self.resize_short_size
-            img = self.resize_short(
-                img, target_size, interpolation=self.interpolation)
-            img = self.crop_image(img, target_size=crop_size, center=True)
-
-        img = img[:, :, ::-1]
-
-        img = img.astype('float32').transpose((2, 0, 1)) / 255
-        img_mean = np.array(mean).reshape((3, 1, 1))
-        img_std = np.array(std).reshape((3, 1, 1))
-        img -= img_mean
-        img /= img_std
-        return img
--- a/python/examples/imagenet/imagenet.label
+++ b/python/examples/imagenet/imagenet.label
--- a/python/examples/imagenet/image_rpc_client.py
+++ b/python/examples/imagenet/image_rpc_client.py
@@ -14,23 +14,35 @@

 import sys
 from paddle_serving_client import Client
-from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize
+from paddle_serving_app.reader import Sequential, URL2Image, Resize
+from paddle_serving_app.reader import CenterCrop, RGB2BGR, Transpose, Div, Normalize
 import time

 client = Client()
 client.load_client_config(sys.argv[1])
-client.connect(["127.0.0.1:9393"])
+client.connect(["127.0.0.1:9696"])
+
+label_dict = {}
+label_idx = 0
+with open("imagenet.label") as fin:
+    for line in fin:
+        label_dict[label_idx] = line.strip()
+        label_idx += 1

 seq = Sequential([
-    File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
-    Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+    URL2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
+    Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
 ])
-print(seq)

 start = time.time()
-image_file = "daisy.jpg"
-for i in range(1000):
+image_file = "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"
+for i in range(10):
    img = seq(image_file)
    fetch_map = client.predict(feed={"image": img}, fetch=["score"])
+    prob = max(fetch_map["score"][0])
+    label = label_dict[fetch_map["score"][0].tolist().index(prob)].strip(
+    ).replace(",", "")
+    print("prediction: {}, probability: {}".format(label, prob))
+
 end = time.time()
 print(end - start)
--- a/python/examples/imagenet/image_classification_service.py
+++ b/python/examples/imagenet/image_classification_service.py
@@ -11,31 +11,62 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from paddle_serving_server.web_service import WebService
 import sys
-import cv2
-import base64
-import numpy as np
-from paddle_serving_app import ImageReader
+from paddle_serving_client import Client
+from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize
+
+if len(sys.argv) != 4:
+    print("python image_classification_service.py model device port")
+    sys.exit(-1)
+
+device = sys.argv[2]
+
+if device == "cpu":
+    from paddle_serving_server.web_service import WebService
+else:
+    from paddle_serving_server_gpu.web_service import WebService


 class ImageService(WebService):
-    def preprocess(self, feed={}, fetch=[]):
-        reader = ImageReader()
+    def init_imagenet_setting(self):
+        self.seq = Sequential([
+            URL2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose(
+                (2, 0, 1)), Div(255), Normalize([0.485, 0.456, 0.406],
+                                                [0.229, 0.224, 0.225], True)
+        ])
+        self.label_dict = {}
+        label_idx = 0
+        with open("imagenet.label") as fin:
+            for line in fin:
+                self.label_dict[label_idx] = line.strip()
+                label_idx += 1
+
+    def preprocess(self, feed=[], fetch=[]):
         feed_batch = []
         for ins in feed:
             if "image" not in ins:
-                raise ("feed data error!")
-            sample = base64.b64decode(ins["image"])
-            img = reader.process_image(sample)
+                raise ValueError("feed data error!")
+            img = self.seq(ins["image"])
             feed_batch.append({"image": img})
         return feed_batch, fetch
 
+    def postprocess(self, feed=[], fetch=[], fetch_map={}):
+        score_list = fetch_map["score"]
+        result = {"label": [], "prob": []}
+        for score in score_list:
+            max_score = max(score)
+            result["label"].append(self.label_dict[score.index(max_score)]
+                                   .strip().replace(",", ""))
+            result["prob"].append(max_score)
+        return result
+

 image_service = ImageService(name="image")
 image_service.load_model_config(sys.argv[1])
+image_service.init_imagenet_setting()
+if device == "gpu":
+    image_service.set_gpus("0,1")
 image_service.prepare_server(
-    workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu")
+    workdir="workdir", port=int(sys.argv[3]), device=device)
 image_service.run_server()
 image_service.run_flask()
--- a/python/examples/imdb/benchmark.py
+++ b/python/examples/imdb/benchmark.py
@@ -16,7 +16,7 @@
 import sys
 import time
 import requests
-from imdb_reader import IMDBDataset
+from paddle_serving_app import IMDBDataset
 from paddle_serving_client import Client
 from paddle_serving_client.utils import MultiThreadRunner
 from paddle_serving_client.utils import benchmark_args
@@ -37,26 +37,39 @@ def single_func(idx, resource):
        client.load_client_config(args.model)
        client.connect([args.endpoint])
        for i in range(1000):
-            if args.batch_size == 1:
-                word_ids, label = imdb_dataset.get_words_and_label(line)
-                fetch_map = client.predict(
-                    feed={"words": word_ids}, fetch=["prediction"])
+            if args.batch_size >= 1:
+                feed_batch = []
+                for bi in range(args.batch_size):
+                    word_ids, label = imdb_dataset.get_words_and_label(dataset[
+                        bi])
+                    feed_batch.append({"words": word_ids})
+                result = client.predict(feed=feed_batch, fetch=["prediction"])
+                if result is None:
+                    raise RuntimeError("predict failed.")
            else:
                print("unsupport batch size {}".format(args.batch_size))

    elif args.request == "http":
-        for fn in filelist:
-            fin = open(fn)
-            for line in fin:
-                word_ids, label = imdb_dataset.get_words_and_label(line)
-                r = requests.post(
-                    "http://{}/imdb/prediction".format(args.endpoint),
-                    data={"words": word_ids,
-                          "fetch": ["prediction"]})
+        if args.batch_size >= 1:
+            feed_batch = []
+            for bi in range(args.batch_size):
+                feed_batch.append({"words": dataset[bi]})
+            r = requests.post(
+                "http://{}/imdb/prediction".format(args.endpoint),
+                json={"feed": feed_batch,
+                      "fetch": ["prediction"]})
+            if r.status_code != 200:
+                print('HTTP status code -ne 200')
+                raise RuntimeError("predict failed.")
+        else:
+            print("unsupport batch size {}".format(args.batch_size))
    end = time.time()
    return [[end - start]]


 multi_thread_runner = MultiThreadRunner()
 result = multi_thread_runner.run(single_func, args.thread, {})
-print(result)
+avg_cost = 0
+for cost in result[0]:
+    avg_cost += cost
+print("total cost {} s of each thread".format(avg_cost / args.thread))
--- a/python/examples/imdb/benchmark.sh
+++ b/python/examples/imdb/benchmark.sh
 rm profile_log
 for thread_num in 1 2 4 8 16
 do
-    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --model imdbo_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
+for batch_size in 1 2 4 8 16 32 64 128 256 512
+do
+    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
    echo "========================================"
    echo "batch size : $batch_size" >> profile_log
    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
    tail -n 1 profile >> profile_log
 done
+done
--- a/python/examples/imdb/benchmark_batch.py
+++ b/python/examples/imdb/benchmark_batch.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# pylint: disable=doc-string-missing
-
-import sys
-import time
-import requests
-from imdb_reader import IMDBDataset
-from paddle_serving_client import Client
-from paddle_serving_client.utils import MultiThreadRunner
-from paddle_serving_client.utils import benchmark_args
-
-args = benchmark_args()
-
-
-def single_func(idx, resource):
-    imdb_dataset = IMDBDataset()
-    imdb_dataset.load_resource("./imdb.vocab")
-    dataset = []
-    with open("./test_data/part-0") as fin:
-        for line in fin:
-            dataset.append(line.strip())
-    start = time.time()
-    if args.request == "rpc":
-        client = Client()
-        client.load_client_config(args.model)
-        client.connect([args.endpoint])
-        for i in range(1000):
-            if args.batch_size >= 1:
-                feed_batch = []
-                for bi in range(args.batch_size):
-                    word_ids, label = imdb_dataset.get_words_and_label(dataset[
-                        bi])
-                    feed_batch.append({"words": word_ids})
-                result = client.predict(feed=feed_batch, fetch=["prediction"])
-                if result is None:
-                    raise ("predict failed.")
-            else:
-                print("unsupport batch size {}".format(args.batch_size))
-
-    elif args.request == "http":
-        if args.batch_size >= 1:
-            feed_batch = []
-            for bi in range(args.batch_size):
-                feed_batch.append({"words": dataset[bi]})
-            r = requests.post(
-                "http://{}/imdb/prediction".format(args.endpoint),
-                json={"feed": feed_batch,
-                      "fetch": ["prediction"]})
-            if r.status_code != 200:
-                print('HTTP status code -ne 200')
-                raise ("predict failed.")
-        else:
-            print("unsupport batch size {}".format(args.batch_size))
-    end = time.time()
-    return [[end - start]]
-
-
-multi_thread_runner = MultiThreadRunner()
-result = multi_thread_runner.run(single_func, args.thread, {})
-avg_cost = 0
-for cost in result[0]:
-    avg_cost += cost
-print("total cost {} s of each thread".format(avg_cost / args.thread))
--- a/python/examples/imdb/benchmark_batch.sh
+++ b/python/examples/imdb/benchmark_batch.sh
-rm profile_log
-for thread_num in 1 2 4 8 16
-do
-for batch_size in 1 2 4 8 16 32 64 128 256 512
-do
-    $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
-    echo "========================================"
-    echo "batch size : $batch_size" >> profile_log
-    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
-    tail -n 1 profile >> profile_log
-done
-done
--- a/python/examples/imdb/test_client.py
+++ b/python/examples/imdb/test_client.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 # pylint: disable=doc-string-missing
 from paddle_serving_client import Client
-from imdb_reader import IMDBDataset
+from paddle_serving_app import IMDBDataset
 import sys

 client = Client()

--- a/python/examples/imdb/test_client_batch.py
+++ b/python/examples/imdb/test_client_batch.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# pylint: disable=doc-string-missing
-
-from paddle_serving_client import Client
-import sys
-import subprocess
-from multiprocessing import Pool
-import time
-
-
-def batch_predict(batch_size=4):
-    client = Client()
-    client.load_client_config(conf_file)
-    client.connect(["127.0.0.1:9292"])
-    fetch = ["acc", "cost", "prediction"]
-    feed_batch = []
-    for line in sys.stdin:
-        group = line.strip().split()
-        words = [int(x) for x in group[1:int(group[0])]]
-        label = [int(group[-1])]
-        feed = {"words": words, "label": label}
-        feed_batch.append(feed)
-        if len(feed_batch) == batch_size:
-            fetch_batch = client.batch_predict(
-                feed_batch=feed_batch, fetch=fetch)
-            for i in range(batch_size):
-                print("{} {}".format(fetch_batch[i]["prediction"][1],
-                                     feed_batch[i]["label"][0]))
-            feed_batch = []
-    if len(feed_batch) > 0:
-        fetch_batch = client.batch_predict(feed_batch=feed_batch, fetch=fetch)
-        for i in range(len(feed_batch)):
-            print("{} {}".format(fetch_batch[i]["prediction"][1], feed_batch[i][
-                "label"][0]))
-
-
-if __name__ == '__main__':
-    conf_file = sys.argv[1]
-    batch_size = int(sys.argv[2])
-    batch_predict(batch_size)
--- a/python/examples/imdb/text_classify_service.py
+++ b/python/examples/imdb/text_classify_service.py
@@ -14,7 +14,7 @@
 # pylint: disable=doc-string-missing

 from paddle_serving_server.web_service import WebService
-from imdb_reader import IMDBDataset
+from paddle_serving_app import IMDBDataset
 import sys



--- a/python/examples/senta/senta_web_service.py
+++ b/python/examples/senta/senta_web_service.py
@@ -51,13 +51,11 @@ class SentaService(WebService):
    def init_lac_service(self):
        ps = Process(target=self.start_lac_service())
        ps.start()
-        #self.init_lac_client()
+        self.init_lac_client()

    def lac_predict(self, feed_data):
-        self.init_lac_client()
        lac_result = self.lac_client.predict(
            feed={"words": feed_data}, fetch=["crf_decode"])
-        self.lac_client.release()
        return lac_result

    def init_lac_client(self):

--- a/python/paddle_serving_app/__init__.py
+++ b/python/paddle_serving_app/__init__.py
@@ -12,8 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from .reader.chinese_bert_reader import ChineseBertReader
-from .reader.image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize
+from .reader.image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize, PadStride
 from .reader.lac_reader import LACReader
 from .reader.senta_reader import SentaReader
+from .reader.imdb_reader import IMDBDataset
 from .models import ServingModels
 from .local_predict import Debugger
--- a/python/paddle_serving_app/reader/__init__.py
+++ b/python/paddle_serving_app/reader/__init__.py
@@ -11,4 +11,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, RCNNPostprocess, SegPostprocess
+from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, RCNNPostprocess, SegPostprocess, PadStride
--- a/python/paddle_serving_app/reader/image_reader.py
+++ b/python/paddle_serving_app/reader/image_reader.py
@@ -465,6 +465,24 @@ class Resize(object):
            _cv2_interpolation_to_str[self.interpolation])


+class PadStride(object):
+    """Zero-pad a (c, h, w) image so h and w are multiples of stride (0: no-op)."""
+
+    def __init__(self, stride):
+        self.coarsest_stride = stride
+
+    def __call__(self, img):
+        coarsest_stride = self.coarsest_stride
+        if coarsest_stride == 0:
+            return img
+        im_c, im_h, im_w = img.shape
+        pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
+        pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
+        padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
+        padding_im[:, :im_h, :im_w] = img
+        return padding_im
+
+
 class Transpose(object):
    def __init__(self, transpose_target):
        self.transpose_target = transpose_target

--- a/python/paddle_serving_app/reader/imdb_reader.py
+++ b/python/paddle_serving_app/reader/imdb_reader.py
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
+
+import sys
+import os
+import paddle
+import re
+import paddle.fluid.incubate.data_generator as dg
+
+py_version = sys.version_info[0]
+
+
+class IMDBDataset(dg.MultiSlotDataGenerator):
+    """Converts raw IMDB review text into vocabulary-id features.
+
+    `load_resource` must be called first: it builds the vocabulary and the
+    tokenizer pattern that every other method reads.
+    """
+
+    def load_resource(self, dictfile):
+        """Load the vocabulary from `dictfile`, one token per line.
+
+        Each token maps to its zero-based line number, and `len(vocab)` is
+        used as the id for out-of-vocabulary tokens.
+        """
+        self._vocab = {}
+        # Python 2's built-in open() does not accept `encoding`.
+        open_kwargs = {} if py_version == 2 else {"encoding": "utf-8"}
+        with open(dictfile, **open_kwargs) as f:
+            for wid, line in enumerate(f):
+                self._vocab[line.strip()] = wid
+        self._unk_id = len(self._vocab)
+        self._pattern = re.compile(r'(;|,|\.|\?|!|\s|\(|\))')
+        self.return_value = ("words", [1, 2, 3, 4, 5, 6]), ("label", [0])
+
+    def get_words_only(self, line):
+        """Tokenize `line` and return the list of vocabulary ids."""
+        sent = line.lower().replace("<br />", " ").strip()
+        words = [x for x in self._pattern.split(sent) if x and x != " "]
+        return [self._vocab.get(x, self._unk_id) for x in words]
+
+    def get_words_and_label(self, line):
+        """Parse a `text|label` line into (vocabulary ids, [label]).
+
+        Only the last `|` separates the label, so the text may contain `|`.
+        Raises ValueError when the label part is not an integer.
+        """
+        text, _, label = line.rpartition('|')
+        return self.get_words_only(text), [int(label)]
+
+    def infer_reader(self, infer_filelist, batch, buf_size):
+        """Return a shuffled, batched reader over the lines of the given files."""
+
+        def local_iter():
+            for fname in infer_filelist:
+                with open(fname, "r") as fin:
+                    for line in fin:
+                        yield self.get_words_and_label(line)
+
+        return paddle.batch(
+            paddle.reader.shuffle(
+                local_iter, buf_size=buf_size),
+            batch_size=batch)
+
+    def generate_sample(self, line):
+        """Return a generator function that yields the single sample in `line`."""
+
+        def data_iter():
+            feas, label = self.get_words_and_label(line)
+            yield ("words", feas), ("label", label)
+
+        return data_iter
+
+
+if __name__ == "__main__":
+    imdb = IMDBDataset()
+    imdb.load_resource("imdb.vocab")
+    imdb.run_from_stdin()
--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -329,9 +329,9 @@ class Client(object):
            # result map needs to be a numpy array
            for i, name in enumerate(fetch_names):
                if self.fetch_names_to_type_[name] == int_type:
+                    # result_map[name] will be py::array(numpy array)
                    result_map[name] = result_batch.get_int64_by_name(mi, name)
                    shape = result_batch.get_shape(mi, name)
-                    result_map[name] = np.array(result_map[name], dtype='int64')
                    result_map[name].shape = shape
                    if name in self.lod_tensor_set:
                        result_map["{}.lod".format(name)] = np.array(
@@ -339,8 +339,6 @@ class Client(object):
                elif self.fetch_names_to_type_[name] == float_type:
                    result_map[name] = result_batch.get_float_by_name(mi, name)
                    shape = result_batch.get_shape(mi, name)
-                    result_map[name] = np.array(
-                        result_map[name], dtype='float32')
                    result_map[name].shape = shape
                    if name in self.lod_tensor_set:
                        result_map["{}.lod".format(name)] = np.array(

--- a/python/paddle_serving_client/io/__init__.py
+++ b/python/paddle_serving_client/io/__init__.py
@@ -104,10 +104,10 @@ def save_model(server_model_folder,


 def inference_model_to_serving(dirname,
-                               model_filename=None,
-                               params_filename=None,
                               serving_server="serving_server",
-                               serving_client="serving_client"):
+                               serving_client="serving_client",
+                               model_filename=None,
+                               params_filename=None):
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    inference_program, feed_target_names, fetch_targets = \

--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -274,7 +274,8 @@ class Server(object):
                self.model_config_paths[node.name] = path
            print("You have specified multiple model paths, please ensure "
                  "that the input and output of multiple models are the same.")
-            workflow_oi_config_path = self.model_config_paths.items()[0][1]
+            workflow_oi_config_path = list(self.model_config_paths.items())[0][
+                1]
        else:
            raise Exception("The type of model_config_paths must be str or "
                            "dict({op: model_path}), not {}.".format(

--- a/python/paddle_serving_server/web_service.py
+++ b/python/paddle_serving_server/web_service.py
@@ -101,7 +101,6 @@ class WebService(object):
        p_rpc = Process(target=self._launch_rpc_service)
        p_rpc.start()

-    def run_flask(self):
        app_instance = Flask(__name__)

        @app_instance.before_first_request
@@ -114,10 +113,16 @@ class WebService(object):
        def run():
            return self.get_prediction(request)

-        app_instance.run(host="0.0.0.0",
-                         port=self.port,
-                         threaded=False,
-                         processes=4)
+        self.app_instance = app_instance
+
+    def run_flask(self):
+        self.app_instance.run(host="0.0.0.0",
+                              port=self.port,
+                              threaded=False,
+                              processes=1)
+
+    def get_app_instance(self):
+        return self.app_instance

    def preprocess(self, feed=[], fetch=[]):
        return feed, fetch

--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -320,7 +320,8 @@ class Server(object):
                self.model_config_paths[node.name] = path
            print("You have specified multiple model paths, please ensure "
                  "that the input and output of multiple models are the same.")
-            workflow_oi_config_path = self.model_config_paths.items()[0][1]
+            workflow_oi_config_path = list(self.model_config_paths.items())[0][
+                1]
        else:
            raise Exception("The type of model_config_paths must be str or "
                            "dict({op: model_path}), not {}.".format(

--- a/python/paddle_serving_server_gpu/web_service.py
+++ b/python/paddle_serving_server_gpu/web_service.py
@@ -151,7 +151,6 @@ class WebService(object):
        for p in server_pros:
            p.start()

-    def run_flask(self):
        app_instance = Flask(__name__)

        @app_instance.before_first_request
@@ -164,10 +163,16 @@ class WebService(object):
        def run():
            return self.get_prediction(request)

-        app_instance.run(host="0.0.0.0",
-                         port=self.port,
-                         threaded=False,
-                         processes=4)
+        self.app_instance = app_instance
+
+    def run_flask(self):
+        self.app_instance.run(host="0.0.0.0",
+                              port=self.port,
+                              threaded=False,
+                              processes=1)
+
+    def get_app_instance(self):
+        return self.app_instance  # the Flask app is stored on self; bare `app_instance` is undefined here

    def preprocess(self, feed=[], fetch=[]):
        return feed, fetch

--- a/tools/Dockerfile.centos6.devel
+++ b/tools/Dockerfile.centos6.devel
@@ -43,5 +43,5 @@ RUN yum -y install wget && \
    source /root/.bashrc && \
    cd .. && rm -rf Python-3.6.8* && \
    pip3 install google protobuf setuptools wheel flask numpy==1.16.4 && \
-    yum -y install epel-release && yum -y install patchelf && \
+    yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
    yum clean all
--- a/tools/Dockerfile.centos6.gpu.devel
+++ b/tools/Dockerfile.centos6.gpu.devel
@@ -43,5 +43,5 @@ RUN yum -y install wget && \
    source /root/.bashrc && \
    cd .. && rm -rf Python-3.6.8* && \
    pip3 install google protobuf setuptools wheel flask numpy==1.16.4 && \
-    yum -y install epel-release && yum -y install patchelf && \
+    yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
    yum clean all
--- a/tools/Dockerfile.devel
+++ b/tools/Dockerfile.devel
@@ -20,5 +20,5 @@ RUN yum -y install wget >/dev/null \
    && rm get-pip.py \
    && yum install -y python3 python3-devel \
    && pip3 install google protobuf setuptools wheel flask \
-    && yum -y install epel-release && yum -y install patchelf \
+    && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
    && yum clean all
--- a/tools/Dockerfile.gpu.devel
+++ b/tools/Dockerfile.gpu.devel
@@ -21,5 +21,5 @@ RUN yum -y install wget >/dev/null \
    && rm get-pip.py \
    && yum install -y python3 python3-devel \
    && pip3 install google protobuf setuptools wheel flask \
-    && yum -y install epel-release && yum -y install patchelf \
+    && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
    && yum clean all
--- a/tools/serving_build.sh
+++ b/tools/serving_build.sh
@@ -343,7 +343,7 @@ function python_test_imdb() {
            sleep 5
            check_cmd "head test_data/part-0 | python test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab"
            # test batch predict
-            check_cmd "python benchmark_batch.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc --endpoint 127.0.0.1:9292"
+            check_cmd "python benchmark.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc --endpoint 127.0.0.1:9292"
            echo "imdb CPU RPC inference pass"
            kill_server_process
            rm -rf work_dir1
@@ -359,7 +359,7 @@ function python_test_imdb() {
                exit 1
            fi
            # test batch predict
-            check_cmd "python benchmark_batch.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request http --endpoint 127.0.0.1:9292"
+            check_cmd "python benchmark.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request http --endpoint 127.0.0.1:9292"
            setproxy # recover proxy state
            kill_server_process
            ps -ef | grep "text_classify_service.py" | grep -v grep | awk '{print $2}' | xargs kill