Unverified commit d66a0016 authored by TeslaZhao, committed by GitHub

Merge pull request #1348 from HexToString/grpc_update

update http_session
@@ -98,7 +98,7 @@ java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar Pipeli
### Notes
-1. In the examples the port is 9393 and the ip defaults to 0.0.0.0, i.e. the local machine; make sure the ip and port match the Server side.
+1. In the examples the port is 9393 and the ip defaults to 127.0.0.1, i.e. the local machine; make sure the ip and port match the Server side.
2. Serving now offers a Pipeline mode (see [Pipeline Serving](../doc/PIPELINE_SERVING_CN.md) for the design); the Pipeline Serving Client for Java has been released.
......
@@ -25,7 +25,7 @@ public class PaddleServingClientExample {
        List<String> fetch = Arrays.asList("price");
        Client client = new Client();
-       client.setIP("0.0.0.0");
+       client.setIP("127.0.0.1");
        client.setPort("9393");
        client.loadClientConfig(model_config_path);
        String result = client.predict(feed_data, fetch, true, 0);
@@ -49,7 +49,7 @@ public class PaddleServingClientExample {
        Client client = new Client();
        // Note: across docker containers, set --net-host or access the other container's ip directly
-       client.setIP("0.0.0.0");
+       client.setIP("127.0.0.1");
        client.setPort("9393");
        client.set_http_proto(false);
        client.loadClientConfig(model_config_path);
@@ -73,7 +73,7 @@ public class PaddleServingClientExample {
        List<String> fetch = Arrays.asList("price");
        Client client = new Client();
-       client.setIP("0.0.0.0");
+       client.setIP("127.0.0.1");
        client.setPort("9393");
        client.loadClientConfig(model_config_path);
        client.set_use_grpc_client(true);
@@ -97,7 +97,7 @@ public class PaddleServingClientExample {
        List<String> fetch = Arrays.asList("price");
        Client client = new Client();
-       client.setIP("0.0.0.0");
+       client.setIP("127.0.0.1");
        client.setPort("9393");
        client.loadClientConfig(model_config_path);
        client.use_key(keyFilePath);
@@ -125,7 +125,7 @@ public class PaddleServingClientExample {
        List<String> fetch = Arrays.asList("price");
        Client client = new Client();
-       client.setIP("0.0.0.0");
+       client.setIP("127.0.0.1");
        client.setPort("9393");
        client.loadClientConfig(model_config_path);
        client.set_request_compress(true);
@@ -176,7 +176,7 @@ public class PaddleServingClientExample {
        }};
        List<String> fetch = Arrays.asList("save_infer_model/scale_0.tmp_0");
        Client client = new Client();
-       client.setIP("0.0.0.0");
+       client.setIP("127.0.0.1");
        client.setPort("9393");
        client.loadClientConfig(model_config_path);
        String result = client.predict(feed_data, fetch, true, 0);
@@ -198,7 +198,7 @@ public class PaddleServingClientExample {
        }};
        List<String> fetch = Arrays.asList("pooled_output");
        Client client = new Client();
-       client.setIP("0.0.0.0");
+       client.setIP("127.0.0.1");
        client.setPort("9393");
        client.loadClientConfig(model_config_path);
        String result = client.predict(feed_data, fetch, true, 0);
@@ -268,7 +268,7 @@ public class PaddleServingClientExample {
        }};
        List<String> fetch = Arrays.asList("prob");
        Client client = new Client();
-       client.setIP("0.0.0.0");
+       client.setIP("127.0.0.1");
        client.setPort("9393");
        client.loadClientConfig(model_config_path);
        String result = client.predict(feed_data, fetch, true, 0);
......
@@ -134,7 +134,7 @@ public class Client {
        feedTensorLen_ = null;
        feedNameToIndex_ = null;
        timeoutS_ = 200000;
-       ip = "0.0.0.0";
+       ip = "127.0.0.1";
        port = "9393";
        serverPort = "9393";
        serviceName = "/GeneralModelService/inference";
......
-## Bert as service Http
+## Bert as service
([简体中文](./README_CN.md)|English)
@@ -42,7 +42,7 @@ sh get_data.sh
```
this script will download Chinese Dictionary File vocab.txt and Chinese Sample Data data-c.txt
-### RPC Inference Service
+### Inference Service (Support BRPC-Client/GRPC-Client/HTTP-Client)
start cpu inference service, Run
```
python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #cpu inference service
@@ -52,7 +52,7 @@ Or,start gpu inference service,Run
python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0
```
-### RPC Inference
+### BRPC-Client Inference
before prediction we should install paddle_serving_app. This module provides data preprocessing for the BERT model.
```
@@ -65,25 +65,13 @@ head data-c.txt | python bert_client.py --model bert_seq128_client/serving_clien
the client reads data from data-c.txt and sends prediction requests; the prediction is given as a word vector. (Due to the massive data in the word vector, we do not print it.)
-### HTTP Inference Service
-start cpu HTTP inference service,Run
-```
-python bert_web_service.py bert_seq128_model/ 9292 #launch cpu inference service
-```
-Or,start gpu HTTP inference service,Run
-```
-export CUDA_VISIBLE_DEVICES=0,1
-```
-set environmental variable to specify which gpus are used, the command above means gpu 0 and gpu 1 is used.
-```
-python bert_web_service_gpu.py bert_seq128_model/ 9292 #launch gpu inference service
-```
-### HTTP Inference
-```
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
-```
+#### GRPC-Client/HTTP-Client
+Run
+```
+head data-c.txt | python bert_httpclient.py --model bert_seq128_client/serving_client_conf.prototxt
+```
## Benchmark
``` shell
......
@@ -40,7 +40,7 @@ sh get_data.sh
```
The script downloads the Chinese dictionary vocab.txt and the Chinese sample data data-c.txt.
-### Start the RPC inference service
+### Start the inference service (accessible via BRPC-Client, GRPC-Client, or HTTP-Client)
To start the cpu inference service, run
```
python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 # start cpu inference service
@@ -58,6 +58,8 @@ python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --g
```
pip install paddle_serving_app
```
+#### BRPC-Client
Run
```
head data-c.txt | python bert_client.py --model bert_seq128_client/serving_client_conf.prototxt
@@ -65,28 +67,11 @@ head data-c.txt | python bert_client.py --model bert_seq128_client/serving_clien
```
The client reads data from data-c.txt and sends prediction requests; the results are the vector representations of the text (the script does not print them because of the amount of data). The server address can be changed inside the script.
-### Start the HTTP inference service
-To start the cpu HTTP inference service, run
-```
-python bert_web_service.py bert_seq128_model/ 9292 # start the CPU inference service
-```
-Or, to start the gpu HTTP inference service, run
-```
-export CUDA_VISIBLE_DEVICES=0,1
-```
-The environment variable specifies which gpus the inference service uses; the example selects the two gpus with indices 0 and 1.
-```
-python bert_web_service_gpu.py bert_seq128_model/ 9292 # start the gpu inference service
-```
-### Run prediction
-```
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
-```
+#### GRPC-Client/HTTP-Client
+Run
+```
+head data-c.txt | python bert_httpclient.py --model bert_seq128_client/serving_client_conf.prototxt
+```
## Benchmark
......
-# coding=utf-8
+# coding:utf-8
+# pylint: disable=doc-string-missing
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,37 +13,47 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-# pylint: disable=doc-string-missing
-from paddle_serving_server.web_service import WebService
-from paddle_serving_app.reader import ChineseBertReader
import sys
-import os
+from paddle_serving_client import HttpClient
+from paddle_serving_client.utils import benchmark_args
+from paddle_serving_app.reader import ChineseBertReader
import numpy as np
+args = benchmark_args()
+
+reader = ChineseBertReader({"max_seq_len": 128})
+fetch = ["pooled_output"]

-class BertService(WebService):
-    def load(self):
-        self.reader = ChineseBertReader({
-            "vocab_file": "vocab.txt",
-            "max_seq_len": 128
-        })
-
-    def preprocess(self, feed=[], fetch=[]):
-        feed_res = []
-        is_batch = False
-        for ins in feed:
-            feed_dict = self.reader.process(ins["words"].encode("utf-8"))
-            for key in feed_dict.keys():
-                feed_dict[key] = np.array(feed_dict[key]).reshape(
-                    (len(feed_dict[key]), 1))
-            feed_res.append(feed_dict)
-        return feed_res, fetch, is_batch
+client = HttpClient(ip='127.0.0.1', port='9292')
+client.load_client_config(args.model)
+#client.set_ip('127.0.0.1')
+#client.set_port('9292')
+'''
+if you want use GRPC-client, set_use_grpc_client(True)
+or you can directly use client.grpc_client_predict(...)
+as for HTTP-client,set_use_grpc_client(False)(which is default)
+or you can directly use client.http_client_predict(...)
+'''
+#client.set_use_grpc_client(True)
+'''
+if you want to enable Encrypt Module,uncommenting the following line
+'''
+#client.use_key("./key")
+'''
+if you want to compress,uncommenting the following line
+'''
+#client.set_response_compress(True)
+#client.set_request_compress(True)
+'''
+we recommend use Proto data format in HTTP-body, set True(which is default)
+if you want use JSON data format in HTTP-body, set False
+'''
+#client.set_http_proto(True)

-bert_service = BertService(name="bert")
-bert_service.load()
-bert_service.load_model_config(sys.argv[1])
-bert_service.prepare_server(
-    workdir="workdir", port=int(sys.argv[2]), device="cpu")
-bert_service.run_rpc_service()
-bert_service.run_web_service()
+for line in sys.stdin:
+    feed_dict = reader.process(line)
+    for key in feed_dict.keys():
+        feed_dict[key] = np.array(feed_dict[key]).reshape((128, 1))
+    #print(feed_dict)
+    result = client.predict(feed=feed_dict, fetch=fetch, batch=False)
+    print(result)
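For reference, a hedged sketch of switching the script above from the HTTP path to the gRPC path. The toggle and the direct-call form are the ones its own comments describe; the exact keyword arguments of `grpc_client_predict` are an assumption, and `client`, `feed_dict` and `fetch` are taken from the script.

```python
# Sketch only: route the client above through the gRPC transport.
# Assumes the server from the README is listening on 127.0.0.1:9292.
client.set_use_grpc_client(True)
result = client.predict(feed=feed_dict, fetch=fetch, batch=False)
print(result)

# Alternatively, call the gRPC path directly (assumed to accept the same
# keyword arguments as predict()).
result = client.grpc_client_predict(feed=feed_dict, fetch=fetch, batch=False)
print(result)
```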
# coding=utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader import ChineseBertReader
import sys
import os
import numpy as np
class BertService(WebService):
def load(self):
self.reader = ChineseBertReader({
"vocab_file": "vocab.txt",
"max_seq_len": 128
})
def preprocess(self, feed=[], fetch=[]):
feed_res = []
is_batch = False
for ins in feed:
feed_dict = self.reader.process(ins["words"].encode("utf-8"))
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape(
(len(feed_dict[key]), 1))
feed_res.append(feed_dict)
return feed_res, fetch, is_batch
bert_service = BertService(name="bert")
bert_service.load()
bert_service.load_model_config(sys.argv[1])
bert_service.set_gpus("0")
bert_service.prepare_server(
workdir="workdir", port=int(sys.argv[2]), device="gpu")
bert_service.run_rpc_service()
bert_service.run_web_service()
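For completeness, the JSON request that the removed README lines issued with curl against this web service can also be sent from Python. This is only an illustrative sketch; it assumes the service above was launched as in the old README, e.g. `python bert_web_service_gpu.py bert_seq128_model/ 9292`.

```python
# Sketch: the same request the removed curl example sent to the web service
# above (name="bert"; port 9292 assumed from the old README).
import requests

payload = {"feed": [{"words": "hello"}], "fetch": ["pooled_output"]}
resp = requests.post("http://127.0.0.1:9292/bert/prediction", json=payload)
print(resp.json())
```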
@@ -9,9 +9,7 @@ sh get_data.sh
```
-## Start the server
+## Start the server (supports BRPC-Client/GRPC-Client/HTTP-Client)
```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
@@ -19,7 +17,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
## Client prediction
-### Client RPC
+### BRPC-Client
`test_client.py` uses the `paddlepaddle` package, which needs to be installed first (`pip install paddlepaddle`).
@@ -27,7 +25,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
python test_client.py uci_housing_client/serving_client_conf.prototxt
```
-### Client HTTP prediction
+### GRPC-Client/HTTP-Client
``` shell
python test_httpclient.py uci_housing_client/serving_client_conf.prototxt
......
@@ -13,13 +13,15 @@
# limitations under the License.
# pylint: disable=doc-string-missing
-from paddle_serving_client.httpclient import GeneralClient
+from paddle_serving_client.httpclient import HttpClient
import sys
import numpy as np
import time

-client = GeneralClient()
+client = HttpClient()
client.load_client_config(sys.argv[1])
+#client.set_ip('127.0.0.1')
+#client.set_port('9393')
'''
if you want use GRPC-client, set_use_grpc_client(True)
or you can directly use client.grpc_client_predict(...)
......
@@ -15,23 +15,8 @@ sh get_model.sh
pip install paddle_serving_app
```
-### HTTP Service
-launch server side
-```
-python resnet50_web_service.py ResNet50_vd_model cpu 9696 #cpu inference service
-```
-```
-python resnet50_web_service.py ResNet50_vd_model gpu 9696 #gpu inference service
-```
-client send inference request
-```
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9696/image/prediction
-```
-### RPC Service
+### Inference Service (Support BRPC-Client/GRPC-Client/Http-Client)
launch server side
```
@@ -42,8 +27,15 @@ python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu
python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service
```
+### BRPC-Client
client send inference request
```
python resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
```
*the port of server side in this example is 9696
+### GRPC-Client/Http-Client
+client send inference request
+```
+python resnet50_http_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
+```
@@ -15,23 +15,7 @@ sh get_model.sh
pip install paddle_serving_app
```
-### HTTP service
-Start the server
-```
-python resnet50_web_service.py ResNet50_vd_model cpu 9696 # cpu inference service
-```
-```
-python resnet50_web_service.py ResNet50_vd_model gpu 9696 # gpu inference service
-```
-Send an HTTP POST request
-```
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9696/image/prediction
-```
-### RPC service
+### Start the server (supports BRPC-Client/GRPC-Client/Http-Client)
Start the server
```
@@ -42,8 +26,16 @@ python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu
python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 # gpu inference service
```
+### BRPC-Client prediction
Run prediction on the client side
```
python resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
```
*The server port in this example is 9696
+### GRPC-Client/Http-Client prediction
+Run prediction on the client side
+```
+python resnet50_http_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
+```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from paddle_serving_client import HttpClient
from paddle_serving_app.reader import Sequential, URL2Image, Resize
from paddle_serving_app.reader import CenterCrop, RGB2BGR, Transpose, Div, Normalize
import time
client = HttpClient(ip='127.0.0.1', port='9696')
client.load_client_config(sys.argv[1])
#client.set_ip('127.0.0.1')
#client.set_port('9292')
'''
if you want use GRPC-client, set_use_grpc_client(True)
or you can directly use client.grpc_client_predict(...)
as for HTTP-client,set_use_grpc_client(False)(which is default)
or you can directly use client.http_client_predict(...)
'''
#client.set_use_grpc_client(True)
'''
if you want to enable Encrypt Module,uncommenting the following line
'''
#client.use_key("./key")
'''
if you want to compress,uncommenting the following line
'''
#client.set_response_compress(True)
#client.set_request_compress(True)
'''
we recommend use Proto data format in HTTP-body, set True(which is default)
if you want use JSON data format in HTTP-body, set False
'''
#client.set_http_proto(True)
label_dict = {}
label_idx = 0
with open("imagenet.label") as fin:
for line in fin:
label_dict[label_idx] = line.strip()
label_idx += 1
seq = Sequential([
URL2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])
start = time.time()
image_file = "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"
for i in range(10):
img = seq(image_file)
fetch_map = client.predict(
feed={"image": img}, fetch=["score"], batch=False)
print(fetch_map)
end = time.time()
print(end - start)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from paddle_serving_client import Client
import numpy as np
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
if len(sys.argv) != 4:
print("python resnet50_web_service.py model device port")
sys.exit(-1)
device = sys.argv[2]
if device == "cpu":
from paddle_serving_server.web_service import WebService
else:
from paddle_serving_server.web_service import WebService
class ImageService(WebService):
def init_imagenet_setting(self):
self.seq = Sequential([
URL2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose(
(2, 0, 1)), Div(255), Normalize([0.485, 0.456, 0.406],
[0.229, 0.224, 0.225], True)
])
self.label_dict = {}
label_idx = 0
with open("imagenet.label") as fin:
for line in fin:
self.label_dict[label_idx] = line.strip()
label_idx += 1
def preprocess(self, feed=[], fetch=[]):
feed_batch = []
is_batch = True
for ins in feed:
if "image" not in ins:
raise ("feed data error!")
img = self.seq(ins["image"])
feed_batch.append({"image": img[np.newaxis, :]})
return feed_batch, fetch, is_batch
def postprocess(self, feed=[], fetch=[], fetch_map={}):
score_list = fetch_map["score"]
result = {"label": [], "prob": []}
for score in score_list:
score = score.tolist()
max_score = max(score)
result["label"].append(self.label_dict[score.index(max_score)]
.strip().replace(",", ""))
result["prob"].append(max_score)
return result
image_service = ImageService(name="image")
image_service.load_model_config(sys.argv[1])
image_service.init_imagenet_setting()
if device == "gpu":
image_service.set_gpus("0")
image_service.prepare_server(
workdir="workdir", port=int(sys.argv[3]), device=device)
image_service.run_rpc_service()
image_service.run_web_service()
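As with the BERT web service, the curl request that the removed README lines sent to this image service can be reproduced from Python. A sketch only, assuming the service above was started on port 9696 as in the old README (name="image").

```python
# Sketch: Python equivalent of the old README's curl request to the
# web service above.
import requests

payload = {
    "feed": [{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}],
    "fetch": ["score"],
}
resp = requests.post("http://127.0.0.1:9696/image/prediction", json=payload)
print(resp.json())
```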
@@ -9,24 +9,20 @@ sh get_data.sh
```
the package downloaded contains cnn, lstm and bow model config along with their test_data and train_data.
-### Start RPC inference service
+### Start inference service (Support BRPC-Client/GRPC-Client/Http-Client)
```
python -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292
```
-### RPC Infer
+### BRPC-Client Infer
```
head test_data/part-0 | python test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab
```
it will get predict results of the first 10 test cases.
-### Start HTTP inference service
-```
-python text_classify_service.py imdb_cnn_model/ workdir/ 9292 imdb.vocab
-```
-### HTTP Infer
+### GRPC-Client/Http-Client Infer
```
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction
+head test_data/part-0 | python test_http_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab
```
@@ -9,23 +9,18 @@ sh get_data.sh
```
The script downloads and extracts the config files of the cnn, lstm and bow models together with test_data and train_data.
-### Start the RPC inference service
+### Start the inference service (supports BRPC-Client/GRPC-Client/Http-Client)
```
python -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292
```
-### Run prediction
+### BRPC-Client prediction
```
head test_data/part-0 | python test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab
```
Predicts the first ten samples of test_data/part-0.
-### Start the HTTP inference service
-```
-python text_classify_service.py imdb_cnn_model/ workdir/ 9292 imdb.vocab
-```
-### Run prediction
-```
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction
-```
+### GRPC-Client/Http-Client prediction
+```
+head test_data/part-0 | python test_http_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab
+```
wget https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_service.tar.gz
tar -xzf imdb_service.tar.gz
wget --no-check-certificate https://fleet.bj.bcebos.com/text_classification_data.tar.gz
tar -zxvf text_classification_data.tar.gz
python text_classify_service.py serving_server_model/ workdir imdb.vocab
@@ -12,37 +12,51 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
+from paddle_serving_client import HttpClient
-from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader.imdb_reader import IMDBDataset
import sys
import numpy as np

+client = HttpClient(ip='127.0.0.1', port='9292')
+client.load_client_config(sys.argv[1])
+#client.set_ip('127.0.0.1')
+#client.set_port('9292')
+'''
+if you want use GRPC-client, set_use_grpc_client(True)
+or you can directly use client.grpc_client_predict(...)
+as for HTTP-client,set_use_grpc_client(False)(which is default)
+or you can directly use client.http_client_predict(...)
+'''
+#client.set_use_grpc_client(True)
+'''
+if you want to enable Encrypt Module,uncommenting the following line
+'''
+#client.use_key("./key")
+'''
+if you want to compress,uncommenting the following line
+'''
+#client.set_response_compress(True)
+#client.set_request_compress(True)
+'''
+we recommend use Proto data format in HTTP-body, set True(which is default)
+if you want use JSON data format in HTTP-body, set False
+'''
+#client.set_http_proto(True)

-class IMDBService(WebService):
-    def prepare_dict(self, args={}):
-        if len(args) == 0:
-            exit(-1)
-        self.dataset = IMDBDataset()
-        self.dataset.load_resource(args["dict_file_path"])
-
-    def preprocess(self, feed={}, fetch=[]):
-        feed_batch = []
-        words_lod = [0]
-        is_batch = True
-        for ins in feed:
-            words = self.dataset.get_words_only(ins["words"])
-            words = np.array(words).reshape(len(words), 1)
-            words_lod.append(words_lod[-1] + len(words))
-            feed_batch.append(words)
-        feed = {"words": np.concatenate(feed_batch), "words.lod": words_lod}
-        return feed, fetch, is_batch
-
-imdb_service = IMDBService(name="imdb")
-imdb_service.load_model_config(sys.argv[1])
-imdb_service.prepare_server(
-    workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu")
-imdb_service.prepare_dict({"dict_file_path": sys.argv[4]})
-imdb_service.run_rpc_service()
-imdb_service.run_web_service()
+# you can define any english sentence or dataset here
+# This example reuses imdb reader in training, you
+# can define your own data preprocessing easily.
+imdb_dataset = IMDBDataset()
+imdb_dataset.load_resource(sys.argv[2])
+
+for line in sys.stdin:
+    word_ids, label = imdb_dataset.get_words_and_label(line)
+    word_len = len(word_ids)
+    feed = {
+        "words": np.array(word_ids).reshape(word_len, 1),
+        "words.lod": [0, word_len]
+    }
+    #print(feed)
+    fetch = ["prediction"]
+    fetch_map = client.predict(feed=feed, fetch=fetch, batch=True)
+    print(fetch_map)
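The loop above sends one review per request, with `words.lod = [0, word_len]`. A hedged sketch of batching two reviews into a single request, following the LoD pattern used by the removed `IMDBService.preprocess` (concatenate along axis 0 and extend the lod offsets); the sample sentences are arbitrary and `imdb_dataset`, `client` and `np` are assumed to be set up as in the script above.

```python
# Sketch only: two tokenized reviews in one batched request.
ids_a, _ = imdb_dataset.get_words_and_label("this is a great movie | 1")
ids_b, _ = imdb_dataset.get_words_and_label("i am very sad | 0")
words = np.concatenate([
    np.array(ids_a).reshape(len(ids_a), 1),
    np.array(ids_b).reshape(len(ids_b), 1),
])
feed = {"words": words, "words.lod": [0, len(ids_a), len(ids_a) + len(ids_b)]}
print(client.predict(feed=feed, fetch=["prediction"], batch=True))
```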
@@ -8,24 +8,19 @@ python -m paddle_serving_app.package --get_model lac
tar -xzvf lac.tar.gz
```
-#### Start RPC inference service
+#### Start inference service (Support BRPC-Client/GRPC-Client/Http-Client)
```
python -m paddle_serving_server.serve --model lac_model/ --port 9292
```
-### RPC Infer
+### BRPC Infer
```
echo "我爱北京天安门" | python lac_client.py lac_client/serving_client_conf.prototxt
```
It will get the segmentation result.
-### Start HTTP inference service
-```
-python lac_web_service.py lac_model/ lac_workdir 9292
-```
-### HTTP Infer
-```
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
-```
+### GRPC/Http Infer
+```
+echo "我爱北京天安门" | python lac_http_client.py lac_client/serving_client_conf.prototxt
+```
@@ -8,24 +8,19 @@ python -m paddle_serving_app.package --get_model lac
tar -xzvf lac.tar.gz
```
-#### Start the RPC inference service
+#### Start the inference service (supports BRPC-Client/GRPC-Client/Http-Client)
```
python -m paddle_serving_server.serve --model lac_model/ --port 9292
```
-### Run RPC inference
+### Run BRPC inference
```
echo "我爱北京天安门" | python lac_client.py lac_client/serving_client_conf.prototxt
```
This returns the word segmentation result.
-### Start the HTTP inference service
-```
-python lac_web_service.py lac_model/ lac_workdir 9292
-```
-### Run HTTP inference
-```
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
-```
+### Run GRPC/Http inference
+```
+echo "我爱北京天安门" | python lac_http_client.py lac_client/serving_client_conf.prototxt
+```
+# encoding=utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -11,17 +12,56 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-#coding=utf-8
-import requests
-import json
-import time
-
-if __name__ == "__main__":
-    server = "http://127.0.0.1:9280/lac/prediction"
-    fin = open("jieba_test.txt", "r")
-    start = time.time()
-    for line in fin:
-        req_data = {"words": line.strip(), "fetch": ["crf_decode"]}
-        r = requests.post(server, json=req_data)
-    end = time.time()
-    print(end - start)
+# pylint: disable=doc-string-missing
+
+from paddle_serving_client import HttpClient
+from paddle_serving_app.reader import LACReader
+import sys
+import os
+import io
+import numpy as np
+
+client = HttpClient(ip='127.0.0.1', port='9292')
+client.load_client_config(sys.argv[1])
+#client.set_ip('127.0.0.1')
+#client.set_port('9292')
+'''
+if you want use GRPC-client, set_use_grpc_client(True)
+or you can directly use client.grpc_client_predict(...)
+as for HTTP-client,set_use_grpc_client(False)(which is default)
+or you can directly use client.http_client_predict(...)
+'''
+#client.set_use_grpc_client(True)
+'''
+if you want to enable Encrypt Module,uncommenting the following line
+'''
+#client.use_key("./key")
+'''
+if you want to compress,uncommenting the following line
+'''
+#client.set_response_compress(True)
+#client.set_request_compress(True)
+'''
+we recommend use Proto data format in HTTP-body, set True(which is default)
+if you want use JSON data format in HTTP-body, set False
+'''
+#client.set_http_proto(True)
+
+reader = LACReader()
+for line in sys.stdin:
+    if len(line) <= 0:
+        continue
+    feed_data = reader.process(line)
+    if len(feed_data) <= 0:
+        continue
+    print(feed_data)
+    #fetch_map = client.predict(feed={"words": np.array(feed_data).reshape(len(feed_data), 1), "words.lod": [0, len(feed_data)]}, fetch=["crf_decode"], batch=True)
+    fetch_map = client.predict(
+        feed={
+            "words": np.array(feed_data + feed_data).reshape(
+                len(feed_data) * 2, 1),
+            "words.lod": [0, len(feed_data), 2 * len(feed_data)]
+        },
+        fetch=["crf_decode"],
+        batch=True)
+    print(fetch_map)
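The active call above duplicates each sentence to build a two-sample batch, with lod offsets `[0, n, 2n]`; the commented-out line shows the single-sentence form. For clarity, a sketch of that single-sentence request pulled out on its own, assuming `reader`, `client` and `np` from the script above:

```python
# Sketch: single-sentence LAC request, matching the commented-out call above.
feed_data = reader.process("我爱北京天安门")
fetch_map = client.predict(
    feed={
        "words": np.array(feed_data).reshape(len(feed_data), 1),
        "words.lod": [0, len(feed_data)]
    },
    fetch=["crf_decode"],
    batch=True)
print(fetch_map)
```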
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_server.web_service import WebService
import sys
from paddle_serving_app.reader import LACReader
import numpy as np
class LACService(WebService):
def load_reader(self):
self.reader = LACReader()
def preprocess(self, feed={}, fetch=[]):
feed_batch = []
fetch = ["crf_decode"]
lod_info = [0]
is_batch = True
for ins in feed:
if "words" not in ins:
raise ("feed data error!")
feed_data = self.reader.process(ins["words"])
feed_batch.append(np.array(feed_data).reshape(len(feed_data), 1))
lod_info.append(lod_info[-1] + len(feed_data))
feed_dict = {
"words": np.concatenate(
feed_batch, axis=0),
"words.lod": lod_info
}
return feed_dict, fetch, is_batch
def postprocess(self, feed={}, fetch=[], fetch_map={}):
batch_ret = []
for idx, ins in enumerate(feed):
begin = fetch_map['crf_decode.lod'][idx]
end = fetch_map['crf_decode.lod'][idx + 1]
segs = self.reader.parse_result(ins["words"],
fetch_map["crf_decode"][begin:end])
batch_ret.append({"word_seg": "|".join(segs)})
return batch_ret
lac_service = LACService(name="lac")
lac_service.load_model_config(sys.argv[1])
lac_service.load_reader()
lac_service.prepare_server(
workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu")
lac_service.run_rpc_service()
lac_service.run_web_service()
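The removed README lines exercised this web service with curl; the same request can be sent from Python. A sketch only, assuming the service above was launched on port 9292 as in the removed example (name="lac").

```python
# Sketch: Python equivalent of the old README's curl request to the LAC
# web service above.
import requests

payload = {"feed": [{"words": "我爱北京天安门"}], "fetch": ["word_seg"]}
resp = requests.post("http://127.0.0.1:9292/lac/prediction", json=payload)
print(resp.json())
```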
@@ -304,16 +304,20 @@ class Client(object):
        if isinstance(feed, dict):
            feed_batch.append(feed)
        elif isinstance(feed, list):
-           # if input is a list and the number of feed_var is 1.
-           # create a temp_dict { key = feed_var_name, value = list}
-           # put the temp_dict into the feed_batch.
-           if len(self.feed_names_) != 1:
-               raise ValueError(
-                   "input is a list, but we got 0 or 2+ feed_var, don`t know how to divide the feed list"
-               )
-           temp_dict = {}
-           temp_dict[self.feed_names_[0]] = feed
-           feed_batch.append(temp_dict)
+           # feed = [dict]
+           if len(feed) == 1 and isinstance(feed[0], dict):
+               feed_batch = feed
+           else:
+               # if input is a list and the number of feed_var is 1.
+               # create a temp_dict { key = feed_var_name, value = list}
+               # put the temp_dict into the feed_batch.
+               if len(self.feed_names_) != 1:
+                   raise ValueError(
+                       "input is a list, but we got 0 or 2+ feed_var, don`t know how to divide the feed list"
+                   )
+               temp_dict = {}
+               temp_dict[self.feed_names_[0]] = feed
+               feed_batch.append(temp_dict)
        else:
            raise ValueError("Feed only accepts dict and list of dict")
......
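In effect, the hunk above lets `Client.predict` accept a single-element list of dicts as a ready-made batch, alongside the existing forms. A hedged sketch of the accepted call shapes, assuming a configured `Client` and a model with a single feed variable named `words`, as in the IMDB example; the word ids are illustrative.

```python
# Sketch of the feed forms predict() now accepts.
import numpy as np

words = np.array([8, 233, 52, 601], dtype="int64").reshape(4, 1)
sample = {"words": words, "words.lod": [0, 4]}

# 1) a plain dict is appended to feed_batch as one sample (unchanged behaviour)
client.predict(feed=sample, fetch=["prediction"], batch=False)

# 2) new in this hunk: a list holding exactly one dict is used directly as the batch
client.predict(feed=[sample], fetch=["prediction"], batch=True)

# 3) a bare list is still only accepted when the model has exactly one feed_var;
#    otherwise predict() raises ValueError, as before.
```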
@@ -73,9 +73,9 @@ def data_bytes_number(datalist):
# you can directly call the http_client_predict/grpc_client_predict you need
# e.g. to use GRPC, call set_use_grpc_client(True)
# or call grpc_client_predict() directly
-class GeneralClient(object):
+class HttpClient(object):
    def __init__(self,
-                ip="0.0.0.0",
+                ip="127.0.0.1",
                 port="9393",
                 service_name="/GeneralModelService/inference"):
        self.feed_names_ = []
@@ -84,7 +84,7 @@ class HttpClient(object):
        self.feed_shapes_ = {}
        self.feed_types_ = {}
        self.feed_names_to_idx_ = {}
-       self.timeout_ms = 200000
+       self.timeout_ms = 20000
        self.ip = ip
        self.port = port
        self.server_port = port
@@ -96,6 +96,17 @@ class HttpClient(object):
        self.http_proto = True
        self.max_body_size = 512 * 1024 * 1024
        self.use_grpc_client = False
+       # a pooled session avoids re-establishing the connection for every request
+       self.requests_session = requests.session()
+       # initialize the grpc stub
+       options = [('grpc.max_receive_message_length', self.max_body_size),
+                  ('grpc.max_send_message_length', self.max_body_size)]
+       endpoints = [self.ip + ":" + self.server_port]
+       g_endpoint = 'ipv4:{}'.format(','.join(endpoints))
+       self.channel_ = grpc.insecure_channel(g_endpoint, options=options)
+       self.stub_ = general_model_service_pb2_grpc.GeneralModelServiceStub(
+           self.channel_)

    def load_client_config(self, model_config_path_list):
        if isinstance(model_config_path_list, str):
@@ -155,6 +166,7 @@ class HttpClient(object):
    def set_max_body_size(self, max_body_size):
        self.max_body_size = max_body_size
+       self.init_grpc_stub()

    def set_timeout_ms(self, timeout_ms):
        if not isinstance(timeout_ms, int):
@@ -162,14 +174,23 @@ class HttpClient(object):
        else:
            self.timeout_ms = timeout_ms

+   def set_max_retries(self, retry_times=3):
+       if not isinstance(retry_times, int):
+           raise ValueError("retry_times must be int type.")
+       else:
+           self.requests_session.mount(
+               'http://', HTTPAdapter(max_retries=retry_times))

    def set_ip(self, ip):
        self.ip = ip
+       self.init_grpc_stub()

    def set_service_name(self, service_name):
        self.service_name = service_name

    def set_port(self, port):
        self.port = port
+       self.init_grpc_stub()

    def set_request_compress(self, try_request_gzip):
        self.try_request_gzip = try_request_gzip
@@ -195,13 +216,14 @@ class HttpClient(object):
            req = json.dumps({"key": base64.b64encode(self.key).decode()})
        else:
            req = json.dumps({})
-       r = requests.post(encrypt_url, req)
-       result = r.json()
-       if "endpoint_list" not in result:
-           raise ValueError("server not ready")
-       else:
-           self.server_port = str(result["endpoint_list"][0])
-           print("rpc port is ", self.server_port)
+       with requests.post(
+               encrypt_url, data=req, timeout=self.timeout_ms / 1000) as r:
+           result = r.json()
+           if "endpoint_list" not in result:
+               raise ValueError("server not ready")
+           else:
+               self.server_port = str(result["endpoint_list"][0])
+               print("rpc port is ", self.server_port)

    def get_feed_names(self):
        return self.feed_names_
@@ -239,6 +261,10 @@ class HttpClient(object):
        if isinstance(feed, dict):
            feed_dict = feed
        elif isinstance(feed, (list, str, tuple)):
+           # feed = [dict]
+           if len(feed) == 1 and isinstance(feed[0], dict):
+               feed_dict = feed[0]
+               return feed_dict
            # if input is a list or str or tuple, and the number of feed_var is 1.
            # create a feed_dict { key = feed_var_name, value = list}
            if len(self.feed_names_) == 1:
@@ -443,8 +469,11 @@ class HttpClient(object):
        # only compress when the data section is larger than 512 bytes.
        try:
            if self.try_request_gzip and self.total_data_number > 512:
+               origin_data = postData
-               postData = gzip.compress(bytes(postData, 'utf-8'))
+               if self.http_proto:
+                   postData = gzip.compress(postData)
+               else:
+                   postData = gzip.compress(bytes(postData, 'utf-8'))
                headers["Content-Encoding"] = "gzip"
            if self.try_response_gzip:
                headers["Accept-encoding"] = "gzip"
@@ -452,11 +481,14 @@ class HttpClient(object):
        except:
            print("compress error, we will use the no-compress data")
            headers.pop("Content-Encoding", "nokey")
+           postData = origin_data
        # requests detects and decompresses the response automatically
        try:
-           result = requests.post(url=web_url, headers=headers, data=postData)
+           result = self.requests_session.post(
+               url=web_url,
+               headers=headers,
+               data=postData,
+               timeout=self.timeout_ms / 1000)
+           result.raise_for_status()
        except:
            print("http post error")
            return None
@@ -484,6 +516,16 @@ class HttpClient(object):
        postData = self.process_proto_data(feed_dict, fetch_list, batch, log_id)

+       try:
+           resp = self.stub_.inference(
+               postData, timeout=self.timeout_ms / 1000)
+       except:
+           print("Grpc inference error occur")
+           return None
+       else:
+           return resp

+   def init_grpc_stub(self):
        # https://github.com/tensorflow/serving/issues/1382
        options = [('grpc.max_receive_message_length', self.max_body_size),
                   ('grpc.max_send_message_length', self.max_body_size)]
@@ -493,10 +535,7 @@ class HttpClient(object):
        self.channel_ = grpc.insecure_channel(g_endpoint, options=options)
        self.stub_ = general_model_service_pb2_grpc.GeneralModelServiceStub(
            self.channel_)
-       try:
-           resp = self.stub_.inference(postData, timeout=self.timeout_ms)
-       except:
-           print("Grpc inference error occur")
-           return None
-       else:
-           return resp

+   def __del__(self):
+       self.requests_session.close()
+       self.channel_.close()
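Taken together, the renamed `HttpClient` now keeps a pooled requests session and a pre-built gRPC stub, and the connection setters rebuild the stub. A hedged end-to-end sketch of the refreshed API, using the fit_a_line example's client config and assuming its feed variable is named `x` with 13 float features:

```python
# Sketch only: exercising the HttpClient features touched by this change.
from paddle_serving_client.httpclient import HttpClient

client = HttpClient(ip="127.0.0.1", port="9393")       # new default ip
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.set_timeout_ms(10000)        # applied to both HTTP and gRPC requests
client.set_max_retries(3)           # mounts an HTTPAdapter on the pooled session
client.set_request_compress(True)   # gzip bodies larger than 512 bytes
# client.set_use_grpc_client(True)  # optional: switch to the gRPC stub

result = client.predict(feed={"x": [0.0] * 13}, fetch=["price"], batch=False)
print(result)
```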
@@ -37,7 +37,7 @@ import socket
def port_is_available(port):
    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
        sock.settimeout(2)
-       result = sock.connect_ex(('0.0.0.0', port))
+       result = sock.connect_ex(('127.0.0.1', port))
    if result != 0:
        return True
    else:
......
@@ -537,7 +537,7 @@ class Server(object):
    def port_is_available(self, port):
        with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
            sock.settimeout(2)
-           result = sock.connect_ex(('0.0.0.0', port))
+           result = sock.connect_ex(('127.0.0.1', port))
        if result != 0:
            return True
        else:
......
@@ -33,7 +33,7 @@ from paddle_serving_server.serve import format_gpu_to_strlist
def port_is_available(port):
    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
        sock.settimeout(2)
-       result = sock.connect_ex(('0.0.0.0', port))
+       result = sock.connect_ex(('127.0.0.1', port))
    if result != 0:
        return True
    else:
......
@@ -39,7 +39,7 @@ class AvailablePortGenerator(object):
    def port_is_available(port):
        with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
            sock.settimeout(2)
-           result = sock.connect_ex(('0.0.0.0', port))
+           result = sock.connect_ex(('127.0.0.1', port))
        if result != 0:
            return True
        else:
......
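All four helpers above now probe the loopback address instead of 0.0.0.0. For reference, a self-contained sketch of the check itself; the port value is only an example.

```python
# Sketch: the port probe used above. connect_ex() returns 0 when something is
# already listening on 127.0.0.1:<port>, so a non-zero result means "available".
import socket
from contextlib import closing

def port_is_available(port):
    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
        sock.settimeout(2)
        result = sock.connect_ex(('127.0.0.1', port))
    return result != 0

print(port_is_available(9393))
```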