Merge pull request #281 from MRXLT/general-server-doc

refine demo script and readme

Merge pull request #281 from MRXLT/general-server-doc
refine demo script and readme
4f6b84ae · MRXLT · GitHub · 37a039ea · 311a3cd5 · 4f6b84ae
31 changed files
--- a/python/examples/bert/README.md
+++ b/python/examples/bert/README.md
@@ -12,29 +12,43 @@ python prepare_model.py
 生成server端配置文件与模型文件，存放在serving_server_model文件夹
 生成client端配置文件，存放在serving_client_conf文件夹

-### 启动预测服务
+### 获取词典和样例数据
+
+```
+sh get_data.sh
+```
+脚本将下载中文词典vocab.txt和中文样例数据data-c.txt
+
+### 启动RPC预测服务
 执行
 ```
-python bert_server.py serving_server_model 9292 #启动cpu预测服务
+python -m paddle_serving_server.serve --model serving_server_model/ --port 9292  #启动cpu预测服务
 ```
 或者
 ```
-python bert_gpu_server.py serving_server_model 9292 0 #在gpu 0上启动gpu预测服务
+python -m paddle_serving_server_gpu.serve --model serving_server_model/ --port 9292 --gpu_ids 0 #在gpu 0上启动gpu预测服务
 ```

 ### 执行预测

-执行
 ```
-sh get_data.sh
+python bert_rpc_client.py --thread 4 
 ```
-获取中文样例数据
+启动client读取data-c.txt中的数据进行预测，--thread参数控制client的进程数，预测结束后会打印出每个进程的耗时。server端的地址可在脚本中修改。
+
+### 启动HTTP预测服务
+```
+ export CUDA_VISIBLE_DEVICES=0,1
+```
+通过环境变量指定gpu预测服务使用的gpu，示例中指定索引为0和1的两块gpu
+```
+ python bert_web_service.py serving_server_model/ 9292 #启动gpu预测服务
+```
+### 执行预测

-执行
 ```
-head data-c.txt | python bert_client.py
+curl -H "Content-Type:application/json" -X POST -d '{"words": "hello", "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
 ```
-将预测样例数据中的前十条样例，并将向量表示打印到标准输出。

 ### Benchmark


--- a/python/examples/bert/benchmark.py
+++ b/python/examples/bert/benchmark.py
@@ -33,38 +33,45 @@ args = benchmark_args()

 def single_func(idx, resource):
    fin = open("data-c.txt")
+    dataset = []
+    for line in fin:
+        dataset.append(line.strip())
    if args.request == "rpc":
-        reader = BertReader(vocab_file="vocab.txt", max_seq_len=128)
+        reader = BertReader(vocab_file="vocab.txt", max_seq_len=20)
        fetch = ["pooled_output"]
        client = Client()
        client.load_client_config(args.model)
-        client.connect([resource["endpoint"][idx % 4]])
+        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])

        start = time.time()
-        for line in fin:
-            feed_dict = reader.process(line)
-            result = client.predict(feed=feed_dict, fetch=fetch)
-        end = time.time()
+        for i in range(1000):
+            if args.batch_size == 1:
+                feed_dict = reader.process(dataset[i])
+                result = client.predict(feed=feed_dict, fetch=fetch)
+            else:
+                print("unsupport batch size {}".format(args.batch_size))
+
    elif args.request == "http":
        start = time.time()
        header = {"Content-Type": "application/json"}
-        for line in fin:
-            dict_data = {"words": line, "fetch": ["pooled_output"]}
+        for i in range(1000):
+            dict_data = {"words": dataset[i], "fetch": ["pooled_output"]}
            r = requests.post(
-                'http://{}/bert/prediction'.format(resource["endpoint"][0]),
+                'http://{}/bert/prediction'.format(resource["endpoint"][
+                    idx % len(resource["endpoint"])]),
                data=json.dumps(dict_data),
                headers=header)
-        end = time.time()
+    end = time.time()
    return [[end - start]]


 if __name__ == '__main__':
    multi_thread_runner = MultiThreadRunner()
-    endpoint_list = []
-    card_num = 4
-    for i in range(args.thread):
-        endpoint_list.append("127.0.0.1:{}".format(9494 + i % card_num))
-    print(endpoint_list)
+    endpoint_list = ["127.0.0.1:9292"]
    result = multi_thread_runner.run(single_func, args.thread,
                                     {"endpoint": endpoint_list})
-    print(result)
+    avg_cost = 0
+    for i in range(args.thread):
+        avg_cost += result[0][i]
+    avg_cost = avg_cost / args.thread
+    print("average total cost {} s.".format(avg_cost))
--- a/python/examples/bert/benchmark.sh
+++ b/python/examples/bert/benchmark.sh
 rm profile_log
-for thread_num in 1 4 8 12 16 20 24
+for thread_num in 1 2 4 8 16
 do
-    $PYTHONROOT/bin/python benchmark.py serving_client_conf/serving_client_conf.prototxt data.txt $thread_num $batch_size > profile 2>&1
+    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
+    echo "========================================"
+    echo "batch size : $batch_size" >> profile_log
    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
    tail -n 1 profile >> profile_log
 done
--- a/python/examples/bert/benchmark_batch.py
+++ b/python/examples/bert/benchmark_batch.py
@@ -27,52 +27,45 @@ import tokenization
 import requests
 import json
 from bert_reader import BertReader
-
 args = benchmark_args()

-batch_size = 24
-

 def single_func(idx, resource):
    fin = open("data-c.txt")
+    dataset = []
+    for line in fin:
+        dataset.append(line.strip())
    if args.request == "rpc":
-        reader = BertReader(vocab_file="vocab.txt", max_seq_len=128)
+        reader = BertReader(vocab_file="vocab.txt", max_seq_len=20)
        fetch = ["pooled_output"]
        client = Client()
        client.load_client_config(args.model)
-        client.connect([resource["endpoint"][idx % 4]])
+        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])

        start = time.time()
-        idx = 0
-        batch_data = []
-        for line in fin:
-            feed_dict = reader.process(line)
-            batch_data.append(feed_dict)
-            idx += 1
-            if idx % batch_size == 0:
+        for i in range(1000):
+            if args.batch_size >= 1:
+                feed_batch = []
+                for bi in range(args.batch_size):
+                    feed_batch.append(reader.process(dataset[i]))
                result = client.batch_predict(
-                    feed_batch=batch_data, fetch=fetch)
-                batch_data = []
-        end = time.time()
+                    feed_batch=feed_batch, fetch=fetch)
+            else:
+                print("unsupport batch size {}".format(args.batch_size))
+
    elif args.request == "http":
-        header = {"Content-Type": "application/json"}
-        for line in fin:
-            dict_data = {"words": line, "fetch": ["pooled_output"]}
-            r = requests.post(
-                'http://{}/bert/prediction'.format(resource["endpoint"][0]),
-                data=json.dumps(dict_data),
-                headers=header)
-        end = time.time()
+        raise ("no batch predict for http")
+    end = time.time()
    return [[end - start]]


 if __name__ == '__main__':
    multi_thread_runner = MultiThreadRunner()
-    endpoint_list = []
-    card_num = 4
-    for i in range(args.thread):
-        endpoint_list.append("127.0.0.1:{}".format(9494 + i % card_num))
-    print(endpoint_list)
+    endpoint_list = ["127.0.0.1:9292"]
    result = multi_thread_runner.run(single_func, args.thread,
                                     {"endpoint": endpoint_list})
-    print(result)
+    avg_cost = 0
+    for i in range(args.thread):
+        avg_cost += result[0][i]
+    avg_cost = avg_cost / args.thread
+    print("average total cost {} s.".format(avg_cost))
--- a/python/examples/bert/benchmark_batch.sh
+++ b/python/examples/bert/benchmark_batch.sh
 rm profile_log
-thread_num=1
-for batch_size in 1 4 8 16 32 64 128 256
+for thread_num in 1 2 4 8 16
 do
-    $PYTHONROOT/bin/python benchmark_batch.py serving_client_conf/serving_client_conf.prototxt data.txt $thread_num $batch_size > profile 2>&1
+for batch_size in 1 2 4 8 16 32 64 128 256 512
+do
+    $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
+    echo "========================================"
+    echo "batch size : $batch_size" >> profile_log
    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
    tail -n 1 profile >> profile_log
 done
+done
--- a/python/examples/bert/bert_web_service.py
+++ b/python/examples/bert/bert_web_service.py
@@ -34,5 +34,6 @@ bert_service.load_model_config(sys.argv[1])
 gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"]
 gpus = [int(x) for x in gpu_ids.split(",")]
 bert_service.set_gpus(gpus)
-bert_service.prepare_server(workdir="workdir", port=9494, device="gpu")
+bert_service.prepare_server(
+    workdir="workdir", port=int(sys.argv[2]), device="gpu")
 bert_service.run_server()
--- a/python/examples/bert/get_data.sh
+++ b/python/examples/bert/get_data.sh
 wget https://paddle-serving.bj.bcebos.com/bert_example/data-c.txt --no-check-certificate
+wget https://paddle-serving.bj.bcebos.com/bert_example/vocab.txt --no-check-certificate
--- a/python/examples/criteo_ctr/README.md
+++ b/python/examples/criteo_ctr/README.md
-# CTR task on Criteo Dataset
+## CTR预测服务
+
+### 获取样例数据
+```
+sh get_data.sh
+```
+
+### 保存模型和配置文件
+```
+python local_train.py
+```
+执行脚本后会在当前目录生成ctr_serving_model和ctr_client_conf文件夹。
+
+### 启动RPC预测服务
+
+```
+python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292
+```
+
+### 执行预测
+
+```
+python test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/
+```
--- a/python/examples/criteo_ctr/benchmark.py
+++ b/python/examples/criteo_ctr/benchmark.py
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
+
+from __future__ import unicode_literals, absolute_import
+import os
+import sys
+import time
+from paddle_serving_client import Client
+from paddle_serving_client.utils import MultiThreadRunner
+from paddle_serving_client.utils import benchmark_args
+import requests
+import json
+import criteo_reader as criteo
+
+args = benchmark_args()
+
+
+def single_func(idx, resource):
+    batch = 1
+    buf_size = 100
+    dataset = criteo.CriteoDataset()
+    dataset.setup(1000001)
+    test_filelists = [
+        "./raw_data/part-%d" % x for x in range(len(os.listdir("./raw_data")))
+    ]
+    reader = dataset.infer_reader(test_filelists[len(test_filelists) - 40:],
+                                  batch, buf_size)
+    if args.request == "rpc":
+        fetch = ["prob"]
+        client = Client()
+        client.load_client_config(args.model)
+        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
+
+        start = time.time()
+        for i in range(1000):
+            if args.batch_size == 1:
+                data = reader().next()
+                feed_dict = {}
+                for i in range(1, 27):
+                    feed_dict["sparse_{}".format(i - 1)] = data[0][i]
+                result = client.predict(feed=feed_dict, fetch=fetch)
+            else:
+                print("unsupport batch size {}".format(args.batch_size))
+
+    elif args.request == "http":
+        raise ("Not support http service.")
+    end = time.time()
+    return [[end - start]]
+
+
+if __name__ == '__main__':
+    multi_thread_runner = MultiThreadRunner()
+    endpoint_list = ["127.0.0.1:9292"]
+    #endpoint_list = endpoint_list + endpoint_list + endpoint_list
+    result = multi_thread_runner.run(single_func, args.thread,
+                                     {"endpoint": endpoint_list})
+    #result = single_func(0, {"endpoint": endpoint_list})
+    avg_cost = 0
+    for i in range(args.thread):
+        avg_cost += result[0][i]
+    avg_cost = avg_cost / args.thread
+    print("average total cost {} s.".format(avg_cost))
--- a/python/examples/criteo_ctr/benchmark.sh
+++ b/python/examples/criteo_ctr/benchmark.sh
+rm profile_log
+for thread_num in 1 2 4 8 16
+do
+    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --model ctr_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
+    echo "========================================"
+    echo "batch size : $batch_size" >> profile_log
+    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
+    tail -n 1 profile >> profile_log
+done
--- a/python/examples/criteo_ctr/benchmark_batch.py
+++ b/python/examples/criteo_ctr/benchmark_batch.py
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
+
+from __future__ import unicode_literals, absolute_import
+import os
+import sys
+import time
+from paddle_serving_client import Client
+from paddle_serving_client.utils import MultiThreadRunner
+from paddle_serving_client.utils import benchmark_args
+import requests
+import json
+import criteo_reader as criteo
+
+args = benchmark_args()
+
+
+def single_func(idx, resource):
+    batch = 1
+    buf_size = 100
+    dataset = criteo.CriteoDataset()
+    dataset.setup(1000001)
+    test_filelists = [
+        "./raw_data/part-%d" % x for x in range(len(os.listdir("./raw_data")))
+    ]
+    reader = dataset.infer_reader(test_filelists[len(test_filelists) - 40:],
+                                  batch, buf_size)
+    if args.request == "rpc":
+        fetch = ["prob"]
+        client = Client()
+        client.load_client_config(args.model)
+        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
+
+        start = time.time()
+        for i in range(1000):
+            if args.batch_size >= 1:
+                feed_batch = []
+                for bi in range(args.batch_size):
+                    feed_dict = {}
+                    data = reader().next()
+                    for i in range(1, 27):
+                        feed_dict["sparse_{}".format(i - 1)] = data[0][i]
+                    feed_batch.append(feed_dict)
+                result = client.batch_predict(
+                    feed_batch=feed_batch, fetch=fetch)
+            else:
+                print("unsupport batch size {}".format(args.batch_size))
+
+    elif args.request == "http":
+        raise ("no batch predict for http")
+    end = time.time()
+    return [[end - start]]
+
+
+if __name__ == '__main__':
+    multi_thread_runner = MultiThreadRunner()
+    endpoint_list = ["127.0.0.1:9292"]
+    #endpoint_list = endpoint_list + endpoint_list + endpoint_list
+    result = multi_thread_runner.run(single_func, args.thread,
+                                     {"endpoint": endpoint_list})
+    #result = single_func(0, {"endpoint": endpoint_list})
+    avg_cost = 0
+    for i in range(args.thread):
+        avg_cost += result[0][i]
+    avg_cost = avg_cost / args.thread
+    print("average total cost {} s.".format(avg_cost))
--- a/python/examples/criteo_ctr/benchmark_batch.sh
+++ b/python/examples/criteo_ctr/benchmark_batch.sh
+rm profile_log
+for thread_num in 1 2 4 8 16
+do
+for batch_size in 1 2 4 8 16 32 64 128 256 512
+do
+    $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
+    echo "========================================"
+    echo "batch size : $batch_size" >> profile_log
+    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
+    tail -n 1 profile >> profile_log
+done
+done
--- a/python/examples/criteo_ctr/get_data.sh
+++ b/python/examples/criteo_ctr/get_data.sh
 wget --no-check-certificate https://paddle-serving.bj.bcebos.com/data/ctr_prediction/ctr_data.tar.gz
-tar -zxvf *ctr_data.tar.gz
+tar -zxvf ctr_data.tar.gz
--- a/python/examples/criteo_ctr/test_client.py
+++ b/python/examples/criteo_ctr/test_client.py
@@ -17,6 +17,7 @@ from paddle_serving_client import Client
 import paddle
 import sys
 import os
+import time
 import criteo_reader as criteo
 from paddle_serving_client.metric import auc

@@ -34,12 +35,15 @@ test_filelists = [
 ]
 reader = dataset.infer_reader(test_filelists[len(test_filelists) - 40:], batch,
                              buf_size)
-
 label_list = []
 prob_list = []
-for data in reader():
+start = time.time()
+for ei in range(1000):
+    data = reader().next()
    feed_dict = {}
    for i in range(1, 27):
        feed_dict["sparse_{}".format(i - 1)] = data[0][i]
    fetch_map = client.predict(feed=feed_dict, fetch=["prob"])
-    print(fetch_map)
+    #print(fetch_map)
+end = time.time()
+print(end - start)
--- a/python/examples/imagenet/README.md
+++ b/python/examples/imagenet/README.md
@@ -2,26 +2,34 @@

 示例中采用ResNet50_vd模型执行imagenet 1000分类任务。

-### 模型及配置文件获取
+### 获取模型配置文件和样例数据
 ```
 sh get_model.sh
 ```
-### 执行wb service预测服务
+### 执行HTTP预测服务

 启动server端
 ```
-python image_classification_service.py conf_and_model/serving_server_model workdir 9393
+python image_classification_service.py ResNet50_vd_model workdir 9393 #cpu预测服务
 ```
+```
+python image_classification_service_gpu.py ResNet50_vd_model workdir 9393 #gpu预测服务
+```
+

 client端进行预测
 ```
 python image_http_client.py
 ```
-### 执行rpc service预测服务
+### 执行RPC预测服务

 启动server端
 ```
-python -m paddle_serving_server.serve --model conf_and_model/serving_server_model/ --port 9393
+python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9393 #cpu预测服务
+```
+
+```
+python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9393 --gpu_ids 0 #gpu预测服务
 ```

 client端进行预测

--- a/python/examples/imagenet/benchmark.py
+++ b/python/examples/imagenet/benchmark.py
@@ -18,23 +18,28 @@ from paddle_serving_client import Client
 from paddle_serving_client.utils import MultiThreadRunner
 from paddle_serving_client.utils import benchmark_args
 import time
+import os

 args = benchmark_args()


 def single_func(idx, resource):
+    file_list = []
+    for file_name in os.listdir("./image_data/n01440764"):
+        file_list.append(file_name)
+    img_list = []
+    for i in range(1000):
+        img_list.append(open("./image_data/n01440764/" + file_list[i]).read())
    if args.request == "rpc":
        reader = ImageReader()
        fetch = ["score"]
        client = Client()
        client.load_client_config(args.model)
-        client.connect([resource["endpoint"][idx % 4]])
+        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])

        start = time.time()
        for i in range(1000):
-            with open("./data/n01440764_10026.JPEG") as f:
-                img = f.read()
-            img = reader.process_image(img).reshape(-1)
+            img = reader.process_image(img_list[i]).reshape(-1)
            fetch_map = client.predict(feed={"image": img}, fetch=["score"])
        end = time.time()
        return [[end - start]]
@@ -43,10 +48,14 @@ def single_func(idx, resource):

 if __name__ == "__main__":
    multi_thread_runner = MultiThreadRunner()
-    endpoint_list = []
-    card_num = 4
-    for i in range(args.thread):
-        endpoint_list.append("127.0.0.1:{}".format(9295 + i % card_num))
+    endpoint_list = ["127.0.0.1:9393"]
+    #card_num = 4
+    #for i in range(args.thread):
+    #    endpoint_list.append("127.0.0.1:{}".format(9295 + i % card_num))
    result = multi_thread_runner.run(single_func, args.thread,
                                     {"endpoint": endpoint_list})
-    print(result)
+    avg_cost = 0
+    for i in range(args.thread):
+        avg_cost += result[0][i]
+    avg_cost = avg_cost / args.thread
+    print("average total cost {} s.".format(avg_cost))
--- a/python/examples/imagenet/benchmark.sh
+++ b/python/examples/imagenet/benchmark.sh
+rm profile_log
+for thread_num in 1 2 4 8 16
+do
+    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --model ResNet101_vd_client_config/serving_client_conf.prototxt --request rpc > profile 2>&1
+    echo "========================================"
+    echo "batch size : $batch_size" >> profile_log
+    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
+    tail -n 1 profile >> profile_log
+done
--- a/python/examples/imagenet/benchmark_batch.py
+++ b/python/examples/imagenet/benchmark_batch.py
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
+
+from __future__ import unicode_literals, absolute_import
+import os
+import sys
+import time
+from paddle_serving_client import Client
+from paddle_serving_client.utils import MultiThreadRunner
+from paddle_serving_client.utils import benchmark_args
+import requests
+import json
+from image_reader import ImageReader
+
+args = benchmark_args()
+
+
+def single_func(idx, resource):
+    file_list = []
+    for file_name in os.listdir("./image_data/n01440764"):
+        file_list.append(file_name)
+    img_list = []
+    for i in range(1000):
+        img_list.append(open("./image_data/n01440764/" + file_list[i]).read())
+    if args.request == "rpc":
+        reader = ImageReader()
+        fetch = ["score"]
+        client = Client()
+        client.load_client_config(args.model)
+        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
+        start = time.time()
+        for i in range(1000):
+            if args.batch_size >= 1:
+                feed_batch = []
+                for bi in range(args.batch_size):
+                    img = reader.process_image(img_list[i])
+                    img = img.reshape(-1)
+                    feed_batch.append({"image": img})
+                result = client.batch_predict(
+                    feed_batch=feed_batch, fetch=fetch)
+            else:
+                print("unsupport batch size {}".format(args.batch_size))
+
+    elif args.request == "http":
+        raise ("no batch predict for http")
+    end = time.time()
+    return [[end - start]]
+
+
+if __name__ == '__main__':
+    multi_thread_runner = MultiThreadRunner()
+    endpoint_list = ["127.0.0.1:9393"]
+    #endpoint_list = endpoint_list + endpoint_list + endpoint_list
+    result = multi_thread_runner.run(single_func, args.thread,
+                                     {"endpoint": endpoint_list})
+    #result = single_func(0, {"endpoint": endpoint_list})
+    avg_cost = 0
+    for i in range(args.thread):
+        avg_cost += result[0][i]
+    avg_cost = avg_cost / args.thread
+    print("average total cost {} s.".format(avg_cost))
--- a/python/examples/imagenet/benchmark_batch.sh
+++ b/python/examples/imagenet/benchmark_batch.sh
+rm profile_log
+for thread_num in 1 2 4 8 16
+do
+for batch_size in 1 2 4 8 16 32 64 128 256 512
+do
+    $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model ResNet101_vd_client_config/serving_client_conf.prototxt --request rpc > profile 2>&1
+    echo "========================================"
+    echo "batch size : $batch_size" >> profile_log
+    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
+    tail -n 1 profile >> profile_log
+done
+done
--- a/python/examples/imagenet/get_model.sh
+++ b/python/examples/imagenet/get_model.sh
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imagenet-example/conf_and_model.tar.gz
-tar -xzvf conf_and_model.tar.gz
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet50_vd.tar.gz
+tar -xzvf ResNet50_vd.tar.gz
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet101_vd.tar.gz
+tar -xzvf ResNet101_vd.tar.gz
+
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imagenet-example/image_data.tar.gz
+tar -xzvf imgae_data.tar.gz
--- a/python/examples/imagenet/image_http_client.py
+++ b/python/examples/imagenet/image_http_client.py
@@ -26,11 +26,10 @@ def predict(image_path, server):


 if __name__ == "__main__":
-    server = "http://127.0.0.1:9292/image/prediction"
+    server = "http://127.0.0.1:9393/image/prediction"
    image_path = "./data/n01440764_10026.JPEG"
    start = time.time()
    for i in range(1000):
        predict(image_path, server)
-        print(i)
    end = time.time()
    print(end - start)
--- a/python/examples/imdb/README.md
+++ b/python/examples/imdb/README.md
-### 使用方法
+## IMDB评论情绪预测服务

-假设数据文件为test.data，配置文件为inference.conf

-单进程client
+### 获取模型文件和样例数据
+
+```
+sh get_data.sh
+```
+脚本会下载和解压出cnn、lstm和bow三种模型的配置文件以及test_data和train_data。
+
+### 启动RPC预测服务
+
 ```
-cat test.data | python test_client.py inference.conf > result
+python -m paddle_serving_server.serve --model imdb_bow_model/ --port 9292
 ```
-多进程client，若进程数为4
+### 执行预测
 ```
-python test_client_multithread.py inference.conf test.data 4 > result
+head test_data/part-0 | python test_client.py imdb_lstm_client_conf/serving_client_conf.prototxt imdb.vocab
 ```
-batch clienit，若batch size为4
+预测test_data/part-0的前十个样例。
+
+### 启动HTTP预测服务
+```
+python text_classify_service.py imdb_cnn_model/ workdir/ 9292 imdb.vocab
+```
+### 执行预测
+
 ```
-cat test.data | python test_client_batch.py inference.conf 4 > result
+curl -H "Content-Type:application/json" -X POST -d '{"words": "i am very sad | 0", "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction
 ```

 ### Benchmark

--- a/python/examples/imdb/benchmark.py
+++ b/python/examples/imdb/benchmark.py
@@ -26,24 +26,24 @@ args = benchmark_args()

 def single_func(idx, resource):
    imdb_dataset = IMDBDataset()
-    imdb_dataset.load_resource(args.vocab)
-    filelist_fn = args.filelist
-    filelist = []
-    start = time.time()
-    with open(filelist_fn) as fin:
+    imdb_dataset.load_resource("./imdb.vocab")
+    dataset = []
+    with open("./test_data/part-0") as fin:
        for line in fin:
-            filelist.append(line.strip())
-    filelist = filelist[idx::args.thread]
+            dataset.append(line.strip())
+    start = time.time()
    if args.request == "rpc":
        client = Client()
        client.load_client_config(args.model)
        client.connect([args.endpoint])
-        for fn in filelist:
-            fin = open(fn)
-            for line in fin:
+        for i in range(1000):
+            if args.batch_size == 1:
                word_ids, label = imdb_dataset.get_words_and_label(line)
                fetch_map = client.predict(
                    feed={"words": word_ids}, fetch=["prediction"])
+            else:
+                print("unsupport batch size {}".format(args.batch_size))
+
    elif args.request == "http":
        for fn in filelist:
            fin = open(fn)

--- a/python/examples/imdb/benchmark.sh
+++ b/python/examples/imdb/benchmark.sh
+rm profile_log
+for thread_num in 1 2 4 8 16
+do
+    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --model imdbo_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
+    echo "========================================"
+    echo "batch size : $batch_size" >> profile_log
+    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
+    tail -n 1 profile >> profile_log
+done
--- a/python/examples/imdb/benchmark_batch.py
+++ b/python/examples/imdb/benchmark_batch.py
@@ -11,77 +11,55 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# pylint: disable=doc-string-missing

 import sys
+import time
+import requests
+from imdb_reader import IMDBDataset
 from paddle_serving_client import Client
-from paddle_serving_client.metric import auc
 from paddle_serving_client.utils import MultiThreadRunner
-import time
+from paddle_serving_client.utils import benchmark_args
+
+args = benchmark_args()


-def predict(thr_id, resource):
-    client = Client()
-    client.load_client_config(resource["conf_file"])
-    client.connect(resource["server_endpoint"])
-    thread_num = resource["thread_num"]
-    file_list = resource["filelist"]
-    line_id = 0
-    prob = []
-    label_list = []
+def single_func(idx, resource):
+    imdb_dataset = IMDBDataset()
+    imdb_dataset.load_resource("./imdb.vocab")
    dataset = []
-    for fn in file_list:
-        fin = open(fn)
+    with open("./test_data/part-0") as fin:
        for line in fin:
-            if line_id % thread_num == thr_id - 1:
-                group = line.strip().split()
-                words = [int(x) for x in group[1:int(group[0])]]
-                label = [int(group[-1])]
-                feed = {"words": words, "label": label}
-                dataset.append(feed)
-            line_id += 1
-        fin.close()
-
+            dataset.append(line.strip())
    start = time.time()
-    fetch = ["acc", "cost", "prediction"]
-    infer_time_list = []
-    counter = 0
-    feed_list = []
-    for inst in dataset:
-        counter += 1
-        feed_list.append(inst)
-        if counter == resource["batch_size"]:
-            fetch_map_batch, infer_time = client.batch_predict(
-                feed_batch=feed_list, fetch=fetch, profile=True)
-            #prob.append(fetch_map["prediction"][1])
-            #label_list.append(label[0])
-            infer_time_list.append(infer_time)
-            counter = 0
-            feed_list = []
-    if counter != 0:
-        fetch_map_batch, infer_time = client.batch_predict(
-            feed_batch=feed_list, fetch=fetch, profile=True)
-        infer_time_list.append(infer_time)
+    if args.request == "rpc":
+        client = Client()
+        client.load_client_config(args.model)
+        client.connect([args.endpoint])
+        for i in range(1000):
+            if args.batch_size >= 1:
+                feed_batch = []
+                for bi in range(args.batch_size):
+                    word_ids, label = imdb_dataset.get_words_and_label(line)
+                    feed_batch.append({"words": word_ids})
+                result = client.batch_predict(
+                    feed_batch=feed_batch, fetch=["prediction"])
+            else:
+                print("unsupport batch size {}".format(args.batch_size))

+    elif args.request == "http":
+        for fn in filelist:
+            fin = open(fn)
+            for line in fin:
+                word_ids, label = imdb_dataset.get_words_and_label(line)
+                r = requests.post(
+                    "http://{}/imdb/prediction".format(args.endpoint),
+                    data={"words": word_ids,
+                          "fetch": ["prediction"]})
    end = time.time()
-    client.release()
-    return [prob, label_list, [sum(infer_time_list)], [end - start]]
-
-
-if __name__ == '__main__':
-    conf_file = sys.argv[1]
-    data_file = sys.argv[2]
-    resource = {}
-    resource["conf_file"] = conf_file
-    resource["server_endpoint"] = ["127.0.0.1:9292"]
-    resource["filelist"] = [data_file]
-    resource["thread_num"] = int(sys.argv[3])
-    resource["batch_size"] = int(sys.argv[4])
+    return [[end - start]]

-    thread_runner = MultiThreadRunner()
-    result = thread_runner.run(predict, int(sys.argv[3]), resource)

-    print("thread num {}\tbatch size {}\ttotal time {}".format(sys.argv[
-        3], resource["batch_size"], sum(result[-1]) / len(result[-1])))
-    print("thread num {}\tbatch size {}\tinfer time {}".format(
-        sys.argv[3], resource["batch_size"],
-        sum(result[2]) / 1000.0 / 1000.0 / len(result[2])))
+multi_thread_runner = MultiThreadRunner()
+result = multi_thread_runner.run(single_func, args.thread, {})
+print(result)
--- a/python/examples/imdb/benchmark_batch.sh
+++ b/python/examples/imdb/benchmark_batch.sh
+rm profile_log
+for thread_num in 1 2 4 8 16
+do
+for batch_size in 1 2 4 8 16 32 64 128 256 512
+do
+    $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model imdbo_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
+    echo "========================================"
+    echo "batch size : $batch_size" >> profile_log
+    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
+    tail -n 1 profile >> profile_log
+done
+done
--- a/python/examples/imdb/local_train.py
+++ b/python/examples/imdb/local_train.py
@@ -35,6 +35,8 @@ def load_vocab(filename):


 if __name__ == "__main__":
+    from nets import lstm_net
+    model_name = "imdb_lstm"
    vocab = load_vocab('imdb.vocab')
    dict_dim = len(vocab)

@@ -50,8 +52,6 @@ if __name__ == "__main__":
    dataset.set_batch_size(128)
    dataset.set_filelist(filelist)
    dataset.set_thread(10)
-    from nets import lstm_net
-    model_name = "imdb_lstm"
    avg_cost, acc, prediction = lstm_net(data, label, dict_dim)
    optimizer = fluid.optimizer.SGD(learning_rate=0.01)
    optimizer.minimize(avg_cost)

--- a/python/examples/imdb/test_client.py
+++ b/python/examples/imdb/test_client.py
@@ -18,7 +18,7 @@ import sys

 client = Client()
 client.load_client_config(sys.argv[1])
-client.connect(["127.0.0.1:9393"])
+client.connect(["127.0.0.1:9292"])

 # you can define any english sentence or dataset here
 # This example reuses imdb reader in training, you
@@ -28,7 +28,7 @@ imdb_dataset.load_resource(sys.argv[2])

 for line in sys.stdin:
    word_ids, label = imdb_dataset.get_words_and_label(line)
-    feed = {"words": word_ids, "label": label}
+    feed = {"words": word_ids}
    fetch = ["acc", "cost", "prediction"]
    fetch_map = client.predict(feed=feed, fetch=fetch)
    print("{} {}".format(fetch_map["prediction"][1], label[0]))
--- a/python/examples/imdb/text_classify_service.py
+++ b/python/examples/imdb/text_classify_service.py
@@ -35,6 +35,7 @@ class IMDBService(WebService):

 imdb_service = IMDBService(name="imdb")
 imdb_service.load_model_config(sys.argv[1])
-imdb_service.prepare_server(workdir=sys.argv[2], port=9393, device="cpu")
-imdb_service.prepare_dict({"dict_file_path": sys.argv[3]})
+imdb_service.prepare_server(
+    workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu")
+imdb_service.prepare_dict({"dict_file_path": sys.argv[4]})
 imdb_service.run_server()
--- a/python/examples/util/show_profile.py
+++ b/python/examples/util/show_profile.py
@@ -29,9 +29,9 @@ with open(profile_file) as f:
    for line in f.readlines():
        line = line.strip().split("\t")
        if line[0] == "PROFILE":
-            prase(line[1])
+            prase(line[2])

 print("thread num {}".format(thread_num))
 for name in time_dict:
-    print("{} cost {} s per thread ".format(name, time_dict[name] / (
+    print("{} cost {} s in each thread ".format(name, time_dict[name] / (
        1000000.0 * float(thread_num))))
--- a/python/paddle_serving_client/utils/__init__.py
+++ b/python/paddle_serving_client/utils/__init__.py
@@ -31,6 +31,7 @@ def benchmark_args():
        help="endpoint of server")
    parser.add_argument(
        "--request", type=str, default="rpc", help="mode of service")
+    parser.add_argument("--batch_size", type=int, default=1, help="batch size")
    return parser.parse_args()