Unverified commit 4f6b84ae authored by MRXLT, committed by GitHub

Merge pull request #281 from MRXLT/general-server-doc

refine demo script and readme
@@ -12,29 +12,43 @@ python prepare_model.py
The server-side configuration and model files are generated and stored in the serving_server_model folder.
The client-side configuration files are generated and stored in the serving_client_conf folder.
### Get the dictionary and sample data
```
sh get_data.sh
```
The script downloads the Chinese vocabulary file vocab.txt and the Chinese sample data data-c.txt.
### Start the RPC prediction service
Run
```
python -m paddle_serving_server.serve --model serving_server_model/ --port 9292 # start the CPU prediction service
```
or
```
python -m paddle_serving_server_gpu.serve --model serving_server_model/ --port 9292 --gpu_ids 0 # start the GPU prediction service on GPU 0
```
### Run prediction
Run
```
python bert_rpc_client.py --thread 4
```
The client reads the samples in data-c.txt and sends prediction requests. The --thread argument controls the number of client processes; when prediction finishes, the elapsed time of each process is printed. The server address is set inside the script.
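The client script itself is not shown in this hunk, but its core can be pieced together from the benchmark code later in this diff; a minimal sketch (the client config path follows the folder layout described above) looks roughly like this:
```
# Minimal RPC client sketch; not the verbatim bert_rpc_client.py from the repo.
from paddle_serving_client import Client
from bert_reader import BertReader

reader = BertReader(vocab_file="vocab.txt", max_seq_len=20)
client = Client()
client.load_client_config("serving_client_conf/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])

for line in open("data-c.txt"):
    feed_dict = reader.process(line.strip())
    fetch_map = client.predict(feed=feed_dict, fetch=["pooled_output"])
    print(fetch_map["pooled_output"])
```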
### Start the HTTP prediction service
```
export CUDA_VISIBLE_DEVICES=0,1
```
The environment variable selects the GPUs used by the GPU prediction service; this example uses the two GPUs with indices 0 and 1.
```
python bert_web_service.py serving_server_model/ 9292 # start the GPU prediction service
```
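The service script reads this variable itself; as shown in the bert_web_service.py hunk further down in this diff, it does essentially the following:
```
gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"]
gpus = [int(x) for x in gpu_ids.split(",")]
bert_service.set_gpus(gpus)
bert_service.prepare_server(
    workdir="workdir", port=int(sys.argv[2]), device="gpu")
bert_service.run_server()
```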
### 执行预测
执行
``` ```
head data-c.txt | python bert_client.py curl -H "Content-Type:application/json" -X POST -d '{"words": "hello", "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
``` ```
将预测样例数据中的前十条样例,并将向量表示打印到标准输出。
### Benchmark
......
@@ -33,38 +33,45 @@ args = benchmark_args()
def single_func(idx, resource):
    fin = open("data-c.txt")
    dataset = []
    for line in fin:
        dataset.append(line.strip())
    if args.request == "rpc":
        reader = BertReader(vocab_file="vocab.txt", max_seq_len=20)
        fetch = ["pooled_output"]
        client = Client()
        client.load_client_config(args.model)
        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
        start = time.time()
        for i in range(1000):
            if args.batch_size == 1:
                feed_dict = reader.process(dataset[i])
                result = client.predict(feed=feed_dict, fetch=fetch)
            else:
                print("unsupport batch size {}".format(args.batch_size))
    elif args.request == "http":
        start = time.time()
        header = {"Content-Type": "application/json"}
        for i in range(1000):
            dict_data = {"words": dataset[i], "fetch": ["pooled_output"]}
            r = requests.post(
                'http://{}/bert/prediction'.format(resource["endpoint"][
                    idx % len(resource["endpoint"])]),
                data=json.dumps(dict_data),
                headers=header)
    end = time.time()
    return [[end - start]]


if __name__ == '__main__':
    multi_thread_runner = MultiThreadRunner()
    endpoint_list = ["127.0.0.1:9292"]
    result = multi_thread_runner.run(single_func, args.thread,
                                     {"endpoint": endpoint_list})
    avg_cost = 0
    for i in range(args.thread):
        avg_cost += result[0][i]
    avg_cost = avg_cost / args.thread
    print("average total cost {} s.".format(avg_cost))
rm profile_log
for thread_num in 1 2 4 8 16
do
    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
    echo "========================================"
    echo "batch size : $batch_size" >> profile_log
    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
    tail -n 1 profile >> profile_log
done
@@ -27,52 +27,45 @@ import tokenization
import requests
import json
from bert_reader import BertReader

args = benchmark_args()
def single_func(idx, resource):
    fin = open("data-c.txt")
    dataset = []
    for line in fin:
        dataset.append(line.strip())
    if args.request == "rpc":
        reader = BertReader(vocab_file="vocab.txt", max_seq_len=20)
        fetch = ["pooled_output"]
        client = Client()
        client.load_client_config(args.model)
        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
        start = time.time()
        for i in range(1000):
            if args.batch_size >= 1:
                feed_batch = []
                for bi in range(args.batch_size):
                    feed_batch.append(reader.process(dataset[i]))
                result = client.batch_predict(
                    feed_batch=feed_batch, fetch=fetch)
            else:
                print("unsupport batch size {}".format(args.batch_size))
    elif args.request == "http":
        raise Exception("no batch predict for http")
    end = time.time()
    return [[end - start]]


if __name__ == '__main__':
    multi_thread_runner = MultiThreadRunner()
    endpoint_list = ["127.0.0.1:9292"]
    result = multi_thread_runner.run(single_func, args.thread,
                                     {"endpoint": endpoint_list})
    avg_cost = 0
    for i in range(args.thread):
        avg_cost += result[0][i]
    avg_cost = avg_cost / args.thread
    print("average total cost {} s.".format(avg_cost))
rm profile_log
for thread_num in 1 2 4 8 16
do
    for batch_size in 1 2 4 8 16 32 64 128 256 512
    do
        $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
        echo "========================================"
        echo "batch size : $batch_size" >> profile_log
        $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
        tail -n 1 profile >> profile_log
    done
done
@@ -34,5 +34,6 @@ bert_service.load_model_config(sys.argv[1])
gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"]
gpus = [int(x) for x in gpu_ids.split(",")]
bert_service.set_gpus(gpus)
bert_service.prepare_server(
    workdir="workdir", port=int(sys.argv[2]), device="gpu")
bert_service.run_server()
wget https://paddle-serving.bj.bcebos.com/bert_example/data-c.txt --no-check-certificate
wget https://paddle-serving.bj.bcebos.com/bert_example/vocab.txt --no-check-certificate
## CTR prediction service
### Get the sample data
```
sh get_data.sh
```
### Save the model and configuration files
```
python local_train.py
```
Running the script generates the serving_server_model and serving_client_config folders in the current directory.
### Start the RPC prediction service
```
python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292
```
### Run prediction
```
python test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/
```
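test_client.py builds one feed dict per sample with the 26 sparse feature slots produced by criteo_reader and fetches prob; its core loop (visible in the test_client.py hunk later in this diff) is roughly:
```
# Core prediction loop of the CTR client (sketch; see the test_client.py hunk below).
data = reader().next()
feed_dict = {}
for i in range(1, 27):
    feed_dict["sparse_{}".format(i - 1)] = data[0][i]
fetch_map = client.predict(feed=feed_dict, fetch=["prob"])
```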
# -*- coding: utf-8 -*-
#
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from __future__ import unicode_literals, absolute_import
import os
import sys
import time
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
import requests
import json
import criteo_reader as criteo
args = benchmark_args()
def single_func(idx, resource):
    batch = 1
    buf_size = 100
    dataset = criteo.CriteoDataset()
    dataset.setup(1000001)
    test_filelists = [
        "./raw_data/part-%d" % x for x in range(len(os.listdir("./raw_data")))
    ]
    reader = dataset.infer_reader(test_filelists[len(test_filelists) - 40:],
                                  batch, buf_size)
    if args.request == "rpc":
        fetch = ["prob"]
        client = Client()
        client.load_client_config(args.model)
        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
        start = time.time()
        for i in range(1000):
            if args.batch_size == 1:
                data = reader().next()
                feed_dict = {}
                for i in range(1, 27):
                    feed_dict["sparse_{}".format(i - 1)] = data[0][i]
                result = client.predict(feed=feed_dict, fetch=fetch)
            else:
                print("unsupport batch size {}".format(args.batch_size))
    elif args.request == "http":
        raise Exception("Not support http service.")
    end = time.time()
    return [[end - start]]
if __name__ == '__main__':
    multi_thread_runner = MultiThreadRunner()
    endpoint_list = ["127.0.0.1:9292"]
    #endpoint_list = endpoint_list + endpoint_list + endpoint_list
    result = multi_thread_runner.run(single_func, args.thread,
                                     {"endpoint": endpoint_list})
    #result = single_func(0, {"endpoint": endpoint_list})
    avg_cost = 0
    for i in range(args.thread):
        avg_cost += result[0][i]
    avg_cost = avg_cost / args.thread
    print("average total cost {} s.".format(avg_cost))
rm profile_log
for thread_num in 1 2 4 8 16
do
$PYTHONROOT/bin/python benchmark.py --thread $thread_num --model ctr_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
echo "========================================"
echo "batch size : $batch_size" >> profile_log
$PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
tail -n 1 profile >> profile_log
done
# -*- coding: utf-8 -*-
#
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from __future__ import unicode_literals, absolute_import
import os
import sys
import time
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
import requests
import json
import criteo_reader as criteo
args = benchmark_args()
def single_func(idx, resource):
    batch = 1
    buf_size = 100
    dataset = criteo.CriteoDataset()
    dataset.setup(1000001)
    test_filelists = [
        "./raw_data/part-%d" % x for x in range(len(os.listdir("./raw_data")))
    ]
    reader = dataset.infer_reader(test_filelists[len(test_filelists) - 40:],
                                  batch, buf_size)
    if args.request == "rpc":
        fetch = ["prob"]
        client = Client()
        client.load_client_config(args.model)
        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
        start = time.time()
        for i in range(1000):
            if args.batch_size >= 1:
                feed_batch = []
                for bi in range(args.batch_size):
                    feed_dict = {}
                    data = reader().next()
                    for i in range(1, 27):
                        feed_dict["sparse_{}".format(i - 1)] = data[0][i]
                    feed_batch.append(feed_dict)
                result = client.batch_predict(
                    feed_batch=feed_batch, fetch=fetch)
            else:
                print("unsupport batch size {}".format(args.batch_size))
    elif args.request == "http":
        raise Exception("no batch predict for http")
    end = time.time()
    return [[end - start]]
if __name__ == '__main__':
    multi_thread_runner = MultiThreadRunner()
    endpoint_list = ["127.0.0.1:9292"]
    #endpoint_list = endpoint_list + endpoint_list + endpoint_list
    result = multi_thread_runner.run(single_func, args.thread,
                                     {"endpoint": endpoint_list})
    #result = single_func(0, {"endpoint": endpoint_list})
    avg_cost = 0
    for i in range(args.thread):
        avg_cost += result[0][i]
    avg_cost = avg_cost / args.thread
    print("average total cost {} s.".format(avg_cost))
rm profile_log
for thread_num in 1 2 4 8 16
do
for batch_size in 1 2 4 8 16 32 64 128 256 512
do
$PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
echo "========================================"
echo "batch size : $batch_size" >> profile_log
$PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
tail -n 1 profile >> profile_log
done
done
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/data/ctr_prediction/ctr_data.tar.gz
tar -zxvf ctr_data.tar.gz
@@ -17,6 +17,7 @@ from paddle_serving_client import Client
import paddle
import sys
import os
import time
import criteo_reader as criteo
from paddle_serving_client.metric import auc
@@ -34,12 +35,15 @@ test_filelists = [
]
reader = dataset.infer_reader(test_filelists[len(test_filelists) - 40:], batch,
                              buf_size)
label_list = []
prob_list = []
start = time.time()
for ei in range(1000):
    data = reader().next()
    feed_dict = {}
    for i in range(1, 27):
        feed_dict["sparse_{}".format(i - 1)] = data[0][i]
    fetch_map = client.predict(feed=feed_dict, fetch=["prob"])
    #print(fetch_map)
end = time.time()
print(end - start)
@@ -2,26 +2,34 @@
The example uses the ResNet50_vd model to perform the ImageNet 1000-class classification task.
### Get the model configuration files and sample data
```
sh get_model.sh
```
### Run the HTTP prediction service
Start the server side:
```
python image_classification_service.py ResNet50_vd_model workdir 9393 # CPU prediction service
```
```
python image_classification_service_gpu.py ResNet50_vd_model workdir 9393 # GPU prediction service
```
Run prediction from the client side:
```
python image_http_client.py
```
### Run the RPC prediction service
Start the server side:
```
python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9393 # CPU prediction service
```
```
python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9393 --gpu_ids 0 # GPU prediction service
```
Run prediction from the client side:
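The exact client command is elided in this diff; a minimal RPC client can be sketched from the imagenet benchmark code below (the client config path here is an assumption about what get_model.sh extracts):
```
# Hedged sketch of an RPC client for this demo; the config path is assumed.
from paddle_serving_client import Client
from image_reader import ImageReader

client = Client()
client.load_client_config("ResNet50_vd_client_config/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])

reader = ImageReader()
with open("./data/n01440764_10026.JPEG") as f:
    img = reader.process_image(f.read()).reshape(-1)
fetch_map = client.predict(feed={"image": img}, fetch=["score"])
print(fetch_map["score"])
```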
......
@@ -18,23 +18,28 @@ from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
import time
import os

args = benchmark_args()
def single_func(idx, resource):
    file_list = []
    for file_name in os.listdir("./image_data/n01440764"):
        file_list.append(file_name)
    img_list = []
    for i in range(1000):
        img_list.append(open("./image_data/n01440764/" + file_list[i]).read())
    if args.request == "rpc":
        reader = ImageReader()
        fetch = ["score"]
        client = Client()
        client.load_client_config(args.model)
        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
        start = time.time()
        for i in range(1000):
            img = reader.process_image(img_list[i]).reshape(-1)
            fetch_map = client.predict(feed={"image": img}, fetch=["score"])
        end = time.time()
        return [[end - start]]
@@ -43,10 +48,14 @@ def single_func(idx, resource):
if __name__ == "__main__":
    multi_thread_runner = MultiThreadRunner()
    endpoint_list = ["127.0.0.1:9393"]
    #card_num = 4
    #for i in range(args.thread):
    #    endpoint_list.append("127.0.0.1:{}".format(9295 + i % card_num))
    result = multi_thread_runner.run(single_func, args.thread,
                                     {"endpoint": endpoint_list})
    avg_cost = 0
    for i in range(args.thread):
        avg_cost += result[0][i]
    avg_cost = avg_cost / args.thread
    print("average total cost {} s.".format(avg_cost))
rm profile_log
for thread_num in 1 2 4 8 16
do
$PYTHONROOT/bin/python benchmark.py --thread $thread_num --model ResNet101_vd_client_config/serving_client_conf.prototxt --request rpc > profile 2>&1
echo "========================================"
echo "batch size : $batch_size" >> profile_log
$PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
tail -n 1 profile >> profile_log
done
# -*- coding: utf-8 -*-
#
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from __future__ import unicode_literals, absolute_import
import os
import sys
import time
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
import requests
import json
from image_reader import ImageReader
args = benchmark_args()
def single_func(idx, resource):
    file_list = []
    for file_name in os.listdir("./image_data/n01440764"):
        file_list.append(file_name)
    img_list = []
    for i in range(1000):
        img_list.append(open("./image_data/n01440764/" + file_list[i]).read())
    if args.request == "rpc":
        reader = ImageReader()
        fetch = ["score"]
        client = Client()
        client.load_client_config(args.model)
        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
        start = time.time()
        for i in range(1000):
            if args.batch_size >= 1:
                feed_batch = []
                for bi in range(args.batch_size):
                    img = reader.process_image(img_list[i])
                    img = img.reshape(-1)
                    feed_batch.append({"image": img})
                result = client.batch_predict(
                    feed_batch=feed_batch, fetch=fetch)
            else:
                print("unsupport batch size {}".format(args.batch_size))
    elif args.request == "http":
        raise Exception("no batch predict for http")
    end = time.time()
    return [[end - start]]
if __name__ == '__main__':
    multi_thread_runner = MultiThreadRunner()
    endpoint_list = ["127.0.0.1:9393"]
    #endpoint_list = endpoint_list + endpoint_list + endpoint_list
    result = multi_thread_runner.run(single_func, args.thread,
                                     {"endpoint": endpoint_list})
    #result = single_func(0, {"endpoint": endpoint_list})
    avg_cost = 0
    for i in range(args.thread):
        avg_cost += result[0][i]
    avg_cost = avg_cost / args.thread
    print("average total cost {} s.".format(avg_cost))
rm profile_log
for thread_num in 1 2 4 8 16
do
for batch_size in 1 2 4 8 16 32 64 128 256 512
do
$PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model ResNet101_vd_client_config/serving_client_conf.prototxt --request rpc > profile 2>&1
echo "========================================"
echo "batch size : $batch_size" >> profile_log
$PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
tail -n 1 profile >> profile_log
done
done
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet50_vd.tar.gz
tar -xzvf ResNet50_vd.tar.gz
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet101_vd.tar.gz
tar -xzvf ResNet101_vd.tar.gz
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imagenet-example/image_data.tar.gz
tar -xzvf image_data.tar.gz
@@ -26,11 +26,10 @@ def predict(image_path, server):
if __name__ == "__main__":
    server = "http://127.0.0.1:9393/image/prediction"
    image_path = "./data/n01440764_10026.JPEG"
    start = time.time()
    for i in range(1000):
        predict(image_path, server)
    end = time.time()
    print(end - start)
## IMDB review sentiment prediction service
### Get the model files and sample data
```
sh get_data.sh
```
The script downloads and extracts the configuration files for three models (CNN, LSTM, and BOW), together with test_data and train_data.
### Start the RPC prediction service
```
python -m paddle_serving_server.serve --model imdb_bow_model/ --port 9292
```
### Run prediction
```
head test_data/part-0 | python test_client.py imdb_lstm_client_conf/serving_client_conf.prototxt imdb.vocab
```
This predicts the first ten samples of test_data/part-0.
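test_client.py converts each input line to word ids with the IMDB reader and fetches the prediction; the essential part (see the test_client.py hunk further down in this diff) is roughly:
```
# Per-line prediction as done by test_client.py (sketch).
word_ids, label = imdb_dataset.get_words_and_label(line)
fetch_map = client.predict(
    feed={"words": word_ids}, fetch=["acc", "cost", "prediction"])
print("{} {}".format(fetch_map["prediction"][1], label[0]))
```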
### Start the HTTP prediction service
```
python text_classify_service.py imdb_cnn_model/ workdir/ 9292 imdb.vocab
```
### Run prediction
```
curl -H "Content-Type:application/json" -X POST -d '{"words": "i am very sad | 0", "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction
```
### Benchmark
......
@@ -26,24 +26,24 @@ args = benchmark_args()
def single_func(idx, resource):
    imdb_dataset = IMDBDataset()
    imdb_dataset.load_resource("./imdb.vocab")
    dataset = []
    with open("./test_data/part-0") as fin:
        for line in fin:
            dataset.append(line.strip())
    start = time.time()
    if args.request == "rpc":
        client = Client()
        client.load_client_config(args.model)
        client.connect([args.endpoint])
        for i in range(1000):
            if args.batch_size == 1:
                word_ids, label = imdb_dataset.get_words_and_label(line)
                fetch_map = client.predict(
                    feed={"words": word_ids}, fetch=["prediction"])
            else:
                print("unsupport batch size {}".format(args.batch_size))
    elif args.request == "http":
        for fn in filelist:
            fin = open(fn)
......
rm profile_log
for thread_num in 1 2 4 8 16
do
$PYTHONROOT/bin/python benchmark.py --thread $thread_num --model imdbo_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
echo "========================================"
echo "batch size : $batch_size" >> profile_log
$PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
tail -n 1 profile >> profile_log
done
@@ -11,77 +11,55 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import sys
import time
import requests
from imdb_reader import IMDBDataset
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args

args = benchmark_args()


def single_func(idx, resource):
    imdb_dataset = IMDBDataset()
    imdb_dataset.load_resource("./imdb.vocab")
    dataset = []
    with open("./test_data/part-0") as fin:
        for line in fin:
            dataset.append(line.strip())
    start = time.time()
    if args.request == "rpc":
        client = Client()
        client.load_client_config(args.model)
        client.connect([args.endpoint])
        for i in range(1000):
            if args.batch_size >= 1:
                feed_batch = []
                for bi in range(args.batch_size):
                    word_ids, label = imdb_dataset.get_words_and_label(line)
                    feed_batch.append({"words": word_ids})
                result = client.batch_predict(
                    feed_batch=feed_batch, fetch=["prediction"])
            else:
                print("unsupport batch size {}".format(args.batch_size))
    elif args.request == "http":
        for fn in filelist:
            fin = open(fn)
            for line in fin:
                word_ids, label = imdb_dataset.get_words_and_label(line)
                r = requests.post(
                    "http://{}/imdb/prediction".format(args.endpoint),
                    data={"words": word_ids,
                          "fetch": ["prediction"]})
    end = time.time()
    return [[end - start]]


if __name__ == '__main__':
    multi_thread_runner = MultiThreadRunner()
    result = multi_thread_runner.run(single_func, args.thread, {})
    print(result)
rm profile_log
for thread_num in 1 2 4 8 16
do
for batch_size in 1 2 4 8 16 32 64 128 256 512
do
$PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model imdbo_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
echo "========================================"
echo "batch size : $batch_size" >> profile_log
$PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
tail -n 1 profile >> profile_log
done
done
@@ -35,6 +35,8 @@ def load_vocab(filename):
if __name__ == "__main__":
    from nets import lstm_net
    model_name = "imdb_lstm"
    vocab = load_vocab('imdb.vocab')
    dict_dim = len(vocab)
@@ -50,8 +52,6 @@ if __name__ == "__main__":
    dataset.set_batch_size(128)
    dataset.set_filelist(filelist)
    dataset.set_thread(10)
    avg_cost, acc, prediction = lstm_net(data, label, dict_dim)
    optimizer = fluid.optimizer.SGD(learning_rate=0.01)
    optimizer.minimize(avg_cost)
......
@@ -18,7 +18,7 @@ import sys
client = Client()
client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9292"])
# you can define any english sentence or dataset here
# This example reuses imdb reader in training, you
@@ -28,7 +28,7 @@ imdb_dataset.load_resource(sys.argv[2])
for line in sys.stdin:
    word_ids, label = imdb_dataset.get_words_and_label(line)
    feed = {"words": word_ids}
    fetch = ["acc", "cost", "prediction"]
    fetch_map = client.predict(feed=feed, fetch=fetch)
    print("{} {}".format(fetch_map["prediction"][1], label[0]))
@@ -35,6 +35,7 @@ class IMDBService(WebService):
imdb_service = IMDBService(name="imdb")
imdb_service.load_model_config(sys.argv[1])
imdb_service.prepare_server(
    workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu")
imdb_service.prepare_dict({"dict_file_path": sys.argv[4]})
imdb_service.run_server()
@@ -29,9 +29,9 @@ with open(profile_file) as f:
    for line in f.readlines():
        line = line.strip().split("\t")
        if line[0] == "PROFILE":
            prase(line[2])

print("thread num {}".format(thread_num))
for name in time_dict:
    print("{} cost {} s in each thread ".format(name, time_dict[name] / (
        1000000.0 * float(thread_num))))
@@ -31,6 +31,7 @@ def benchmark_args():
        help="endpoint of server")
    parser.add_argument(
        "--request", type=str, default="rpc", help="mode of service")
    parser.add_argument("--batch_size", type=int, default=1, help="batch size")
    return parser.parse_args()
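The new --batch_size flag is consumed by the benchmark_batch.py scripts above; a typical standalone invocation, mirroring the shell loops in this diff, is:
```
python benchmark_batch.py --thread 4 --batch_size 16 --model serving_client_conf/serving_client_conf.prototxt --request rpc
```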
......