未验证 提交 08b5ffbf 编写于 作者: T TeslaZhao 提交者: GitHub

Merge pull request #1175 from HexToString/benchmark_merge

Benchmark merge
......@@ -149,6 +149,8 @@ def __init__(name=None,
### 2. 普通 OP二次开发接口
OP 二次开发的目的是满足业务开发人员控制OP处理策略。
......
......@@ -84,3 +84,9 @@ set environmental variable to specify which gpus are used, the command above mea
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
```
## Benchmark
``` shell
bash benchmark.sh bert_seq128_model bert_seq128_client
```
The benchmark writes its output log to a file named `profile_log_bert_seq128_model`.
......@@ -88,3 +88,10 @@ python bert_web_service_gpu.py bert_seq128_model/ 9292 #启动gpu预测服务
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
```
## 性能测试
``` shell
bash benchmark.sh bert_seq128_model bert_seq128_client
```
性能测试的日志文件为 `profile_log_bert_seq128_model`。
如需修改性能测试用例的参数,请修改benchmark.sh中的配置信息。
......@@ -21,6 +21,7 @@ import sys
import time
import json
import requests
import numpy as np
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
......@@ -56,7 +57,11 @@ def single_func(idx, resource):
feed_batch = []
b_start = time.time()
for bi in range(args.batch_size):
feed_batch.append(reader.process(dataset[bi]))
feed_dict = reader.process(dataset[bi])
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape(
(1, 128, 1))
feed_batch.append(feed_dict)
b_end = time.time()
if profile_flags:
......@@ -116,9 +121,7 @@ def single_func(idx, resource):
if __name__ == '__main__':
multi_thread_runner = MultiThreadRunner()
endpoint_list = [
"127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295"
]
endpoint_list = ["127.0.0.1:9292", "127.0.0.1:9293"]
turns = 100
start = time.time()
result = multi_thread_runner.run(
......
rm profile_log*
export CUDA_VISIBLE_DEVICES=0,1,2,3
export CUDA_VISIBLE_DEVICES=0,1
export FLAGS_profile_server=1
export FLAGS_profile_client=1
export FLAGS_serving_latency=1
......@@ -12,7 +12,7 @@ else
mkdir utilization
fi
#start server
$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim > elog 2>&1 &
$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1 --mem_optim --ir_optim > elog 2>&1 &
sleep 5
#warm up
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
export CUDA_VISIBLE_DEVICES=0,1
python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1 2> elog > stdlog &
export FLAGS_profile_client=1
export FLAGS_profile_server=1
sleep 5
......
......@@ -42,3 +42,9 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```
## Benchmark
``` shell
bash benchmark.sh uci_housing_model uci_housing_client
```
The benchmark writes its log to a file named `profile_log_uci_housing_model`.
......@@ -43,3 +43,10 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```
## 性能测试
``` shell
bash benchmark.sh uci_housing_model uci_housing_client
```
性能测试的日志文件为 `profile_log_uci_housing_model`。
如需修改性能测试用例的参数,请修改benchmark.sh中的配置信息。
......@@ -15,7 +15,7 @@
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
from paddle_serving_client.utils import benchmark_args, show_latency
import time
import paddle
import sys
......@@ -37,9 +37,6 @@ def single_func(idx, resource):
client.connect([args.endpoint])
start = time.time()
for data in train_reader():
#new_data = np.zeros((1, 13)).astype("float32")
#new_data[0] = data[0][0]
#fetch_map = client.predict(feed={"x": new_data}, fetch=["price"], batch=True)
fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
end = time.time()
return [[end - start], [total_number]]
......@@ -57,6 +54,17 @@ def single_func(idx, resource):
return [[end - start], [total_number]]
start = time.time()
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(single_func, args.thread, {})
print(result)
end = time.time()
total_cost = end - start
avg_cost = 0
for i in range(args.thread):
avg_cost += result[0][i]
avg_cost = avg_cost / args.thread
print("total cost: {}s".format(total_cost))
print("each thread cost: {}s. ".format(avg_cost))
print("qps: {}samples/s".format(args.batch_size * args.thread / total_cost))
show_latency(result[1])
......@@ -5,6 +5,7 @@ import collections
profile_file = sys.argv[1]
thread_num = sys.argv[2]
time_dict = collections.OrderedDict()
query_count = 0
def prase(line):
......@@ -26,12 +27,15 @@ def prase(line):
with open(profile_file) as f:
query_count = 0
for line in f.readlines():
line = line.strip().split("\t")
if line[0] == "PROFILE":
prase(line[2])
query_count += 1
print("thread_num: {}".format(thread_num))
print("query_count: {}".format(query_count))
for name in time_dict:
print("{} cost: {}s in each thread ".format(name, time_dict[name] / (
1000000.0 * float(thread_num))))
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册