Commit 2eac9709 authored by MRXLT

fix imagenet benchmark

Parent 689fadca
......@@ -3,7 +3,7 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_profile_server=1
export FLAGS_profile_client=1
export FLAGS_serving_latency=1
python3 -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim False --ir_optim True 2> elog > stdlog &
python3 -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim 2> elog > stdlog &
hostname=`echo $(hostname)|awk -F '.baidu.com' '{print $1}'`
sleep 5
gpu_id=0
......
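The server launch command drops the explicit values and passes --mem_optim and --ir_optim as bare switches. The snippet below is an illustrative sketch only (not the real paddle_serving_server_gpu.serve parser), assuming these options behave like argparse store_true flags: a bare switch turns the option on, while a trailing "False"/"True" token is not consumed by the flag at all.

    import argparse

    # Toy parser mimicking store_true-style switches; the actual serve
    # module's argument parser is assumed here, not shown in this diff.
    parser = argparse.ArgumentParser()
    parser.add_argument("--mem_optim", action="store_true")
    parser.add_argument("--ir_optim", action="store_true")

    # New style used by this commit: bare switches enable both options.
    print(parser.parse_args(["--mem_optim", "--ir_optim"]))
    # -> both options parse to True

    # Old style: "False"/"True" are left over as unrecognized tokens
    # instead of disabling or enabling anything.
    print(parser.parse_known_args(["--mem_optim", "False", "--ir_optim", "True"]))
    # -> (both options True, ['False', 'True'] left unparsed)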
......@@ -24,7 +24,7 @@ import json
import base64
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
from paddle_serving_client.utils import benchmark_args, show_latency
from paddle_serving_app.reader import Sequential, File2Image, Resize
from paddle_serving_app.reader import CenterCrop, RGB2BGR, Transpose, Div, Normalize
......@@ -38,7 +38,11 @@ seq_preprocess = Sequential([
def single_func(idx, resource):
file_list = []
turns = 10
turns = resource["turns"]
latency_flags = False
if os.getenv("FLAGS_serving_latency"):
latency_flags = True
latency_list = []
for file_name in os.listdir("./image_data/n01440764"):
file_list.append(file_name)
img_list = []
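latency_flags is driven purely by whether FLAGS_serving_latency is set: os.getenv returns the raw string (or None), so any non-empty value, even "0", switches latency collection on. A standalone check of that behavior, shown only as an illustration and not part of the benchmark code:

    import os

    os.environ["FLAGS_serving_latency"] = "0"
    print(bool(os.getenv("FLAGS_serving_latency")))  # True: "0" is a non-empty string
    del os.environ["FLAGS_serving_latency"]
    print(bool(os.getenv("FLAGS_serving_latency")))  # False: unset reads back as None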
......@@ -56,6 +60,7 @@ def single_func(idx, resource):
start = time.time()
for i in range(turns):
if args.batch_size >= 1:
l_start = time.time()
feed_batch = []
i_start = time.time()
for bi in range(args.batch_size):
......@@ -69,6 +74,9 @@ def single_func(idx, resource):
int(round(i_end * 1000000))))
result = client.predict(feed=feed_batch, fetch=fetch)
l_end = time.time()
if latency_flags:
latency_list.append(l_end * 1000 - l_start * 1000)
else:
print("unsupport batch size {}".format(args.batch_size))
......@@ -88,6 +96,8 @@ def single_func(idx, resource):
r = requests.post(
server, data=req, headers={"Content-Type": "application/json"})
end = time.time()
if latency_flags:
return [[end - start], latency_list]
return [[end - start]]
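With latency collection enabled, single_func returns [[end - start], latency_list]; otherwise only [[end - start]]. The driver below relies on position 0 holding per-thread total cost and position 1 holding the pooled latencies. The helper here is a simplified stand-in for MultiThreadRunner's aggregation, written only to show the assumed result layout.

    def merge_thread_results(per_thread):
        """Gather element 0 (total cost) from each thread and concatenate
        any element-1 latency lists, mimicking the layout __main__ indexes."""
        total_costs = [r[0][0] for r in per_thread]
        latencies = []
        for r in per_thread:
            if len(r) > 1:
                latencies.extend(r[1])
        return [total_costs, latencies]

    merged = merge_thread_results([[[1.9], [12.0, 13.5]], [[2.1], [11.8, 14.2]]])
    print(merged[0])  # [1.9, 2.1]  -> read as result[0][i] per thread
    print(merged[1])  # [12.0, 13.5, 11.8, 14.2] -> what show_latency receives as result[1]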
......@@ -96,11 +106,21 @@ if __name__ == '__main__':
endpoint_list = [
"127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295"
]
result = multi_thread_runner.run(single_func, args.thread,
{"endpoint": endpoint_list})
turns = 100
start = time.time()
result = multi_thread_runner.run(
single_func, args.thread, {"endpoint": endpoint_list,
"turns": turns})
#result = single_func(0, {"endpoint": endpoint_list})
end = time.time()
total_cost = end - start
avg_cost = 0
for i in range(args.thread):
avg_cost += result[0][i]
avg_cost = avg_cost / args.thread
print("average total cost {} s.".format(avg_cost))
print("total cost: {}s".format(end - start))
print("each thread cost: {}s.".format(avg_cost))
print("qps: {}samples/s".format(args.batch_size * args.thread * turns /
total_cost))
if os.getenv("FLAGS_serving_latency"):
show_latency(result[1])
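The reworked __main__ times the full run, averages result[0] across threads, and derives throughput as batch_size * thread * turns / total_cost before handing result[1] to show_latency. The helper below reproduces that arithmetic and adds a crude percentile summary as a hedged stand-in for show_latency, whose real output format in paddle_serving_client.utils is not shown in this diff.

    def report(batch_size, thread, turns, total_cost, thread_costs, latencies):
        avg_cost = sum(thread_costs) / len(thread_costs)
        qps = batch_size * thread * turns / total_cost
        print("total cost: {}s".format(total_cost))
        print("each thread cost: {}s.".format(avg_cost))
        print("qps: {}samples/s".format(qps))
        if latencies:
            # Rough nearest-rank percentiles; show_latency may report different stats.
            ordered = sorted(latencies)
            for p in (50, 90, 99):
                idx = min(len(ordered) - 1, int(len(ordered) * p / 100))
                print("p{} latency: {:.2f} ms".format(p, ordered[idx]))

    # Made-up numbers: 16 threads x 100 turns x batch 4 finishing in 25 s.
    report(4, 16, 100, 25.0, [24.0] * 16, [12.5, 13.1, 15.8, 40.2])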
......@@ -2,14 +2,14 @@ rm profile_log
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_profile_server=1
export FLAGS_profile_client=1
python -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
python -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim 2> elog > stdlog &
sleep 5
#warm up
$PYTHONROOT/bin/python benchmark.py --thread 8 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
for thread_num in 4 8 16
for thread_num in 1 4 8 16
do
for batch_size in 1 4 16 64
do
......
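run_benchmark.sh now also covers thread_num 1 and starts the server with the new bare --mem_optim --ir_optim switches. Each (thread_num, batch_size) point in the sweep issues thread_num * turns requests and thread_num * turns * batch_size samples; the sketch below just enumerates those workload sizes, assuming turns = 100 as hard-coded in benchmark.py.

    from itertools import product

    turns = 100  # value hard-coded in benchmark.py's __main__
    for thread_num, batch_size in product([1, 4, 8, 16], [1, 4, 16, 64]):
        requests_sent = thread_num * turns
        samples = requests_sent * batch_size
        print("threads={:>2} batch={:>2} -> {:>5} requests, {:>6} samples".format(
            thread_num, batch_size, requests_sent, samples))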
......@@ -8,9 +8,9 @@ hostname=`echo $(hostname)|awk -F '.baidu.com' '{print $1}'`
sleep 5
for thread_num in 4 8 16
for thread_num in 1 4 8 16
do
for batch_size in 1 4 16 64 256
for batch_size in 1 4 16 64
do
job_bt=`date '+%Y%m%d%H%M%S'`
python benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
......
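benchmark_gpu.sh tags each run with a start timestamp via date '+%Y%m%d%H%M%S' (the job_bt line kept above). If the same stamp were ever needed on the Python side, time.strftime accepts the identical format string; this is shown only as an illustration of the format, not as code from the repository.

    import time

    job_bt = time.strftime("%Y%m%d%H%M%S")  # e.g. "20200518093045"
    print(job_bt)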