Commit 09ab2fba, authored by guru4elephant

refine benchmark_batch.py

Parent 22dd55e4
@@ -41,13 +41,13 @@ def single_func(idx, resource):
         client = Client()
         client.load_client_config(args.model)
         client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
-        feed_batch = []
-        for bi in range(args.batch_size):
-            feed_batch.append(reader.process(dataset[bi]))
         start = time.time()
         for i in range(1000):
             if args.batch_size >= 1:
+                feed_batch = []
+                for bi in range(args.batch_size):
+                    feed_batch.append(reader.process(dataset[i]))
                 result = client.batch_predict(
                     feed_batch=feed_batch, fetch=fetch)
             else:
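For context, this refinement moves the `feed_batch` construction inside the timing loop, so each of the 1000 requests rebuilds its batch from `dataset[i]` rather than reusing one batch built once from `dataset[bi]`. A minimal sketch of the resulting batching behavior, with hypothetical stand-ins for `reader.process` and `dataset` (the real objects are set up earlier in `single_func`):

# Illustrative only: each iteration now sends batch_size copies of the i-th sample.
batch_size = 4                    # stands in for args.batch_size
dataset = ["s0", "s1", "s2"]      # stands in for the real dataset
process = lambda s: s.upper()     # stands in for reader.process

for i in range(len(dataset)):     # the real loop runs 1000 iterations
    feed_batch = [process(dataset[i]) for _ in range(batch_size)]
    print(i, feed_batch)          # e.g. 0 ['S0', 'S0', 'S0', 'S0']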
@@ -61,7 +61,9 @@ def single_func(idx, resource):
 if __name__ == '__main__':
     multi_thread_runner = MultiThreadRunner()
-    endpoint_list = ["127.0.0.1:9292"]
+    endpoint_list = [
+        "127.0.0.1:9295", "127.0.0.1:9296", "127.0.0.1:9297", "127.0.0.1:9298"
+    ]
     result = multi_thread_runner.run(single_func, args.thread,
                                      {"endpoint": endpoint_list})
     avg_cost = 0
...
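The endpoint list is expanded from one port to four, and the `idx % len(resource["endpoint"])` indexing in `single_func` (see the first hunk) round-robins client threads across them. A quick sketch of that assignment:

endpoints = ["127.0.0.1:9295", "127.0.0.1:9296", "127.0.0.1:9297", "127.0.0.1:9298"]
for idx in range(8):  # e.g. --thread 8
    # threads 0..3 hit ports 9295..9298, then the cycle repeats for 4..7
    print("thread", idx, "->", endpoints[idx % len(endpoints)])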
 rm profile_log
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
+sleep 5
 for thread_num in 1 2 4 8 16
 do
 for batch_size in 1 2 4 8 16 32 64 128 256 512
 do
     $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
     echo "========================================"
+    echo "thread num: ", $thread_num
+    echo "batch size: ", $batch_size
     echo "batch size : $batch_size" >> profile_log
     $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
     tail -n 1 profile >> profile_log
...
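The benchmark script now also launches the GPU server itself before sweeping the grid of thread counts and batch sizes. For reference, the size of the sweep it drives, derived directly from the two loops above (each run appends one block to profile_log):

thread_nums = [1, 2, 4, 8, 16]
batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]
runs = [(t, b) for t in thread_nums for b in batch_sizes]
print(len(runs), "benchmark runs")  # 50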
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
+export FLAGS_profile_client=1
+export FLAGS_profile_server=1
+sleep 5
+thread_num=4
+python benchmark_batch.py --thread ${thread_num} --batch_size 64 --model serving_client_conf/serving_client_conf.prototxt 2> profile
+python show_profile.py profile ${thread_num}
+python timeline_trace.py profile trace
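This profiling script enables client- and server-side timing via `FLAGS_profile_client` / `FLAGS_profile_server`, then converts the captured profile into a trace file. `timeline_trace.py` itself is not shown in this diff; purely as an illustration of the kind of output such a step can target, here is a minimal, hypothetical emitter of Chrome trace-event JSON (viewable in chrome://tracing):

import json

# Hypothetical sketch, NOT the actual timeline_trace.py: pair B(egin)/E(nd)
# events with microsecond timestamps into a single JSON list per trace file.
events = []
for name, start_us, end_us in [("prepro", 0, 120), ("infer", 120, 900)]:
    events.append({"name": name, "ph": "B", "ts": start_us, "pid": 0, "tid": 0})
    events.append({"name": name, "ph": "E", "ts": end_us, "pid": 0, "tid": 0})

with open("trace", "w") as f:
    json.dump(events, f)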