提交 61f847b3 编写于 作者: M MRXLT 提交者: GitHub

Merge pull request #705 from gentelyang/develop

fix benchmark
...@@ -116,8 +116,10 @@ def single_func(idx, resource): ...@@ -116,8 +116,10 @@ def single_func(idx, resource):
if __name__ == '__main__': if __name__ == '__main__':
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
endpoint_list = ["127.0.0.1:9292"] endpoint_list = [
turns = 10 "127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295"
]
turns = 100
start = time.time() start = time.time()
result = multi_thread_runner.run( result = multi_thread_runner.run(
single_func, args.thread, {"endpoint": endpoint_list, single_func, args.thread, {"endpoint": endpoint_list,
...@@ -130,9 +132,9 @@ if __name__ == '__main__': ...@@ -130,9 +132,9 @@ if __name__ == '__main__':
avg_cost += result[0][i] avg_cost += result[0][i]
avg_cost = avg_cost / args.thread avg_cost = avg_cost / args.thread
print("total cost :{} s".format(total_cost)) print("total cost: {}s".format(total_cost))
print("each thread cost :{} s. ".format(avg_cost)) print("each thread cost: {}s. ".format(avg_cost))
print("qps :{} samples/s".format(args.batch_size * args.thread * turns / print("qps: {}samples/s".format(args.batch_size * args.thread * turns /
total_cost)) total_cost))
if os.getenv("FLAGS_serving_latency"): if os.getenv("FLAGS_serving_latency"):
show_latency(result[1]) show_latency(result[1])
...@@ -4,8 +4,9 @@ export FLAGS_profile_server=1 ...@@ -4,8 +4,9 @@ export FLAGS_profile_server=1
export FLAGS_profile_client=1 export FLAGS_profile_client=1
export FLAGS_serving_latency=1 export FLAGS_serving_latency=1
python3 -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim False --ir_optim True 2> elog > stdlog & python3 -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim False --ir_optim True 2> elog > stdlog &
hostname=`echo $(hostname)|awk -F '.baidu.com' '{print $1}'`
sleep 5 sleep 5
gpu_id=0
#warm up #warm up
python3 benchmark.py --thread 8 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1 python3 benchmark.py --thread 8 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
...@@ -14,14 +15,24 @@ for thread_num in 4 8 16 ...@@ -14,14 +15,24 @@ for thread_num in 4 8 16
do do
for batch_size in 1 4 16 64 256 for batch_size in 1 4 16 64 256
do do
job_bt=`date '+%Y%m%d%H%M%S'`
nvidia-smi --id=$gpu_id --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
gpu_memory_pid=$!
python3 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1 python3 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
echo "model name :" $1 kill ${gpu_memory_pid}
echo "thread num :" $thread_num echo "model_name:" $1
echo "batch size :" $batch_size echo "thread_num:" $thread_num
echo "batch_size:" $batch_size
echo "=================Done====================" echo "=================Done===================="
echo "model name :$1" >> profile_log_$1 echo "model_name:$1" >> profile_log_$1
echo "batch size :$batch_size" >> profile_log_$1 echo "batch_size:$batch_size" >> profile_log_$1
python3 ../util/show_profile.py profile $thread_num >> profile_log_$1 job_et=`date '+%Y%m%d%H%M%S'`
awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY_USE:", max}' gpu_use.log >> profile_log_$1
monquery -n ${hostname} -i GPU_AVERAGE_UTILIZATION -s $job_bt -e $job_et -d 10 > gpu_log_file_${job_bt}
monquery -n ${hostname} -i CPU_USER -s $job_bt -e $job_et -d 10 > cpu_log_file_${job_bt}
cpu_num=$(cat /proc/cpuinfo | grep processor | wc -l)
gpu_num=$(nvidia-smi -L|wc -l)
python ../util/show_profile.py profile $thread_num >> profile_log_$1
tail -n 8 profile >> profile_log_$1 tail -n 8 profile >> profile_log_$1
echo "" >> profile_log_$1 echo "" >> profile_log_$1
done done
......
...@@ -13,13 +13,14 @@ ...@@ -13,13 +13,14 @@
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
import os
import sys import sys
import time import time
import requests import requests
from paddle_serving_app.reader import IMDBDataset from paddle_serving_app.reader import IMDBDataset
from paddle_serving_client import Client from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args from paddle_serving_client.utils import MultiThreadRunner, benchmark_args, show_latency
args = benchmark_args() args = benchmark_args()
...@@ -31,6 +32,13 @@ def single_func(idx, resource): ...@@ -31,6 +32,13 @@ def single_func(idx, resource):
with open("./test_data/part-0") as fin: with open("./test_data/part-0") as fin:
for line in fin: for line in fin:
dataset.append(line.strip()) dataset.append(line.strip())
profile_flags = False
latency_flags = False
if os.getenv("FLAGS_profile_client"):
profile_flags = True
if os.getenv("FLAGS_serving_latency"):
latency_flags = True
latency_list = []
start = time.time() start = time.time()
if args.request == "rpc": if args.request == "rpc":
client = Client() client = Client()
...@@ -67,9 +75,26 @@ def single_func(idx, resource): ...@@ -67,9 +75,26 @@ def single_func(idx, resource):
return [[end - start]] return [[end - start]]
multi_thread_runner = MultiThreadRunner() if __name__ == '__main__':
result = multi_thread_runner.run(single_func, args.thread, {}) multi_thread_runner = MultiThreadRunner()
avg_cost = 0 endpoint_list = [
for cost in result[0]: "127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295"
avg_cost += cost ]
print("total cost {} s of each thread".format(avg_cost / args.thread)) turns = 100
start = time.time()
result = multi_thread_runner.run(
single_func, args.thread, {"endpoint": endpoint_list,
"turns": turns})
end = time.time()
total_cost = end - start
avg_cost = 0
for i in range(args.thread):
avg_cost += result[0][i]
avg_cost = avg_cost / args.thread
print("total cost: {}".format(total_cost))
print("each thread cost: {}".format(avg_cost))
print("qps: {}samples/s".format(args.batch_size * args.thread * turns /
total_cost))
if os.getenv("FLAGS_serving_latency"):
show_latency(result[0])
rm profile_log rm profile_log
for thread_num in 1 2 4 8 16 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_profile_server=1
export FLAGS_profile_client=1
export FLAGS_serving_latency=1
python -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim 2> elog > stdlog &
hostname=`echo $(hostname)|awk -F '.baidu.com' '{print $1}'`
sleep 5
for thread_num in 4 8 16
do do
for batch_size in 1 2 4 8 16 32 64 128 256 512 for batch_size in 1 4 16 64 256
do do
$PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1 job_bt=`date '+%Y%m%d%H%M%S'`
echo "========================================" python benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
echo "batch size : $batch_size" >> profile_log echo "model_name:" $1
$PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log echo "thread_num:" $thread_num
tail -n 1 profile >> profile_log echo "batch_size:" $batch_size
echo "=================Done===================="
echo "model_name:$1" >> profile_log_$1
echo "batch_size:$batch_size" >> profile_log_$1
job_et=`date '+%Y%m%d%H%M%S'`
awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY_USE:", max}' gpu_use.log >> profile_log_$1
monquery -n ${hostname} -i GPU_AVERAGE_UTILIZATION -s $job_bt -e $job_et -d 10 > gpu_log_file_${job_bt}
monquery -n ${hostname} -i CPU_USER -s $job_bt -e $job_et -d 10 > cpu_log_file_${job_bt}
cpu_num=$(cat /proc/cpuinfo | grep processor | wc -l)
gpu_num=$(nvidia-smi -L|wc -l)
python ../util/show_profile.py profile $thread_num >> profile_log_$1
tail -n 8 profile >> profile_log_$1
echo "" >> profile_log_$1
done done
done done
ps -ef|grep 'serving'|grep -v grep|cut -c 9-15 | xargs kill -9
...@@ -31,7 +31,7 @@ with open(profile_file) as f: ...@@ -31,7 +31,7 @@ with open(profile_file) as f:
if line[0] == "PROFILE": if line[0] == "PROFILE":
prase(line[2]) prase(line[2])
print("thread num :{}".format(thread_num)) print("thread_num: {}".format(thread_num))
for name in time_dict: for name in time_dict:
print("{} cost :{} s in each thread ".format(name, time_dict[name] / ( print("{} cost: {}s in each thread ".format(name, time_dict[name] / (
1000000.0 * float(thread_num)))) 1000000.0 * float(thread_num))))
...@@ -39,11 +39,11 @@ def benchmark_args(): ...@@ -39,11 +39,11 @@ def benchmark_args():
def show_latency(latency_list): def show_latency(latency_list):
latency_array = np.array(latency_list) latency_array = np.array(latency_list)
info = "latency:\n" info = "latency:\n"
info += "mean :{} ms\n".format(np.mean(latency_array)) info += "mean: {}ms\n".format(np.mean(latency_array))
info += "median :{} ms\n".format(np.median(latency_array)) info += "median: {}ms\n".format(np.median(latency_array))
info += "80 percent :{} ms\n".format(np.percentile(latency_array, 80)) info += "80 percent: {}ms\n".format(np.percentile(latency_array, 80))
info += "90 percent :{} ms\n".format(np.percentile(latency_array, 90)) info += "90 percent: {}ms\n".format(np.percentile(latency_array, 90))
info += "99 percent :{} ms\n".format(np.percentile(latency_array, 99)) info += "99 percent: {}ms\n".format(np.percentile(latency_array, 99))
sys.stderr.write(info) sys.stderr.write(info)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册