未验证 提交 f92e6a52 编写于 作者: T TeslaZhao 提交者: GitHub

Merge pull request #1231 from HexToString/develop-p

add resnet50_benchmark
# -*- coding: utf-8 -*-
#
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from __future__ import unicode_literals, absolute_import
import os
import sys
import time
import json
import requests
import numpy as np
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
args = benchmark_args()
def single_func(idx, resource):
total_number = 0
profile_flags = False
latency_flags = False
if os.getenv("FLAGS_profile_client"):
profile_flags = True
if os.getenv("FLAGS_serving_latency"):
latency_flags = True
latency_list = []
if args.request == "rpc":
client = Client()
client.load_client_config(args.model)
client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
start = time.time()
for i in range(turns):
if args.batch_size >= 1:
l_start = time.time()
seq = Sequential([
File2Image(), Resize(256), CenterCrop(224), RGB2BGR(),
Transpose((2, 0, 1)), Div(255), Normalize(
[0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])
image_file = "daisy.jpg"
img = seq(image_file)
feed_data = np.array(img)
feed_data = np.expand_dims(feed_data, 0).repeat(
args.batch_size, axis=0)
result = client.predict(
feed={"image": feed_data},
fetch=["save_infer_model/scale_0.tmp_0"],
batch=True)
l_end = time.time()
if latency_flags:
latency_list.append(l_end * 1000 - l_start * 1000)
total_number = total_number + 1
else:
print("unsupport batch size {}".format(args.batch_size))
else:
raise ValueError("not implemented {} request".format(args.request))
end = time.time()
if latency_flags:
return [[end - start], latency_list, [total_number]]
else:
return [[end - start]]
if __name__ == '__main__':
multi_thread_runner = MultiThreadRunner()
endpoint_list = ["127.0.0.1:9393"]
turns = 1
start = time.time()
result = multi_thread_runner.run(
single_func, args.thread, {"endpoint": endpoint_list,
"turns": turns})
end = time.time()
total_cost = end - start
total_number = 0
avg_cost = 0
for i in range(args.thread):
avg_cost += result[0][i]
total_number += result[2][i]
avg_cost = avg_cost / args.thread
print("total cost-include init: {}s".format(total_cost))
print("each thread cost: {}s. ".format(avg_cost))
print("qps: {}samples/s".format(args.batch_size * total_number / (
avg_cost * args.thread)))
print("qps(request): {}samples/s".format(total_number / (avg_cost *
args.thread)))
print("total count: {} ".format(total_number))
if os.getenv("FLAGS_serving_latency"):
show_latency(result[1])
rm profile_log*
rm -rf resnet_log*
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_profile_server=1
export FLAGS_profile_client=1
export FLAGS_serving_latency=1
gpu_id=3
#save cpu and gpu utilization log
if [ -d utilization ];then
rm -rf utilization
else
mkdir utilization
fi
#start server
python3.6 -m paddle_serving_server.serve --model $1 --port 9393 --thread 10 --gpu_ids $gpu_id --use_trt --ir_optim > elog 2>&1 &
sleep 15
#warm up
python3.6 benchmark.py --thread 1 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
echo -e "import psutil\nimport time\nwhile True:\n\tcpu_res = psutil.cpu_percent()\n\twith open('cpu.txt', 'a+') as f:\n\t\tf.write(f'{cpu_res}\\\n')\n\ttime.sleep(0.1)" > cpu.py
for thread_num in 1 2 4 8 16
do
for batch_size in 1 4 8 16 32
do
job_bt=`date '+%Y%m%d%H%M%S'`
nvidia-smi --id=$gpu_id --query-compute-apps=used_memory --format=csv -lms 100 > gpu_memory_use.log 2>&1 &
nvidia-smi --id=$gpu_id --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
rm -rf cpu.txt
python3.6 cpu.py &
gpu_memory_pid=$!
python3.6 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
kill `ps -ef|grep used_memory|awk '{print $2}'` > /dev/null
kill `ps -ef|grep utilization.gpu|awk '{print $2}'` > /dev/null
kill `ps -ef|grep cpu.py|awk '{print $2}'` > /dev/null
echo "model_name:" $1
echo "thread_num:" $thread_num
echo "batch_size:" $batch_size
echo "=================Done===================="
echo "model_name:$1" >> profile_log_$1
echo "batch_size:$batch_size" >> profile_log_$1
job_et=`date '+%Y%m%d%H%M%S'`
awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "CPU_UTILIZATION:", max}' cpu.txt >> profile_log_$1
#awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_memory_use.log >> profile_log_$1
#awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$1
grep -av '^0 %' gpu_utilization.log > gpu_utilization.log.tmp
awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_memory_use.log >> profile_log_$1
awk -F" " '{sum+=$1} END {print "GPU_UTILIZATION:", sum/NR, sum, NR }' gpu_utilization.log.tmp >> profile_log_$1
rm -rf gpu_memory_use.log gpu_utilization.log gpu_utilization.log.tmp
python3.6 ../util/show_profile.py profile $thread_num >> profile_log_$1
tail -n 10 profile >> profile_log_$1
echo "" >> profile_log_$1
done
done
#Divided log
awk 'BEGIN{RS="\n\n"}{i++}{print > "resnet_log_"i}' profile_log_$1
mkdir resnet_log && mv resnet_log_* resnet_log
ps -ef|grep 'serving'|grep -v grep|cut -c 9-15 | xargs kill -9
if [ ! -x "ResNet50.tar.gz"]; then
wget https://paddle-inference-dist.bj.bcebos.com/AI-Rank/models/Paddle/ResNet50.tar.gz
fi
tar -xzvf ResNet50.tar.gz
python3.6 -m paddle_serving_client.convert --dirname ./ResNet50 --model_filename model --params_filename params
bash benchmark.sh serving_server serving_client
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册