Commit d78f545c authored by MRXLT

add ce script

Parent 54597c48
@@ -21,7 +21,7 @@ import sys
 import time
 from paddle_serving_client import Client
 from paddle_serving_client.utils import MultiThreadRunner
-from paddle_serving_client.utils import benchmark_args
+from paddle_serving_client.utils import benchmark_args, show_latency
 from batching import pad_batch_data
 import tokenization
 import requests
@@ -35,11 +35,18 @@ def single_func(idx, resource):
     dataset = []
     for line in fin:
         dataset.append(line.strip())
     profile_flags = False
+    latency_flags = False
     if os.getenv("FLAGS_profile_client"):
         profile_flags = True
+    if os.getenv("FLAGS_serving_latency"):
+        latency_flags = True
+        latency_list = []
     if args.request == "rpc":
         reader = BertReader(vocab_file="vocab.txt", max_seq_len=20)
         fetch = ["pooled_output"]
         client = Client()
         client.load_client_config(args.model)
@@ -47,11 +54,13 @@ def single_func(idx, resource):
         start = time.time()
         for i in range(turns):
             if args.batch_size >= 1:
+                l_start = time.time()
                 feed_batch = []
                 b_start = time.time()
                 for bi in range(args.batch_size):
                     feed_batch.append(reader.process(dataset[bi]))
                 b_end = time.time()
                 if profile_flags:
                     sys.stderr.write(
                         "PROFILE\tpid:{}\tbert_pre_0:{} bert_pre_1:{}\n".format(
@@ -59,13 +68,17 @@ def single_func(idx, resource):
                             int(round(b_start * 1000000)),
                             int(round(b_end * 1000000))))
                 result = client.predict(feed=feed_batch, fetch=fetch)
+                l_end = time.time()
+                if latency_flags:
+                    latency_list.append(l_end * 1000 - l_start * 1000)
             else:
                 print("unsupport batch size {}".format(args.batch_size))
     elif args.request == "http":
         raise ("not implemented")
     end = time.time()
-    return [[end - start]]
+    return [[end - start], latency_list]

 if __name__ == '__main__':
@@ -78,13 +91,17 @@ if __name__ == '__main__':
     result = multi_thread_runner.run(
         single_func, args.thread, {"endpoint": endpoint_list,
                                    "turns": turns})
+    end = time.time()
+    total_cost = end - start
     avg_cost = 0
     for i in range(args.thread):
         avg_cost += result[0][i]
     avg_cost = avg_cost / args.thread
-    end = time.time()
-    total_cost = end - start
     print("total cost :{} s".format(total_cost))
     print("each thread cost :{} s. ".format(avg_cost))
     print("qps :{} samples/s".format(args.batch_size * args.thread * turns /
                                      total_cost))
+    if os.getenv("FLAGS_serving_latency"):
+        show_latency(result[1])
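For reference, a minimal sketch (not part of this commit) of how the printed QPS figure follows from the values benchmark.py already tracks; the thread count, batch size, turn count and total wall time below are assumed example numbers, not values from the diff:

# Hypothetical numbers, only to illustrate the qps formula used above.
thread = 8        # args.thread
batch_size = 16   # args.batch_size
turns = 10        # prediction rounds per thread, defined alongside the endpoint list
total_cost = 4.0  # seconds measured around multi_thread_runner.run

qps = batch_size * thread * turns / total_cost  # 16 * 8 * 10 / 4.0 = 320.0 samples/s
print("qps :{} samples/s".format(qps))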
@@ -2,24 +2,28 @@ rm profile_log
 export CUDA_VISIBLE_DEVICES=0,1,2,3
 export FLAGS_profile_server=1
 export FLAGS_profile_client=1
-python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9292 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
+export FLAGS_serving_latency=1
+python -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
 sleep 5
 #warm up
-$PYTHONROOT/bin/python benchmark.py --thread 8 --batch_size 1 --model ./bert_seq20_client/serving_client_conf.prototxt --request rpc > profile 2>&1
+$PYTHONROOT/bin/python benchmark.py --thread 8 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
-for thread_num in 8 16 32
+for thread_num in 4 8 16
 do
 for batch_size in 1 4 16 64 256
 do
-    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model ./bert_seq20_client/serving_client_conf.prototxt --request rpc > profile 2>&1
+    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+    echo "model name :" $1
     echo "thread num :" $thread_num
     echo "batch size :" $batch_size
     echo "=================Done===================="
+    echo "model name :$1" >> profile_log
     echo "batch size :$batch_size" >> profile_log
     $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
-    tail -n 3 profile >> profile_log
+    tail -n 8 profile >> profile_log
+    echo "" >> profile_log
 done
 done
......
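With these changes benchmark.sh no longer hard-codes the bert_seq20 directories: the serving model directory comes from $1 and the client configuration directory from $2, and FLAGS_serving_latency is exported so benchmark.py records per-request latency. The CE script further below invokes it as, for example:

sh benchmark.sh bert_chinese_L-12_H-768_A-12_seq20_model bert_chinese_L-12_H-768_A-12_seq20_client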
@@ -17,10 +17,11 @@ import paddle.fluid as fluid
 import sys
 import paddle_serving_client.io as serving_io
-model_name = "bert_chinese_L-12_H-768_A-12"
+#model_name = "bert_chinese_L-12_H-768_A-12"
+model_name = sys.argv[1]
 module = hub.Module(model_name)
 inputs, outputs, program = module.context(
-    trainable=True, max_seq_len=int(sys.argv[1]))
+    trainable=True, max_seq_len=int(sys.argv[2]))
 place = fluid.core_avx.CPUPlace()
 exe = fluid.Executor(place)
 input_ids = inputs["input_ids"]
@@ -37,8 +38,8 @@ feed_var_names = [
 target_vars = [pooled_output, sequence_output]
 serving_io.save_model(
-    "bert_seq{}_model".format(sys.argv[1]),
-    "bert_seq{}_client".format(sys.argv[1]), {
+    "{}_seq{}_model".format(sys.argv[1], sys.argv[2]),
+    "{}_seq{}_client".format(sys.argv[1], sys.argv[2]), {
         "input_ids": input_ids,
         "position_ids": position_ids,
         "segment_ids": segment_ids,
......
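prepare_model.py now takes the PaddleHub module name as its first argument and the maximum sequence length as its second, and names the saved directories after both. As invoked by the CE script below:

python prepare_model.py bert_chinese_L-12_H-768_A-12 20

this saves bert_chinese_L-12_H-768_A-12_seq20_model and bert_chinese_L-12_H-768_A-12_seq20_client.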
@@ -17,6 +17,7 @@ import sys
 import subprocess
 import argparse
 from multiprocessing import Pool
+import numpy as np

 def benchmark_args():
@@ -35,6 +36,17 @@ def benchmark_args():
     return parser.parse_args()

+def show_latency(latency_list):
+    latency_array = np.array(latency_list)
+    info = ""
+    info += "mean :{} ms\n".format(np.mean(latency_array))
+    info += "median :{} ms\n".format(np.median(latency_array))
+    info += "80 percent :{} ms\n".format(np.percentile(latency_array, 80))
+    info += "90 percent :{} ms\n".format(np.percentile(latency_array, 90))
+    info += "99 percent :{} ms\n".format(np.percentile(latency_array, 99))
+    sys.stderr.write(info)

 class MultiThreadRunner(object):
     def __init__(self):
         pass
......
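A small usage sketch for the new show_latency helper; the latency values are made-up milliseconds of the kind benchmark.py collects when FLAGS_serving_latency is set:

from paddle_serving_client.utils import show_latency

# Hypothetical per-request latencies in milliseconds.
latencies = [12.3, 15.1, 14.8, 30.2, 13.9]
show_latency(latencies)
# Writes mean / median / 80th / 90th / 99th percentile lines to stderr,
# e.g. roughly "mean :17.26 ms" for this particular list.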
set -x
set -v

function get_model(){
    if [ ! -d "bert_cased_L-12_H-768_A-12_model" ]; then
        python -c "from paddle_serving_app.models import ServingModels; models = ServingModels();\
models.download(\"$1\")"
        tar -xzf $1.tar.gz
    fi
}

function bert_demo(){
    cd ../python/examples/bert
    python prepare_model.py bert_chinese_L-12_H-768_A-12 20
    sh benchmark.sh bert_chinese_L-12_H-768_A-12_seq20_model bert_chinese_L-12_H-768_A-12_seq20_client
    python prepare_model.py ernie_tiny 20
    sh benchmark.sh ernie_tiny_seq20_model ernie_tiny_seq20_client
    cd -
}

function imagenet_demo(){
    cd ../python/examples/imagenet
    sh get_model.sh
}
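In short, the new CE script prepares seq-len-20 serving models for bert_chinese_L-12_H-768_A-12 and ernie_tiny with prepare_model.py, runs benchmark.sh against each model/client directory pair, and, for the imagenet example, fetches its model via get_model.sh; the get_model() helper downloads and unpacks a named model through paddle_serving_app's ServingModels.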