diff --git a/python/examples/bert/benchmark.py b/python/examples/bert/benchmark.py index e14c02fe1231c1ab04bcf1fda67046ea6b3806bb..ff835ab844fe0bdebcf6e0b760a7579037c8d5af 100644 --- a/python/examples/bert/benchmark.py +++ b/python/examples/bert/benchmark.py @@ -27,7 +27,6 @@ import tokenization import requests import json from bert_reader import BertReader - args = benchmark_args() @@ -36,42 +35,56 @@ def single_func(idx, resource): dataset = [] for line in fin: dataset.append(line.strip()) + profile_flags = False + if os.getenv("FLAGS_profile_client"): + profile_flags = True if args.request == "rpc": reader = BertReader(vocab_file="vocab.txt", max_seq_len=20) fetch = ["pooled_output"] client = Client() client.load_client_config(args.model) client.connect([resource["endpoint"][idx % len(resource["endpoint"])]]) - start = time.time() - for i in range(1000): - if args.batch_size == 1: - feed_dict = reader.process(dataset[i]) - result = client.predict(feed=feed_dict, fetch=fetch) + for i in range(turns): + if args.batch_size >= 1: + feed_batch = [] + b_start = time.time() + for bi in range(args.batch_size): + feed_batch.append(reader.process(dataset[bi])) + b_end = time.time() + if profile_flags: + sys.stderr.write( + "PROFILE\tpid:{}\tbert_pre_0:{} bert_pre_1:{}\n".format( + os.getpid(), + int(round(b_start * 1000000)), + int(round(b_end * 1000000)))) + result = client.predict(feed=feed_batch, fetch=fetch) else: print("unsupport batch size {}".format(args.batch_size)) elif args.request == "http": - start = time.time() - header = {"Content-Type": "application/json"} - for i in range(1000): - dict_data = {"words": dataset[i], "fetch": ["pooled_output"]} - r = requests.post( - 'http://{}/bert/prediction'.format(resource["endpoint"][ - idx % len(resource["endpoint"])]), - data=json.dumps(dict_data), - headers=header) + raise ("not implemented") end = time.time() return [[end - start]] if __name__ == '__main__': multi_thread_runner = MultiThreadRunner() - endpoint_list = ["127.0.0.1:9292"] - result = multi_thread_runner.run(single_func, args.thread, - {"endpoint": endpoint_list}) + endpoint_list = [ + "127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295" + ] + turns = 1000 + start = time.time() + result = multi_thread_runner.run( + single_func, args.thread, {"endpoint": endpoint_list, + "turns": turns}) avg_cost = 0 for i in range(args.thread): avg_cost += result[0][i] avg_cost = avg_cost / args.thread - print("average total cost {} s.".format(avg_cost)) + end = time.time() + total_cost = end - start + print("total cost :{} s".format(total_cost)) + print("each thread cost :{} s. ".format(avg_cost)) + print("qps :{} samples/s".format(args.batch_size * args.thread * turns / + total_cost)) diff --git a/python/examples/bert/benchmark.sh b/python/examples/bert/benchmark.sh index 7f9e2325f3b8f7db288d2b7d82d0d412e05417cb..fef2a73664866316b37603a557f6ad3b34705548 100644 --- a/python/examples/bert/benchmark.sh +++ b/python/examples/bert/benchmark.sh @@ -1,9 +1,26 @@ rm profile_log -for thread_num in 1 2 4 8 16 +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_profile_server=1 +export FLAGS_profile_client=1 +python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9292 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog & + +sleep 5 + +#warm up +$PYTHONROOT/bin/python benchmark.py --thread 8 --batch_size 1 --model ./bert_seq20_client/serving_client_conf.prototxt --request rpc > profile 2>&1 + +for thread_num in 8 16 32 do - $PYTHONROOT/bin/python benchmark.py --thread $thread_num --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1 - echo "========================================" - echo "batch size : $batch_size" >> profile_log +for batch_size in 1 4 16 64 256 +do + $PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model ./bert_seq20_client/serving_client_conf.prototxt --request rpc > profile 2>&1 + echo "thread num :" $thread_num + echo "batch size :" $batch_size + echo "=================Done====================" + echo "batch size :$batch_size" >> profile_log $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log - tail -n 1 profile >> profile_log + tail -n 3 profile >> profile_log +done done + +ps -ef|grep 'serving'|grep -v grep|cut -c 9-15 | xargs kill -9 diff --git a/python/examples/bert/benchmark_batch.py b/python/examples/bert/benchmark_batch.py deleted file mode 100644 index 7cedb6aa451e0e4a128f0fedbfde1a896977f601..0000000000000000000000000000000000000000 --- a/python/examples/bert/benchmark_batch.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# pylint: disable=doc-string-missing - -from __future__ import unicode_literals, absolute_import -import os -import sys -import time -from paddle_serving_client import Client -from paddle_serving_client.utils import MultiThreadRunner -from paddle_serving_client.utils import benchmark_args -from batching import pad_batch_data -import tokenization -import requests -import json -from bert_reader import BertReader -args = benchmark_args() - - -def single_func(idx, resource): - fin = open("data-c.txt") - dataset = [] - for line in fin: - dataset.append(line.strip()) - profile_flags = False - if os.environ["FLAGS_profile_client"]: - profile_flags = True - if args.request == "rpc": - reader = BertReader(vocab_file="vocab.txt", max_seq_len=20) - fetch = ["pooled_output"] - client = Client() - client.load_client_config(args.model) - client.connect([resource["endpoint"][idx % len(resource["endpoint"])]]) - start = time.time() - for i in range(1000): - if args.batch_size >= 1: - feed_batch = [] - b_start = time.time() - for bi in range(args.batch_size): - feed_batch.append(reader.process(dataset[bi])) - b_end = time.time() - if profile_flags: - print("PROFILE\tpid:{}\tbert_pre_0:{} bert_pre_1:{}".format( - os.getpid(), - int(round(b_start * 1000000)), - int(round(b_end * 1000000)))) - result = client.predict(feed=feed_batch, fetch=fetch) - else: - print("unsupport batch size {}".format(args.batch_size)) - - elif args.request == "http": - raise ("no batch predict for http") - end = time.time() - return [[end - start]] - - -if __name__ == '__main__': - multi_thread_runner = MultiThreadRunner() - endpoint_list = ["127.0.0.1:9292"] - result = multi_thread_runner.run(single_func, args.thread, - {"endpoint": endpoint_list}) - avg_cost = 0 - for i in range(args.thread): - avg_cost += result[0][i] - avg_cost = avg_cost / args.thread - print("average total cost {} s.".format(avg_cost)) diff --git a/python/examples/bert/benchmark_batch.sh b/python/examples/bert/benchmark_batch.sh deleted file mode 100644 index 272923776d6640880175745920a8fad9e84972fd..0000000000000000000000000000000000000000 --- a/python/examples/bert/benchmark_batch.sh +++ /dev/null @@ -1,19 +0,0 @@ -rm profile_log -export CUDA_VISIBLE_DEVICES=0,1,2,3 -python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog & - -sleep 5 - -for thread_num in 1 2 4 8 16 -do -for batch_size in 1 2 4 8 16 32 64 128 256 512 -do - $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1 - echo "========================================" - echo "thread num: ", $thread_num - echo "batch size: ", $batch_size - echo "batch size : $batch_size" >> profile_log - $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log - tail -n 1 profile >> profile_log -done -done diff --git a/python/examples/util/show_profile.py b/python/examples/util/show_profile.py index 9153d939338f0ee171af539b9f955d51802ad547..1581dda19bb0abefe6eb21592bda7fc97d8fb7cd 100644 --- a/python/examples/util/show_profile.py +++ b/python/examples/util/show_profile.py @@ -31,7 +31,7 @@ with open(profile_file) as f: if line[0] == "PROFILE": prase(line[2]) -print("thread num {}".format(thread_num)) +print("thread num :{}".format(thread_num)) for name in time_dict: - print("{} cost {} s in each thread ".format(name, time_dict[name] / ( + print("{} cost :{} s in each thread ".format(name, time_dict[name] / ( 1000000.0 * float(thread_num))))