diff --git a/python/examples/bert/bert_web_service.py b/python/examples/bert/bert_web_service.py
index 64091817d84ec2bb7e608acf06a8f550ce5972de..a5674e133cb40948ea5a3f4f3e2f4791de5c4115 100644
--- a/python/examples/bert/bert_web_service.py
+++ b/python/examples/bert/bert_web_service.py
@@ -30,6 +30,7 @@ class BertService(WebService):
     def preprocess(self, feed=[], fetch=[]):
         feed_res = []
         is_batch = True
+        print(feed)
         for ins in feed:
             feed_dict = self.reader.process(ins["words"].encode("utf-8"))
             for key in feed_dict.keys():
diff --git a/python/examples/bert/new_benchmark.py b/python/examples/bert/new_benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab367ce0d0e46860ef8bb16298b59fd0a06365c2
--- /dev/null
+++ b/python/examples/bert/new_benchmark.py
@@ -0,0 +1,57 @@
+import sys
+import yaml
+import requests
+import time
+import json
+from paddle_serving_client.utils import MultiThreadRunner
+
+def parse_benchmark(filein, fileout):
+    # Drop the per-op "call" timing entries so only the aggregate
+    # DAG statistics remain in the dumped profile.
+    with open(filein, "r") as fin:
+        res = yaml.load(fin, Loader=yaml.FullLoader)
+        del_list = []
+        for key in res["DAG"].keys():
+            if "call" in key:
+                del_list.append(key)
+        for key in del_list:
+            del res["DAG"][key]
+        with open(fileout, "w") as fout:
+            yaml.dump(res, fout, default_flow_style=False)
+
+def run_http(idx, batch_size):
+    """
+    Request payload: {"feed": [{"words": "hello"}], "fetch": ["pooled_output"]}
+    """
+    print("start thread ({})".format(idx))
+    url = "http://127.0.0.1:9292/bert/prediction"
+    with open("data-c.txt", "r") as fin:
+        start = time.time()
+        lines = fin.readlines()
+        start_idx = 0
+        while start_idx < len(lines):
+            end_idx = min(len(lines), start_idx + batch_size)
+            feed_lst = [{"words": lines[i]} for i in range(start_idx, end_idx)]
+            data = {"feed": feed_lst, "fetch": ["pooled_output"]}
+            r = requests.post(url=url, data=json.dumps(data), headers={"Content-Type": "application/json"})
+            start_idx += batch_size
+            end = time.time()
+            # Stop each thread after roughly 40 seconds of requests.
+            if end - start > 40:
+                break
+    end = time.time()
+    return [[end - start]]
+
+def multithread_http(thread, batch_size):
+    multi_thread_runner = MultiThreadRunner()
+    result = multi_thread_runner.run(run_http, thread, batch_size)
+
+if __name__ == "__main__":
+    if sys.argv[1] == "run":
+        thread = int(sys.argv[2])
+        batch_size = int(sys.argv[3])
+        multithread_http(thread, batch_size)
+    if sys.argv[1] == "dump":
+        filein = sys.argv[2]
+        fileout = sys.argv[3]
+        parse_benchmark(filein, fileout)
diff --git a/python/examples/bert/new_benchmark.sh b/python/examples/bert/new_benchmark.sh
new file mode 100644
index 0000000000000000000000000000000000000000..9373be30f200f6815c195b39c75b5426d5edec1e
--- /dev/null
+++ b/python/examples/bert/new_benchmark.sh
@@ -0,0 +1,26 @@
+modelname="bert"
+# HTTP
+ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
+sleep 3
+rm -rf profile_log_$modelname
+for thread_num in 1 8 16
+do
+    for batch_size in 1 10 100
+    do
+        python3.7 bert_web_service.py bert_seq128_model/ 9292 &
+        sleep 3
+        echo "----Bert thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname
+        nvidia-smi --id=2 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
+        nvidia-smi --id=2 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
+        echo -e "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
+        python3.7 new_benchmark.py run $thread_num $batch_size
+        python3.7 cpu_utilization.py >>profile_log_$modelname
+        ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
+        ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
+        python3.7 new_benchmark.py dump benchmark.log benchmark.tmp
+        mv benchmark.tmp benchmark.log
+        awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$modelname
+        awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$modelname
+        cat benchmark.log >> profile_log_$modelname
+    done
+done