diff --git a/doc/PIPELINE_SERVING_CN.md b/doc/PIPELINE_SERVING_CN.md
index d53f62869978471c7541d4342d57764397ed0435..e9eb74f8e0271b334994b24cf03a2f02082a5160 100644
--- a/doc/PIPELINE_SERVING_CN.md
+++ b/doc/PIPELINE_SERVING_CN.md
@@ -149,6 +149,8 @@ def __init__(name=None,
+
+
 ### 2. 普通 OP二次开发接口
 OP 二次开发的目的是满足业务开发人员控制OP处理策略。
diff --git a/python/examples/bert/README.md b/python/examples/bert/README.md
index 1fde6d46625af8513ee244ab9c0865cccfe05a20..7bada93876f8f043b0046b83c3dc3707129079a7 100644
--- a/python/examples/bert/README.md
+++ b/python/examples/bert/README.md
@@ -84,3 +84,9 @@ set environmental variable to specify which gpus are used, the command above mea
 ```
 curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
 ```
+
+## Benchmark
+``` shell
+bash benchmark.sh bert_seq128_model bert_seq128_client
+```
+The output log file of the benchmark is named `profile_log_bert_seq128_model`.
diff --git a/python/examples/bert/README_CN.md b/python/examples/bert/README_CN.md
index 060c5579af6d2772ed666fda6f023245bf881213..ef28089b559b7281613ae2fb78b8039978db2510 100644
--- a/python/examples/bert/README_CN.md
+++ b/python/examples/bert/README_CN.md
@@ -88,3 +88,10 @@ python bert_web_service_gpu.py bert_seq128_model/ 9292 #启动gpu预测服务
 ```
 curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
 ```
+
+## 性能测试
+``` shell
+bash benchmark.sh bert_seq128_model bert_seq128_client
+```
+性能测试的日志文件为profile_log_bert_seq128_model
+如需修改性能测试用例的参数,请修改benchmark.sh中的配置信息。
diff --git a/python/examples/bert/benchmark.py b/python/examples/bert/benchmark.py
old mode 100644
new mode 100755
index c177d4b8c25eb8a79c9a851399f530f197499964..eff82051c37dcbfef6002e2e14af643dac4864a9
--- a/python/examples/bert/benchmark.py
+++ b/python/examples/bert/benchmark.py
@@ -21,6 +21,7 @@ import sys
 import time
 import json
 import requests
+import numpy as np
 from paddle_serving_client import Client
 from paddle_serving_client.utils import MultiThreadRunner
 from paddle_serving_client.utils import benchmark_args, show_latency
@@ -56,7 +57,11 @@ def single_func(idx, resource):
                 feed_batch = []
                 b_start = time.time()
                 for bi in range(args.batch_size):
-                    feed_batch.append(reader.process(dataset[bi]))
+                    feed_dict = reader.process(dataset[bi])
+                    for key in feed_dict.keys():
+                        feed_dict[key] = np.array(feed_dict[key]).reshape(
+                            (1, 128, 1))
+                    feed_batch.append(feed_dict)
                 b_end = time.time()
 
                 if profile_flags:
@@ -116,9 +121,7 @@ def single_func(idx, resource):
 
 if __name__ == '__main__':
     multi_thread_runner = MultiThreadRunner()
-    endpoint_list = [
-        "127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295"
-    ]
+    endpoint_list = ["127.0.0.1:9292", "127.0.0.1:9293"]
     turns = 100
     start = time.time()
     result = multi_thread_runner.run(
diff --git a/python/examples/bert/benchmark.sh b/python/examples/bert/benchmark.sh
old mode 100644
new mode 100755
index 525e955e93a8663d3ed65ea1d39bed1f4f6babfe..1a8263556d345f367f503460274f1cb0165df2c5
--- a/python/examples/bert/benchmark.sh
+++ b/python/examples/bert/benchmark.sh
@@ -1,5 +1,5 @@
 rm profile_log*
-export CUDA_VISIBLE_DEVICES=0,1,2,3
+export CUDA_VISIBLE_DEVICES=0,1
 export FLAGS_profile_server=1
 export FLAGS_profile_client=1
 export FLAGS_serving_latency=1
@@ -12,7 +12,7 @@ else
     mkdir utilization
 fi
 #start server
-$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim > elog 2>&1 &
+$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1 --mem_optim --ir_optim > elog 2>&1 &
 sleep 5
 #warm up
diff --git a/python/examples/bert/benchmark_with_profile.sh b/python/examples/bert/benchmark_with_profile.sh
old mode 100644
new mode 100755
index 074a9acd2455768bf21b96471a6449231d3e8546..f36fbbce917d2956195d08e7638e06d84caf961a
--- a/python/examples/bert/benchmark_with_profile.sh
+++ b/python/examples/bert/benchmark_with_profile.sh
@@ -1,5 +1,5 @@
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
+export CUDA_VISIBLE_DEVICES=0,1
+python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1 2> elog > stdlog &
 export FLAGS_profile_client=1
 export FLAGS_profile_server=1
 sleep 5
diff --git a/python/examples/fit_a_line/README.md b/python/examples/fit_a_line/README.md
index af45b2a854381cb5c5739e9c89518d2e80753f1b..77583ce596727d5d0335696fab10960550352ccb 100644
--- a/python/examples/fit_a_line/README.md
+++ b/python/examples/fit_a_line/README.md
@@ -42,3 +42,9 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
 ``` shell
 curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
 ```
+
+## Benchmark
+``` shell
+bash benchmark.sh uci_housing_model uci_housing_client
+```
+The benchmark log file is named `profile_log_uci_housing_model`.
diff --git a/python/examples/fit_a_line/README_CN.md b/python/examples/fit_a_line/README_CN.md
index 9ef55749b123f00b1e0da4627bdad6de5cea0d98..e115b6debb330adbd4c81f94338a67305caa6d37 100644
--- a/python/examples/fit_a_line/README_CN.md
+++ b/python/examples/fit_a_line/README_CN.md
@@ -43,3 +43,10 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
 ``` shell
 curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
 ```
+
+## 性能测试
+``` shell
+bash benchmark.sh uci_housing_model uci_housing_client
+```
+性能测试的日志文件为profile_log_uci_housing_model
+如需修改性能测试用例的参数,请修改benchmark.sh中的配置信息。
diff --git a/python/examples/fit_a_line/benchmark.py b/python/examples/fit_a_line/benchmark.py
index b1550b2ff5d40aa10e3f415c82235c06a4508012..77f0965f778f760d68b0737273a5536e48350606 100644
--- a/python/examples/fit_a_line/benchmark.py
+++ b/python/examples/fit_a_line/benchmark.py
@@ -15,7 +15,7 @@
 
 from paddle_serving_client import Client
 from paddle_serving_client.utils import MultiThreadRunner
-from paddle_serving_client.utils import benchmark_args
+from paddle_serving_client.utils import benchmark_args, show_latency
 import time
 import paddle
 import sys
@@ -37,9 +37,6 @@ def single_func(idx, resource):
         client.connect([args.endpoint])
         start = time.time()
         for data in train_reader():
-            #new_data = np.zeros((1, 13)).astype("float32")
-            #new_data[0] = data[0][0]
-            #fetch_map = client.predict(feed={"x": new_data}, fetch=["price"], batch=True)
             fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
         end = time.time()
         return [[end - start], [total_number]]
@@ -57,6 +54,17 @@ def single_func(idx, resource):
         return [[end - start], [total_number]]
 
 
+start = time.time()
 multi_thread_runner = MultiThreadRunner()
 result = multi_thread_runner.run(single_func, args.thread, {})
-print(result)
+end = time.time()
+total_cost = end - start
+avg_cost = 0
+for i in range(args.thread):
+    avg_cost += result[0][i]
+avg_cost = avg_cost / args.thread
+
+print("total cost: {}s".format(total_cost))
+print("each thread cost: {}s. ".format(avg_cost))
+print("qps: {}samples/s".format(args.batch_size * args.thread / total_cost))
+show_latency(result[1])
diff --git a/python/examples/util/show_profile.py b/python/examples/util/show_profile.py
old mode 100644
new mode 100755
index 3815ad9ec943329a29767ca8f4217943f0d84e4b..a726e765e4c0a9b77263eb3ea67658545913de58
--- a/python/examples/util/show_profile.py
+++ b/python/examples/util/show_profile.py
@@ -5,6 +5,7 @@ import collections
 profile_file = sys.argv[1]
 thread_num = sys.argv[2]
 time_dict = collections.OrderedDict()
+query_count = 0
 
 
 def prase(line):
@@ -26,12 +27,15 @@ def prase(line):
 
 
 with open(profile_file) as f:
+    query_count = 0
     for line in f.readlines():
         line = line.strip().split("\t")
         if line[0] == "PROFILE":
             prase(line[2])
+            query_count += 1
 
 print("thread_num: {}".format(thread_num))
+print("query_count: {}".format(query_count))
 for name in time_dict:
     print("{} cost: {}s in each thread ".format(name, time_dict[name] / (
         1000000.0 * float(thread_num))))
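
A note on the feed reshaping introduced in `bert/benchmark.py`: each field coming out of the reader is turned into an ndarray of shape `(1, 128, 1)`, i.e. an explicit batch dimension, the sequence length, and a trailing unit dimension, before being appended to `feed_batch`. A minimal standalone sketch of that step; the field names and token values below are hypothetical, and only the `max_seq_len = 128` target shape is taken from the patch.

```python
import numpy as np

# Hypothetical reader output: each field is a flat list of 128 integers
# (max_seq_len = 128, matching the reshape target in the patch).
feed_dict = {
    "input_ids": [1] * 128,
    "position_ids": list(range(128)),
}

# Reshape every field to (batch=1, seq_len=128, 1), as bert/benchmark.py does.
for key in feed_dict.keys():
    feed_dict[key] = np.array(feed_dict[key]).reshape((1, 128, 1))

print(feed_dict["input_ids"].shape)  # (1, 128, 1)
```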
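The summary printed at the end of `fit_a_line/benchmark.py` boils down to three numbers: wall-clock time around the multi-thread run, mean per-thread elapsed time, and QPS. A sketch of that arithmetic with made-up timings, assuming `result[0]` holds one elapsed time per client thread as `MultiThreadRunner.run` returns it in the patch above:

```python
# Hypothetical numbers standing in for MultiThreadRunner output.
thread_num = 2
batch_size = 1
per_thread_cost = [2.1, 1.9]   # result[0]: elapsed seconds per thread
total_cost = 2.2               # wall-clock time around multi_thread_runner.run

avg_cost = sum(per_thread_cost) / thread_num   # mean per-thread time
qps = batch_size * thread_num / total_cost     # formula used in the patch

print("total cost: {}s".format(total_cost))    # 2.2s
print("each thread cost: {}s".format(avg_cost))  # 2.0s
print("qps: {}samples/s".format(qps))          # ~0.91 samples/s
```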
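For the `show_profile.py` change, `query_count` simply counts one `PROFILE` record per query while `prase()` accumulates per-op wall time. The sketch below imitates that aggregation end to end. The record layout (`<op>_0:<start_us> <op>_1:<end_us>` pairs in the third tab-separated field) is an assumption inferred from how `prase()` is called, and both log lines are fabricated for illustration.

```python
import collections

time_dict = collections.OrderedDict()
query_count = 0
thread_num = 2

# Fabricated profile records; real ones come from the profile_log file.
log_lines = [
    "PROFILE\tpid:100\tprepro_0:1000 prepro_1:3000 infer_0:3000 infer_1:9000",
    "PROFILE\tpid:101\tprepro_0:1000 prepro_1:2000 infer_0:2000 infer_1:8000",
]
for raw in log_lines:
    fields = raw.strip().split("\t")
    if fields[0] != "PROFILE":
        continue
    query_count += 1  # one PROFILE record per query, as counted in the patch
    events = fields[2].split(" ")
    for i in range(0, len(events), 2):
        name = events[i].rsplit("_", 1)[0]       # "prepro_0" -> "prepro"
        start_us = int(events[i].split(":")[1])
        end_us = int(events[i + 1].split(":")[1])
        time_dict[name] = time_dict.get(name, 0) + (end_us - start_us)

print("query_count: {}".format(query_count))
for name in time_dict:
    # microseconds -> seconds, averaged across threads, as in show_profile.py
    print("{} cost: {}s in each thread".format(
        name, time_dict[name] / (1000000.0 * float(thread_num))))
```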