diff --git a/python/examples/bert/benchmark.py b/python/examples/bert/benchmark.py
index f1533d9710d3149a37818d3f1bc146fad6ce6537..3ac9d07625e881b43550578c4a6346e4ac874063 100644
--- a/python/examples/bert/benchmark.py
+++ b/python/examples/bert/benchmark.py
@@ -19,6 +19,8 @@ from __future__ import unicode_literals, absolute_import
 import os
 import sys
 import time
+import json
+import requests
 from paddle_serving_client import Client
 from paddle_serving_client.utils import MultiThreadRunner
 from paddle_serving_client.utils import benchmark_args, show_latency
@@ -72,7 +74,39 @@ def single_func(idx, resource):
             print("unsupport batch size {}".format(args.batch_size))
 
     elif args.request == "http":
-        raise ("not implemented")
+        reader = ChineseBertReader({"max_seq_len": 128})
+        fetch = ["pooled_output"]
+        server = "http://" + resource["endpoint"][idx % len(resource[
+            "endpoint"])] + "/bert/prediction"
+        start = time.time()
+        for i in range(turns):
+            if args.batch_size >= 1:
+                l_start = time.time()
+                feed_batch = []
+                b_start = time.time()
+                for bi in range(args.batch_size):
+                    feed_batch.append({"words": dataset[bi]})
+                req = json.dumps({"feed": feed_batch, "fetch": fetch})
+                b_end = time.time()
+
+                if profile_flags:
+                    sys.stderr.write(
+                        "PROFILE\tpid:{}\tbert_pre_0:{} bert_pre_1:{}\n".format(
+                            os.getpid(),
+                            int(round(b_start * 1000000)),
+                            int(round(b_end * 1000000))))
+                result = requests.post(
+                    server,
+                    data=req,
+                    headers={"Content-Type": "application/json"})
+                l_end = time.time()
+                if latency_flags:
+                    latency_list.append(l_end * 1000 - l_start * 1000)
+            else:
+                print("unsupport batch size {}".format(args.batch_size))
+
+    else:
+        raise ValueError("not implemented {} request".format(args.request))
     end = time.time()
     if latency_flags:
         return [[end - start], latency_list]
@@ -82,9 +116,7 @@
 
 if __name__ == '__main__':
     multi_thread_runner = MultiThreadRunner()
-    endpoint_list = [
-        "127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295"
-    ]
+    endpoint_list = ["127.0.0.1:9292"]
     turns = 10
     start = time.time()
     result = multi_thread_runner.run(
diff --git a/python/examples/bert/bert_client.py b/python/examples/bert/bert_client.py
index b72d17f142c65bafe8ef13e1a963aacce6b3e821..362ac67915870af9d11209520daa61daa95082c1 100644
--- a/python/examples/bert/bert_client.py
+++ b/python/examples/bert/bert_client.py
@@ -14,15 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
 import sys
-import numpy as np
-import paddlehub as hub
-import ujson
-import random
-import time
-from paddlehub.common.logger import logger
-import socket
 from paddle_serving_client import Client
 from paddle_serving_client.utils import benchmark_args
 from paddle_serving_app.reader import ChineseBertReader
diff --git a/python/examples/imagenet/benchmark.py b/python/examples/imagenet/benchmark.py
index ac7ba8c333d25fb23bfc7695105315bfaa4e76ee..5c4c44cc1bd091af6c4d343d2b7f0f436cca2e7e 100644
--- a/python/examples/imagenet/benchmark.py
+++ b/python/examples/imagenet/benchmark.py
@@ -73,7 +73,7 @@ def single_func(idx, resource):
             print("unsupport batch size {}".format(args.batch_size))
 
     elif args.request == "http":
-        py_version = 2
+        py_version = sys.version_info[0]
         server = "http://" + resource["endpoint"][idx % len(resource[
             "endpoint"])] + "/image/prediction"
         start = time.time()