diff --git a/python/examples/bert/README.md b/python/examples/bert/README.md index 612ba8beb979f5a1ee9d1a8052f73169191f4fd7..acbd3d05cd4197e0e74f9ab04b141488145094ec 100644 --- a/python/examples/bert/README.md +++ b/python/examples/bert/README.md @@ -42,7 +42,7 @@ pip install paddle_serving_app ``` 执行 ``` -cat data-c.txt | python bert_client.py +head data-c.txt | python bert_client.py --model bert_seq20_client/serving_client_conf.prototxt ``` 启动client读取data-c.txt中的数据进行预测,预测结果为文本的向量表示(由于数据较多,脚本中没有将输出进行打印),server端的地址在脚本中修改。 diff --git a/python/examples/bert/benchmark_batch.py b/python/examples/bert/benchmark_batch.py index e0f677146a47c0366a1bbafe9eff049e2671a617..872799e64ea599554e42264c37ab5f574c0acb13 100644 --- a/python/examples/bert/benchmark_batch.py +++ b/python/examples/bert/benchmark_batch.py @@ -35,19 +35,28 @@ def single_func(idx, resource): dataset = [] for line in fin: dataset.append(line.strip()) + profile_flags = False + if os.environ["FLAGS_profile_client"]: + profile_flags = True if args.request == "rpc": reader = BertReader(vocab_file="vocab.txt", max_seq_len=20) fetch = ["pooled_output"] client = Client() client.load_client_config(args.model) client.connect([resource["endpoint"][idx % len(resource["endpoint"])]]) - feed_batch = [] - for bi in range(args.batch_size): - feed_batch.append(reader.process(dataset[bi])) - start = time.time() for i in range(1000): if args.batch_size >= 1: + feed_batch = [] + b_start = time.time() + for bi in range(args.batch_size): + feed_batch.append(reader.process(dataset[bi])) + b_end = time.time() + if profile_flags: + print("PROFILE\tpid:{}\tbert+pre_0:{} bert_pre_1:{}".format( + os.getpid(), + int(round(b_start * 1000000)), + int(round(b_end * 1000000)))) result = client.batch_predict( feed_batch=feed_batch, fetch=fetch) else: @@ -62,7 +71,7 @@ def single_func(idx, resource): if __name__ == '__main__': multi_thread_runner = MultiThreadRunner() endpoint_list = [ - "127.0.0.1:9295", "127.0.0.1:9296", "127.0.0.1:9297", "127.0.0.1:9298" + "127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295" ] result = multi_thread_runner.run(single_func, args.thread, {"endpoint": endpoint_list}) diff --git a/python/examples/bert/bert_client.py b/python/examples/bert/bert_client.py index cceb07c7334aa0cc9b9c347ae53351b7a6fa3513..51364c6745731017b31923d246990497115dc780 100644 --- a/python/examples/bert/bert_client.py +++ b/python/examples/bert/bert_client.py @@ -29,13 +29,13 @@ from paddle_serving_app import ChineseBertReader args = benchmark_args() -reader = ChineseBertReader(max_seq_len=20) +reader = ChineseBertReader({"max_seq_len": 20}) fetch = ["pooled_output"] endpoint_list = ["127.0.0.1:9292"] client = Client() client.load_client_config(args.model) client.connect(endpoint_list) -for line in fin: +for line in sys.stdin: feed_dict = reader.process(line) result = client.predict(feed=feed_dict, fetch=fetch) diff --git a/python/examples/bert/bert_web_service.py b/python/examples/bert/bert_web_service.py index e9a222db958bdd0df18e7f15908b722c2b4b71a9..04462ca3b16fecf818aadad63b4f67a8d97014fd 100644 --- a/python/examples/bert/bert_web_service.py +++ b/python/examples/bert/bert_web_service.py @@ -32,8 +32,7 @@ bert_service = BertService(name="bert") bert_service.load() bert_service.load_model_config(sys.argv[1]) gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"] -gpus = [int(x) for x in gpu_ids.split(",")] -bert_service.set_gpus(gpus) +bert_service.set_gpus(gpu_ids) bert_service.prepare_server( workdir="workdir", port=int(sys.argv[2]), device="gpu") bert_service.run_server()