diff --git a/python/examples/bert/benchmark.py b/python/examples/bert/benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..774956a324f1e7c45df0e246af4d580c570822d2
--- /dev/null
+++ b/python/examples/bert/benchmark.py
@@ -0,0 +1,65 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+from paddle_serving_client import Client
+from paddle_serving_client.metric import auc
+from paddle_serving_client.utils import MultiThreadRunner
+import time
+from test_bert_client import BertService
+
+
+def predict(thr_id, resource):
+    bc = BertService(
+        model_name="bert_chinese_L-12_H-768_A-12",
+        max_seq_len=20,
+        do_lower_case=True)
+    bc.load_client(resource["conf_file"], resource["server_endpoint"])
+    thread_num = resource["thread_num"]
+    file_list = resource["filelist"]
+    line_id = 0
+    result = []
+    label_list = []
+    dataset = []
+    for fn in file_list:
+        fin = open(fn)
+        for line in fin:
+            if line_id % thread_num == thr_id - 1:
+                dataset.append(line.strip())
+            line_id += 1
+        fin.close()
+
+    start = time.time()
+    fetch = ["pooled_output"]
+    for inst in dataset:
+        fetch_map = bc.run_general([[inst]], fetch)
+        result.append(fetch_map["pooled_output"])
+    end = time.time()
+    return [result, label_list, [end - start]]
+
+
+if __name__ == '__main__':
+    conf_file = sys.argv[1]
+    data_file = sys.argv[2]
+    thread_num = sys.argv[3]
+    resource = {}
+    resource["conf_file"] = conf_file
+    resource["server_endpoint"] = ["127.0.0.1:9293"]
+    resource["filelist"] = [data_file]
+    resource["thread_num"] = int(thread_num)
+
+    thread_runner = MultiThreadRunner()
+    result = thread_runner.run(predict, int(sys.argv[3]), resource)
+
+    print("total time {} s".format(sum(result[-1]) / len(result[-1])))
diff --git a/python/examples/bert/benchmark.sh b/python/examples/bert/benchmark.sh
new file mode 100644
index 0000000000000000000000000000000000000000..9a2fa2d8f3440cf8f7dd1bf69707b8e0414c703e
--- /dev/null
+++ b/python/examples/bert/benchmark.sh
@@ -0,0 +1,7 @@
+rm -f profile_log
+for thread_num in 1 4 8 12 16 20 24
+do
+    $PYTHONROOT/bin/python benchmark.py serving_client_conf/serving_client_conf.prototxt data.txt $thread_num > profile 2>&1
+    $PYTHONROOT/bin/python ../imdb/show_profile.py profile $thread_num >> profile_log
+    tail -n 1 profile >> profile_log
+done
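benchmark.py shards work by giving thread thr_id every thread_num-th input line. A minimal, self-contained sketch of that round-robin split, with toy data, assuming the 1-based thread ids that the "thr_id - 1" test above implies:

    lines = ["s0", "s1", "s2", "s3", "s4", "s5", "s6"]
    thread_num = 3

    def shard(thr_id):
        # keep line i iff i % thread_num == thr_id - 1
        return [l for i, l in enumerate(lines) if i % thread_num == thr_id - 1]

    for thr_id in range(1, thread_num + 1):
        print(thr_id, shard(thr_id))
    # 1 ['s0', 's3', 's6']
    # 2 ['s1', 's4']
    # 3 ['s2', 's5']

Every line lands in exactly one thread's dataset, so the threads together cover the whole file without coordination.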
diff --git a/python/examples/bert/benchmark_batch.py b/python/examples/bert/benchmark_batch.py
new file mode 100644
index 0000000000000000000000000000000000000000..8cf4b3a082d3472bc2130a0ed52184e75f165eb9
--- /dev/null
+++ b/python/examples/bert/benchmark_batch.py
@@ -0,0 +1,76 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+from paddle_serving_client import Client
+from paddle_serving_client.metric import auc
+from paddle_serving_client.utils import MultiThreadRunner
+import time
+from test_bert_client import BertService
+
+
+def predict(thr_id, resource):
+    bc = BertService(
+        model_name="bert_chinese_L-12_H-768_A-12",
+        max_seq_len=20,
+        do_lower_case=True)
+    bc.load_client(resource["conf_file"], resource["server_endpoint"])
+    thread_num = resource["thread_num"]
+    batch_size = resource["batch_size"]
+    file_list = resource["filelist"]
+    line_id = 0
+    result = []
+    label_list = []
+    dataset = []
+    for fn in file_list:
+        fin = open(fn)
+        for line in fin:
+            if line_id % thread_num == thr_id - 1:
+                dataset.append(line.strip())
+            line_id += 1
+        fin.close()
+
+    start = time.time()
+    fetch = ["pooled_output"]
+    batch = []
+    for inst in dataset:
+        batch.append([inst])
+        if len(batch) == batch_size:
+            fetch_map_batch = bc.run_batch_general(batch, fetch)
+            result.append(fetch_map_batch)
+            batch = []
+    # run the remaining partial batch instead of dropping it
+    if len(batch) > 0:
+        fetch_map_batch = bc.run_batch_general(batch, fetch)
+        result.append(fetch_map_batch)
+    end = time.time()
+    return [result, label_list, [end - start]]
+
+
+if __name__ == '__main__':
+    conf_file = sys.argv[1]
+    data_file = sys.argv[2]
+    thread_num = sys.argv[3]
+    batch_size = sys.argv[4]
+    resource = {}
+    resource["conf_file"] = conf_file
+    resource["server_endpoint"] = ["127.0.0.1:9293"]
+    resource["filelist"] = [data_file]
+    resource["thread_num"] = int(thread_num)
+    resource["batch_size"] = int(batch_size)
+
+    thread_runner = MultiThreadRunner()
+    result = thread_runner.run(predict, int(sys.argv[3]), resource)
+
+    print("total time {} s".format(sum(result[-1]) / len(result[-1])))
diff --git a/python/examples/bert/benchmark_batch.sh b/python/examples/bert/benchmark_batch.sh
new file mode 100644
index 0000000000000000000000000000000000000000..31965cf53c8afd9591ecf570994c3c60653e9962
--- /dev/null
+++ b/python/examples/bert/benchmark_batch.sh
@@ -0,0 +1,8 @@
+rm -f profile_log
+thread_num=1
+for batch_size in 1 4 8 16 32 64 128 256
+do
+    $PYTHONROOT/bin/python benchmark_batch.py serving_client_conf/serving_client_conf.prototxt data.txt $thread_num $batch_size > profile 2>&1
+    $PYTHONROOT/bin/python ../imdb/show_profile.py profile $thread_num >> profile_log
+    tail -n 1 profile >> profile_log
+done
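The loop in benchmark_batch.py follows a flush-and-drain pattern: emit a batch as soon as it fills, then run whatever is left over after the input ends. A minimal, self-contained sketch of the same pattern, with run_batch standing in for bc.run_batch_general:

    def run_batch(batch):
        # stand-in for bc.run_batch_general; just report the batch size
        return len(batch)

    dataset = list(range(10))
    batch_size = 4
    batch, results = [], []
    for inst in dataset:
        batch.append([inst])
        if len(batch) == batch_size:   # flush a full batch
            results.append(run_batch(batch))
            batch = []
    if batch:                          # drain the final partial batch
        results.append(run_batch(batch))
    print(results)                     # [4, 4, 2]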
diff --git a/python/examples/bert/prepare_model.py b/python/examples/bert/prepare_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..b5f80a78feb07a0617bec3833bbe1cf3884d7dea
--- /dev/null
+++ b/python/examples/bert/prepare_model.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddlehub as hub
+import paddle.fluid as fluid
+import paddle_serving_client.io as serving_io
+
+model_name = "bert_chinese_L-12_H-768_A-12"
+module = hub.Module(model_name)
+inputs, outputs, program = module.context(trainable=True, max_seq_len=20)
+place = fluid.CPUPlace()
+exe = fluid.Executor(place)
+input_ids = inputs["input_ids"]
+position_ids = inputs["position_ids"]
+segment_ids = inputs["segment_ids"]
+input_mask = inputs["input_mask"]
+pooled_output = outputs["pooled_output"]
+sequence_output = outputs["sequence_output"]
+
+feed_var_names = [
+    input_ids.name, position_ids.name, segment_ids.name, input_mask.name
+]
+
+target_vars = [pooled_output, sequence_output]
+
+serving_io.save_model("serving_server_model", "serving_client_conf", {
+    "input_ids": input_ids,
+    "position_ids": position_ids,
+    "segment_ids": segment_ids,
+    "input_mask": input_mask,
+}, {"pooled_output": pooled_output,
+    "sequence_output": sequence_output}, program)
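serving_io.save_model writes two artifacts: a server-side model directory ("serving_server_model") and a client-side config directory ("serving_client_conf"). A minimal sketch of wiring the client config to a Client, assuming a server is already listening on 127.0.0.1:9293 (the endpoint the other scripts in this example use):

    from paddle_serving_client import Client

    client = Client()
    # generated by serving_io.save_model in prepare_model.py
    client.load_client_config("serving_client_conf/serving_client_conf.prototxt")
    client.connect(["127.0.0.1:9293"])
    # feed/fetch names mirror the dicts passed to save_model:
    # input_ids/position_ids/segment_ids/input_mask -> pooled_output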
"segment_ids": sent_list, + "input_mask": mask_list + } + fetch_map = self.client.predict(feed=feed, fetch=fetch) + + return fetch_map + + def run_batch_general(self, text, fetch): + self.batch_size = len(text) + data_generator = self.reader.data_generator( + batch_size=self.batch_size, phase='predict', data=text) + result = [] + for run_step, batch in enumerate(data_generator(), start=1): + token_list = batch[0][0].reshape(-1).tolist() + pos_list = batch[0][1].reshape(-1).tolist() + sent_list = batch[0][2].reshape(-1).tolist() + mask_list = batch[0][3].reshape(-1).tolist() + feed_batch = [] + for si in range(self.batch_size): + feed = { + "input_ids": token_list[si * self.max_seq_len:(si + 1) * + self.max_seq_len], + "position_ids": + pos_list[si * self.max_seq_len:(si + 1) * self.max_seq_len], + "segment_ids": sent_list[si * self.max_seq_len:(si + 1) * + self.max_seq_len], + "input_mask": + mask_list[si * self.max_seq_len:(si + 1) * self.max_seq_len] + } + feed_batch.append(feed) + fetch_map_batch = self.client.batch_predict( + feed_batch=feed_batch, fetch=fetch) + return fetch_map_batch + + +def test(): + + bc = BertService( + model_name='bert_uncased_L-12_H-768_A-12', + max_seq_len=20, + show_ids=False, + do_lower_case=True) + server_addr = ["127.0.0.1:9293"] + config_file = './serving_client_conf/serving_client_conf.prototxt' + fetch = ["pooled_output"] + bc.load_client(config_file, server_addr) + batch_size = 4 + batch = [] + for line in sys.stdin: + if len(batch) < batch_size: + batch.append([line.strip()]) + else: + result = bc.run_batch_general(batch, fetch) + batch = [] + for r in result: + for e in r["pooled_output"]: + print(e) + + +if __name__ == '__main__': + test() diff --git a/python/examples/bert/test_gpu_server.py b/python/examples/bert/test_gpu_server.py new file mode 100644 index 0000000000000000000000000000000000000000..54459a12e88646555bd37b33441a3b50e2b0e62d --- /dev/null +++ b/python/examples/bert/test_gpu_server.py @@ -0,0 +1,40 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/python/examples/bert/test_gpu_server.py b/python/examples/bert/test_gpu_server.py
new file mode 100644
index 0000000000000000000000000000000000000000..54459a12e88646555bd37b33441a3b50e2b0e62d
--- /dev/null
+++ b/python/examples/bert/test_gpu_server.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+from paddle_serving_server_gpu import OpMaker
+from paddle_serving_server_gpu import OpSeqMaker
+from paddle_serving_server_gpu import Server
+
+op_maker = OpMaker()
+read_op = op_maker.create('general_reader')
+general_infer_op = op_maker.create('general_infer')
+general_response_op = op_maker.create('general_response')
+
+op_seq_maker = OpSeqMaker()
+op_seq_maker.add_op(read_op)
+op_seq_maker.add_op(general_infer_op)
+op_seq_maker.add_op(general_response_op)
+
+server = Server()
+server.set_op_sequence(op_seq_maker.get_op_sequence())
+server.set_num_threads(8)
+server.set_memory_optimize(True)
+server.set_gpuid(1)
+
+server.load_model_config(sys.argv[1])
+port = int(sys.argv[2])
+server.prepare_server(workdir="work_dir1", port=port, device="gpu")
+server.run_server()
diff --git a/python/examples/bert/test_server.py b/python/examples/bert/test_server.py
new file mode 100644
index 0000000000000000000000000000000000000000..52b74b4622cfa3add6ad41678339924e3f9c3b0c
--- /dev/null
+++ b/python/examples/bert/test_server.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+from paddle_serving_server import OpMaker
+from paddle_serving_server import OpSeqMaker
+from paddle_serving_server import Server
+
+op_maker = OpMaker()
+read_op = op_maker.create('general_reader')
+general_infer_op = op_maker.create('general_infer')
+general_response_op = op_maker.create('general_response')
+
+op_seq_maker = OpSeqMaker()
+op_seq_maker.add_op(read_op)
+op_seq_maker.add_op(general_infer_op)
+op_seq_maker.add_op(general_response_op)
+
+server = Server()
+server.set_op_sequence(op_seq_maker.get_op_sequence())
+server.set_num_threads(4)
+server.set_local_bin(
+    "~/github/Serving/build_server/core/general-server/serving")
+
+server.load_model_config(sys.argv[1])
+port = int(sys.argv[2])
+server.prepare_server(workdir="work_dir1", port=port, device="cpu")
+server.run_server()
["127.0.0.1:9293"] resource["filelist"] = [data_file] resource["thread_num"] = int(sys.argv[3]) thread_runner = MultiThreadRunner() result = thread_runner.run(predict, int(sys.argv[3]), resource) - print("thread num {}\ttotal time {}".format(sys.argv[ - 3], sum(result[-1]) / len(result[-1]))) - print("thread num {}\ttotal time {}".format(sys.argv[ - 3], sum(result[2]) / 1000.0 / 1000.0 / len(result[2]))) + print("total time {} s".format(sum(result[-1]) / len(result[-1]))) diff --git a/python/examples/imdb/show_profile.py b/python/examples/imdb/show_profile.py new file mode 100644 index 0000000000000000000000000000000000000000..4438de4e610afabc95ebc7b2daea01404e684ca0 --- /dev/null +++ b/python/examples/imdb/show_profile.py @@ -0,0 +1,37 @@ +#coding=utf-8 +import sys +import collections + +profile_file = sys.argv[1] +thread_num = sys.argv[2] +time_dict = collections.OrderedDict() + + +def prase(line): + profile_list = line.split(" ") + num = len(profile_list) + for idx in range(num / 2): + profile_0_list = profile_list[idx * 2].split(":") + profile_1_list = profile_list[idx * 2 + 1].split(":") + if len(profile_0_list[0].split("_")) == 2: + name = profile_0_list[0].split("_")[0] + else: + name = profile_0_list[0].split("_")[0] + "_" + profile_0_list[ + 0].split("_")[1] + cost = long(profile_1_list[1]) - long(profile_0_list[1]) + if name not in time_dict: + time_dict[name] = cost + else: + time_dict[name] += cost + + +with open(profile_file) as f: + for line in f.readlines(): + line = line.strip().split("\t") + if line[0] == "PROFILE": + prase(line[1]) + +print("thread num {}".format(thread_num)) +for name in time_dict: + print("{} cost {} s per thread ".format(name, time_dict[name] / ( + 1000000.0 * float(thread_num))))