From 0af4cfdb55c6d9f1929c6bae87fc7bf9917bfed8 Mon Sep 17 00:00:00 2001
From: MRXLT
Date: Thu, 26 Dec 2019 06:43:25 +0000
Subject: [PATCH] add paddle-gpu-serving

---
 demo-client/src/bert_service.cpp              |   2 -
 demo-serving/op/bert_service_op.cpp           |   7 +-
 .../client/bert_service/bert_service.py       | 245 ++++++++++++++++
 .../paddle-gpu-serving/README.md              |  45 +++
 .../paddle_gpu_serving/__init__.py            |  15 +
 .../paddle_gpu_serving/run/__init__.py        | 262 ++++++++++++++++++
 .../paddle_gpu_serving/server/conf/cube.conf  |  15 +
 .../server/conf/gflags.conf                   |   2 +
 .../server/conf/model_toolkit.prototxt        |  11 +
 .../server/conf/model_toolkit.prototxt.0      |  11 +
 .../server/conf/model_toolkit.prototxt.1      |  11 +
 .../server/conf/model_toolkit.prototxt.10     |  11 +
 .../server/conf/model_toolkit.prototxt.11     |  11 +
 .../server/conf/model_toolkit.prototxt.12     |  11 +
 .../server/conf/model_toolkit.prototxt.13     |  11 +
 .../server/conf/model_toolkit.prototxt.14     |  11 +
 .../server/conf/model_toolkit.prototxt.15     |  11 +
 .../server/conf/model_toolkit.prototxt.2      |  11 +
 .../server/conf/model_toolkit.prototxt.3      |  11 +
 .../server/conf/model_toolkit.prototxt.4      |  11 +
 .../server/conf/model_toolkit.prototxt.5      |  11 +
 .../server/conf/model_toolkit.prototxt.6      |  11 +
 .../server/conf/model_toolkit.prototxt.7      |  11 +
 .../server/conf/model_toolkit.prototxt.8      |  11 +
 .../server/conf/model_toolkit.prototxt.9      |  11 +
 .../server/conf/resource.prototxt             |   3 +
 .../server/conf/resource.prototxt.0           |   3 +
 .../server/conf/resource.prototxt.1           |   3 +
 .../server/conf/resource.prototxt.10          |   3 +
 .../server/conf/resource.prototxt.11          |   3 +
 .../server/conf/resource.prototxt.12          |   3 +
 .../server/conf/resource.prototxt.13          |   3 +
 .../server/conf/resource.prototxt.14          |   3 +
 .../server/conf/resource.prototxt.15          |   3 +
 .../server/conf/resource.prototxt.2           |   3 +
 .../server/conf/resource.prototxt.3           |   3 +
 .../server/conf/resource.prototxt.4           |   3 +
 .../server/conf/resource.prototxt.5           |   3 +
 .../server/conf/resource.prototxt.6           |   3 +
 .../server/conf/resource.prototxt.7           |   3 +
 .../server/conf/resource.prototxt.8           |   3 +
 .../server/conf/resource.prototxt.9           |   3 +
 .../server/conf/service.prototxt              |   4 +
 .../server/conf/workflow.prototxt             |  94 +++++++
 .../data/model/paddle/fluid_reload_flag       |   2 +
 .../server/data/model/paddle/fluid_time_file  |   2 +
 .../paddle-gpu-serving/setup.py               |  50 ++++
 47 files changed, 978 insertions(+), 5 deletions(-)
 create mode 100644 paddle-gpu-serving/client/bert_service/bert_service.py
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/README.md
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/__init__.py
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/run/__init__.py
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/cube.conf
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/gflags.conf
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.0
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.1
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.10
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.11
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.12
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.13
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.14
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.15
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.2
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.3
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.4
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.5
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.6
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.7
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.8
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.9
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.0
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.1
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.10
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.11
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.12
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.13
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.14
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.15
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.2
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.3
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.4
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.5
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.6
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.7
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.8
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.9
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/service.prototxt
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/workflow.prototxt
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/data/model/paddle/fluid_reload_flag
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/data/model/paddle/fluid_time_file
 create mode 100644 paddle-gpu-serving/paddle-gpu-serving/setup.py

diff --git a/demo-client/src/bert_service.cpp b/demo-client/src/bert_service.cpp
index 1910b184..66af91d5 100644
--- a/demo-client/src/bert_service.cpp
+++ b/demo-client/src/bert_service.cpp
@@ -35,7 +35,6 @@ using baidu::paddle_serving::predictor::bert_service::EmbeddingValues;
 extern int batch_size = 1;
 extern int max_seq_len = 128;
 extern int layer_num = 12;
-extern int emb_size = 768;
 extern int thread_num = 1;
 extern int max_turn = 1000;
 
@@ -95,7 +94,6 @@ int create_req(Request* req,
     }
   }
   req->set_max_seq_len(max_seq_len);
-  req->set_emb_size(emb_size);
   return 0;
 }
diff --git a/demo-serving/op/bert_service_op.cpp b/demo-serving/op/bert_service_op.cpp
index 831ab855..47828212 100644
--- a/demo-serving/op/bert_service_op.cpp
+++ b/demo-serving/op/bert_service_op.cpp
@@ -59,7 +59,7 @@ int BertServiceOp::inference() {
   }
 
   const int64_t MAX_SEQ_LEN = req->max_seq_len();
-  const int64_t EMB_SIZE = req->emb_size();
+  // const int64_t EMB_SIZE = req->emb_size();
 
   paddle::PaddleTensor src_ids;
   paddle::PaddleTensor pos_ids;
@@ -172,12 +172,13 @@ int BertServiceOp::inference() {
   LOG(INFO) << "batch_size : " << out->at(0).shape[0]
             << " emb_size : " << out->at(0).shape[1];
-  float *out_data = reinterpret_cast<float *>(out->at(0).data.data());
+  uint32_t emb_size = out->at(0).shape[1];
+  float *out_data = reinterpret_cast<float *>(out->at(0).data.data());
   for (uint32_t bi = 0; bi < batch_size; bi++) {
     BertResInstance *res_instance = res->add_instances();
     for (uint32_t si = 0; si < 1; si++) {
       EmbeddingValues *emb_instance = res_instance->add_instances();
-      for (uint32_t ei = 0; ei < EMB_SIZE; ei++) {
+      for (uint32_t ei = 0; ei < emb_size; ei++) {
-        uint32_t index = bi * EMB_SIZE + ei;
+        uint32_t index = bi * emb_size + ei;
         emb_instance->add_values(out_data[index]);
       }
diff --git a/paddle-gpu-serving/client/bert_service/bert_service.py b/paddle-gpu-serving/client/bert_service/bert_service.py
new file mode 100644
index 00000000..5aa9331b
--- /dev/null
+++ b/paddle-gpu-serving/client/bert_service/bert_service.py
@@ -0,0 +1,245 @@
+# coding:utf-8
+import sys
+import numpy as np
+import paddlehub as hub
+import ujson
+import random
+from paddlehub.common.logger import logger
+import socket
+
+_ver = sys.version_info
+is_py2 = (_ver[0] == 2)
+is_py3 = (_ver[0] == 3)
+
+if is_py2:
+    import httplib
+if is_py3:
+    import http.client as httplib
+
+
+class BertService():
+    def __init__(self,
+                 profile=False,
+                 max_seq_len=128,
+                 model_name="bert_uncased_L-12_H-768_A-12",
+                 show_ids=False,
+                 do_lower_case=True,
+                 process_id=0,
+                 retry=3,
+                 load_balance='round_robin'):
+        self.process_id = process_id
+        self.reader_flag = False
+        self.batch_size = 0
+        self.max_seq_len = max_seq_len
+        self.profile = profile
+        self.model_name = model_name
+        self.show_ids = show_ids
+        self.do_lower_case = do_lower_case
+        self.con_list = []
+        self.con_index = 0
+        self.load_balance = load_balance
+        self.server_list = []
+        self.serving_list = []
+        self.feed_var_names = ''
+        self.retry = retry
+
+        module = hub.Module(name=self.model_name)
+        inputs, outputs, program = module.context(
+            trainable=True, max_seq_len=self.max_seq_len)
+        input_ids = inputs["input_ids"]
+        position_ids = inputs["position_ids"]
+        segment_ids = inputs["segment_ids"]
+        input_mask = inputs["input_mask"]
+        self.feed_var_names = input_ids.name + ';' + position_ids.name + ';' + segment_ids.name + ';' + input_mask.name
+        self.reader = hub.reader.ClassifyReader(
+            vocab_path=module.get_vocab_path(),
+            dataset=None,
+            max_seq_len=self.max_seq_len,
+            do_lower_case=self.do_lower_case)
+        self.reader_flag = True
+
+    def add_server(self, server='127.0.0.1:8010'):
+        self.server_list.append(server)
+        self.check_server()
+
+    def add_server_list(self, server_list):
+        for server_str in server_list:
+            self.server_list.append(server_str)
+        self.check_server()
+
+    def check_server(self):
+        for server in self.server_list:
+            client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            server_ip = server.split(':')[0]
+            server_port = int(server.split(':')[1])
+            client.connect((server_ip, server_port))
+            client.send('pending server'.encode())
+            response = client.recv(1024).decode()
+
+            response_list = response.split('\t')
+            status_code = int(response_list[0].split(':')[1])
+
+            if status_code == 0:
+                server_model = response_list[1].split(':')[1]
+                if server_model == self.model_name:
+                    serving_port = response_list[2].split(':')[1]
+                    serving_ip = server_ip
+                    self.serving_list.append(serving_ip + ':' + serving_port)
+                else:
+                    logger.error('model_name does not match: server {} is serving {}'.
+                                 format(server, server_model))
+            else:
+                error_msg = response_list[1]
+                logger.error('Failed to connect to server {}: {}'.format(
+                    server, error_msg))
+
+    def request_server(self, request_msg):
+        if self.load_balance == 'round_robin':
+            try:
+                cur_con = httplib.HTTPConnection(self.serving_list[
+                    self.con_index])
+                cur_con.request('POST', "/BertService/inference", request_msg,
+                                {"Content-Type": "application/json"})
+                response = cur_con.getresponse()
+                response_msg = response.read()
+                response_msg = ujson.loads(response_msg)
+                self.con_index += 1
+                self.con_index = self.con_index % len(self.serving_list)
+                return response_msg
+
+            except BaseException as err:
+                logger.warning("Infer error with server {}: {}".format(
+                    self.serving_list[self.con_index], err))
+                if len(self.serving_list) == 0:
+                    logger.error('All servers failed, process will exit')
+                    return 'fail'
+                else:
+                    self.con_index = (self.con_index + 1) % len(
+                        self.serving_list)
+                    return 'retry'
+
+        elif self.load_balance == 'random':
+            try:
+                random.seed()
+                self.con_index = random.randint(0, len(self.serving_list) - 1)
+                logger.info(self.con_index)
+                cur_con = httplib.HTTPConnection(self.serving_list[
+                    self.con_index])
+                cur_con.request('POST', "/BertService/inference", request_msg,
+                                {"Content-Type": "application/json"})
+                response = cur_con.getresponse()
+                response_msg = response.read()
+                response_msg = ujson.loads(response_msg)
+
+                return response_msg
+            except BaseException as err:
+                logger.warning("Infer error with server {}: {}".format(
+                    self.serving_list[self.con_index], err))
+                if len(self.serving_list) == 0:
+                    logger.error('All servers failed, process will exit')
+                    return 'fail'
+                else:
+                    self.con_index = random.randint(
+                        0, len(self.serving_list) - 1)
+                    return 'retry'
+
+        elif self.load_balance == 'bind':
+            try:
+                self.con_index = int(self.process_id) % len(self.serving_list)
+                cur_con = httplib.HTTPConnection(self.serving_list[
+                    self.con_index])
+                cur_con.request('POST', "/BertService/inference", request_msg,
+                                {"Content-Type": "application/json"})
+                response = cur_con.getresponse()
+                response_msg = response.read()
+                response_msg = ujson.loads(response_msg)
+
+                return response_msg
+            except BaseException as err:
+                logger.warning("Infer error with server {}: {}".format(
+                    self.serving_list[self.con_index], err))
+                if len(self.serving_list) == 0:
+                    logger.error('All servers failed, process will exit')
+                    return 'fail'
+                else:
+                    self.con_index = int(self.process_id) % len(
+                        self.serving_list)
+                    return 'retry'
+
+    def prepare_data(self, text):
+        self.batch_size = len(text)
+        data_generator = self.reader.data_generator(
+            batch_size=self.batch_size, phase='predict', data=text)
+        result = []
+        for run_step, batch in enumerate(data_generator(), start=1):
+            request = []
+            token_list = batch[0][0].reshape(-1).tolist()
+            pos_list = batch[0][1].reshape(-1).tolist()
+            sent_list = batch[0][2].reshape(-1).tolist()
+            mask_list = batch[0][3].reshape(-1).tolist()
+            for si in range(self.batch_size):
+                instance_dict = {}
+                instance_dict["token_ids"] = token_list[si * self.max_seq_len:(
+                    si + 1) * self.max_seq_len]
+                instance_dict["sentence_type_ids"] = sent_list[
+                    si * self.max_seq_len:(si + 1) * self.max_seq_len]
+                instance_dict["position_ids"] = pos_list[si * self.max_seq_len:(
+                    si + 1) * self.max_seq_len]
+                instance_dict["input_masks"] = mask_list[si * self.max_seq_len:(
+                    si + 1) * self.max_seq_len]
+                request.append(instance_dict)
+
+        request = {"instances": request}
+        request["max_seq_len"] = self.max_seq_len
+        request["feed_var_names"] = self.feed_var_names
+        request_msg = ujson.dumps(request)
+        if self.show_ids:
+            logger.info(request_msg)
+
+        return request_msg
+
+    def encode(self, text):
+        if len(self.serving_list) == 0:
+            logger.error('No available server.')
+            return -1
+        if type(text) != list:
+            raise TypeError('Only list inputs are supported')
+        request_msg = self.prepare_data(text)
+
+        response_msg = self.request_server(request_msg)
+        retry = 0
+        while type(response_msg) == str and response_msg == 'retry':
+            if retry < self.retry:
+                retry += 1
+                logger.info('Trying another server')
+                response_msg = self.request_server(request_msg)
+            else:
+                logger.error('Request failed after {} retries'.format(
+                    self.retry))
+                break
+        if type(response_msg) == str:
+            logger.error('Request failed: {}'.format(response_msg))
+            return -1
+        result = []
+        for msg in response_msg["instances"]:
+            for sample in msg["instances"]:
+                result.append(sample["values"])
+
+        # request end
+        return result
+
+
+def test():
+    bc = BertService(
+        model_name='bert_chinese_L-12_H-768_A-12',
+        max_seq_len=20,
+        show_ids=False,
+        do_lower_case=True)
+    bc.add_server('127.0.0.1:8010')
+    result = bc.encode([["远上寒山石径斜"], ])
+    print(result[0])
+
+
+if __name__ == '__main__':
+    test()
diff --git a/paddle-gpu-serving/paddle-gpu-serving/README.md b/paddle-gpu-serving/paddle-gpu-serving/README.md
new file mode 100644
index 00000000..720ee04d
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/README.md
@@ -0,0 +1,45 @@
+# paddle-gpu-serving
+
+paddle-gpu-serving is a model inference service library built on the Paddle Serving framework: with only a few lines of code you can deploy a supported model and query it remotely. It currently supports the BERT-family models from the PaddleHub semantic-understanding model library for computing vector representations of text. The serving-side code is based on the [BERT service demo server](../../demo-serving/op/bert_service_op.cpp).
+
+## Installation
+
+### Server
+
+Requirements: Python 3, paddlepaddle>=1.6, paddlehub>=1.4
+
+```bash
+pip install paddle-gpu-serving
+```
+
+### Client
+
+Requirements: ujson, Python 2 or Python 3
+
+Download the [bert_service.py script](../client/bert_service/bert_service.py) and use it directly.
+
+## Usage
+
+### Server
+
+```python
+from paddle_gpu_serving.run import BertServer
+bs = BertServer(with_gpu=True)
+bs.with_model('bert_chinese_L-12_H-768_A-12')
+bs.run(gpu_index=0, port=8010)
+```
+
+### Client
+
+```python
+bc = BertService(
+    model_name='bert_chinese_L-12_H-768_A-12',
+    max_seq_len=20,
+    show_ids=False,
+    do_lower_case=True)
+bc.add_server('127.0.0.1:8010')
+result = bc.encode([["远上寒山石径斜"], ])
+print(result[0])
+```
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/__init__.py b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/__init__.py
new file mode 100644
index 00000000..b836a7e4
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__version__ = '0.8.2'
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/run/__init__.py b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/run/__init__.py
new file mode 100644
index 00000000..5d5542ce
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/run/__init__.py
@@ -0,0 +1,262 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+import shutil
+import tarfile
+import paddle_gpu_serving
+import subprocess
+import imp
+import time
+import socket
+from contextlib import closing
+
+
+class BertServer():
+    def __init__(self, with_gpu=True):
+        try:
+            imp.find_module('paddlehub')
+            self.paddlehub_found = True
+            print('Working with paddlehub')
+        except ImportError:
+            self.paddlehub_found = False
+        os.chdir(self.get_path())
+        self.with_gpu_flag = with_gpu
+        self.p_list = []
+        self.use_other_model = False
+        self.run_m = False
+        self.model_url = 'https://paddle-serving.bj.bcebos.com/data/bert'
+        self.bin_url = 'https://paddle-serving.bj.bcebos.com/paddle-gpu-serving/bin'
+        self.cpu_run_cmd = './bin/serving-cpu --logtostderr=true '
+        self.gpu_run_cmd = './bin/serving-gpu --bthread_min_concurrency=4 --bthread_concurrency=4 --logtostderr=true '
+        self.model_path_str = ''
+        self.get_exe()
+
+    def get_exe(self):
+        exe_path = './bin'
+        module_version = paddle_gpu_serving.__version__
+        target_version_list = module_version.strip().split('.')
+        target_version = target_version_list[0] + '.' + target_version_list[1]
+        need_download = False
+
+        if os.path.exists(exe_path):
+            with open('./bin/serving-version.txt') as f:
+                serving_version = f.read().strip()
+            if serving_version != target_version:
+                need_download = True
+        else:
+            need_download = True
+        if need_download:
+            tar_name = 'paddle-gpu-serving-' + target_version + '-bin.tar.gz'
+            bin_url = self.bin_url + '/' + tar_name
+            print('First-time run, downloading PaddleServing components ...')
+            r = os.system('wget ' + bin_url + ' --no-check-certificate')
+            if r != 0:
+                print('Download failed')
+                if os.path.exists(tar_name):
+                    os.remove(tar_name)
+            else:
+                try:
+                    print('Decompressing files ...')
+                    tar = tarfile.open(tar_name)
+                    tar.extractall()
+                    tar.close()
+                except:
+                    if os.path.exists(exe_path):
+                        shutil.rmtree(exe_path)
+                finally:
+                    os.remove(tar_name)
+
+    def build_server(self):
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sock.bind(('127.0.0.1', self.port))
+        sock.listen(5)
+        print('Main server serving on port {}.'.format(self.port))
+        while True:
+            con, addr = sock.accept()
+            request = con.recv(1024)
+            response = 'status:0\tmodel name:' + str(
+                self.model_name) + '\t' + 'serving port:' + str(
+                    self.serving_port)
+
+            con.send(bytes(response, encoding='utf-8'))
+            con.close()
+
+    def modify_conf(self, gpu_index=0):
+        os.chdir(self.get_path())
+        if not self.with_gpu_flag:
+            with open('./conf/model_toolkit.prototxt', 'r') as f:
+                conf_str = f.read()
+            conf_str = re.sub('GPU', 'CPU', conf_str)
+            conf_str = re.sub('model_data_path.*"', self.model_path_str,
+                              conf_str)
+            conf_str = re.sub('enable_memory_optimization: 0',
+                              'enable_memory_optimization: 1', conf_str)
+            open('./conf/model_toolkit.prototxt', 'w').write(conf_str)
+        else:
+            conf_file = './conf/model_toolkit.prototxt.' + str(gpu_index)
+            with open(conf_file, 'r') as f:
+                conf_str = f.read()
+            conf_str = re.sub('CPU', 'GPU', conf_str)
+            conf_str = re.sub('model_data_path.*"', self.model_path_str,
+                              conf_str)
+            conf_str = re.sub('enable_memory_optimization: 0',
+                              'enable_memory_optimization: 1', conf_str)
+            open(conf_file, 'w').write(conf_str)
+
+    def find_serving_port(self):
+        for i in range(1000):
+            port = 9000 + i
+            with closing(socket.socket(socket.AF_INET,
+                                       socket.SOCK_STREAM)) as sock:
+                sock.settimeout(2)
+                result = sock.connect_ex(('127.0.0.1', port))
+            if result != 0:
+                return port
+        return -1
+
+    def hold(self):
+        try:
+            self.build_server()
+        except KeyboardInterrupt:
+            print("Server is going to quit")
+            time.sleep(5)
+
+    def run(self, gpu_index=0, port=8866):
+        self.port = port
+        os.chdir(self.get_path())
+        self.modify_conf(gpu_index)
+        serving_port = self.find_serving_port()
+        if serving_port < 0:
+            print('No port available.')
+            return -1
+        self.serving_port = serving_port
+
+        if self.with_gpu_flag == True:
+            gpu_msg = '--gpuid=' + str(gpu_index) + ' '
+            run_cmd = self.gpu_run_cmd + gpu_msg
+            run_cmd += '--port=' + str(serving_port) + ' '
+            run_cmd += '--resource_file=resource.prototxt.' + str(gpu_index) + ' '
+            print('Start serving on gpu ' + str(gpu_index) + ' port = ' + str(
+                serving_port))
+        else:
+            cuda_check = subprocess.Popen(
+                'cat /usr/local/cuda/version.txt > tmp 2>&1', shell=True)
+            cuda_check.wait()
+            if cuda_check.returncode == 0:
+                run_cmd = self.gpu_run_cmd + '--port=' + str(serving_port) + ' '
+            else:
+                run_cmd = self.cpu_run_cmd + '--port=' + str(serving_port) + ' '
+                print('Start serving on cpu port = {}'.format(serving_port))
+
+        process = subprocess.Popen(run_cmd, shell=True)
+
+        self.p_list.append(process)
+        if not self.run_m:
+            self.hold()
+
+    def run_multi(self, gpu_index_list=[], port_list=[]):
+        self.run_m = True
+        if len(port_list) < 1:
+            print('Please set at least one port.')
+            return -1
+        if self.with_gpu_flag == True:
+            if len(gpu_index_list) != len(port_list):
+                print('Expect same length of gpu_index_list and port_list.')
+                return -1
+            for gpu_index, port in zip(gpu_index_list, port_list):
+                self.run(gpu_index=gpu_index, port=port)
+        else:
+            for port in port_list:
+                self.run(port=port)
+        self.hold()
+
+    def stop(self):
+        for p in self.p_list:
+            p.kill()
+
+    def with_model(self, model_name=None, model_url=None):
+        if model_name == None or type(model_name) != str:
+            print('Please set model name as a string')
+            return -1
+        self.model_name = model_name
+        os.chdir(self.get_path())
+        self.get_model(model_name)
+
+    def get_path(self):
+        py_path = os.path.dirname(paddle_gpu_serving.__file__)
+        server_path = os.path.join(py_path, 'server')
+        return server_path
+
+    def get_model(self, model_name):
+        server_path = self.get_path()
+        if not self.paddlehub_found or self.use_other_model:
+            tar_name = model_name + '.tar.gz'
+            model_url = self.model_url + '/' + tar_name
+
+            model_path = os.path.join(server_path, 'data/model/paddle/fluid')
+            if not os.path.exists(model_path):
+                os.makedirs('data/model/paddle/fluid')
+            os.chdir(model_path)
+            if not os.path.exists(model_name):
+                os.system('wget ' + model_url + ' --no-check-certificate')
+                print('Decompressing files ...')
+                tar = tarfile.open(tar_name)
+                tar.extractall()
+                tar.close()
+                os.remove(tar_name)
+
+            self.model_path_str = r'model_data_path: "./data/model/paddle/fluid/' + model_name + r'"'
+
+        else:
+            import paddlehub as hub
+            import paddle.fluid as fluid
+
+            paddlehub_modules_path = os.path.expanduser('~/.paddlehub')
+            paddlehub_bert_path = os.path.join(paddlehub_modules_path,
+                                               'bert_service')
+            model_path = os.path.join(paddlehub_bert_path, model_name)
+            self.model_path_str = r'model_data_path: "' + model_path + r'"'
+
+            if not os.path.exists(model_path):
+                print('Save model for serving ...')
+                module = hub.Module(name=model_name)
+                inputs, outputs, program = module.context(
+                    trainable=True, max_seq_len=128)
+                place = fluid.CPUPlace()
+                exe = fluid.Executor(place)
+                input_ids = inputs["input_ids"]
+                position_ids = inputs["position_ids"]
+                segment_ids = inputs["segment_ids"]
+                input_mask = inputs["input_mask"]
+                feed_var_names = [
+                    input_ids.name, position_ids.name, segment_ids.name,
+                    input_mask.name
+                ]
+                target_vars = [
+                    outputs["pooled_output"], outputs["sequence_output"]
+                ]
+                os.makedirs(model_path)
+                fluid.io.save_inference_model(
+                    feeded_var_names=feed_var_names,
+                    target_vars=target_vars,
+                    main_program=program,
+                    executor=exe,
+                    dirname=model_path)
+
+        os.chdir(self.get_path())
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/cube.conf b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/cube.conf
new file mode 100644
index 00000000..35310301
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/cube.conf
@@ -0,0 +1,15 @@
+[{
+    "dict_name": "dict",
+    "shard": 2,
+    "dup": 1,
+    "timeout": 200,
+    "retry": 3,
+    "backup_request": 100,
+    "type": "ipport_list",
+    "load_balancer": "rr",
+    "nodes": [{
+        "ipport_list": "list://xxx.xxx.xxx.xxx:8000"
+    },{
+        "ipport_list": "list://xxx.xxx.xxx.xxx:8000"
+    }]
+}]
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/gflags.conf b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/gflags.conf
new file mode 100644
index 00000000..b4eedcc8
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/gflags.conf
@@ -0,0 +1,2 @@
+--enable_model_toolkit
+--enable_cube=true
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt
new file mode 100644
index 00000000..a5352b79
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 1
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.0 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.0
new file mode 100644
index 00000000..77973447
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.0
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 0
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.1 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.1
new file mode 100644
index 00000000..a5352b79
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.1
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 1
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.10 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.10
new file mode 100644
index 00000000..a5352b79
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.10
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 1
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.11 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.11
new file mode 100644
index 00000000..a5352b79
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.11
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 1
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.12 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.12
new file mode 100644
index 00000000..a5352b79
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.12
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 1
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.13 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.13
new file mode 100644
index 00000000..a5352b79
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.13
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 1
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.14 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.14
new file mode 100644
index 00000000..a5352b79
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.14
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 1
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.15 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.15
new file mode 100644
index 00000000..a5352b79
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.15
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 1
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.2 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.2
new file mode 100644
index 00000000..a5352b79
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.2
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 1
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.3 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.3
new file mode 100644
index 00000000..77973447
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.3
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 0
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.4 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.4
new file mode 100644
index 00000000..a5352b79
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.4
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 1
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.5 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.5
new file mode 100644
index 00000000..a5352b79
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.5
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 1
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.6 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.6
new file mode 100644
index 00000000..a5352b79
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.6
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 1
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.7 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.7
new file mode 100644
index 00000000..a5352b79
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.7
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 1
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.8 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.8
new file mode 100644
index 00000000..a5352b79
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.8
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 1
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.9 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.9
new file mode 100644
index 00000000..a5352b79
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/model_toolkit.prototxt.9
@@ -0,0 +1,11 @@
+engines {
+  name: "bert"
+  type: "FLUID_GPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+  enable_memory_optimization: 1
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt
new file mode 100644
index 00000000..0a0d6678
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.0 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.0
new file mode 100644
index 00000000..c2023677
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.0
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.0"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.1 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.1
new file mode 100644
index 00000000..d7c15d4d
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.1
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.1"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.10 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.10
new file mode 100644
index 00000000..74b0033a
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.10
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.10"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.11 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.11
new file mode 100644
index 00000000..91bd7417
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.11
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.11"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.12 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.12
new file mode 100644
index 00000000..483b713d
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.12
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.12"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.13 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.13
new file mode 100644
index 00000000..0a3d25a0
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.13
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.13"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.14 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.14
new file mode 100644
index 00000000..64cb4cff
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.14
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.14"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.15 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.15
new file mode 100644
index 00000000..46219379
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.15
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.15"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.2 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.2
new file mode 100644
index 00000000..45509468
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.2
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.2"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.3 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.3
new file mode 100644
index 00000000..3aadbe34
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.3
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.3"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.4 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.4
new file mode 100644
index 00000000..eec742fc
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.4
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.4"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.5 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.5
new file mode 100644
index 00000000..5db39ee9
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.5
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.5"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.6 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.6
new file mode 100644
index 00000000..ae65b251
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.6
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.6"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.7 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.7
new file mode 100644
index 00000000..592f0f15
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.7
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.7"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.8 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.8
new file mode 100644
index 00000000..ccd84971
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.8
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.8"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.9 b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.9
new file mode 100644
index 00000000..5e123811
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/resource.prototxt.9
@@ -0,0 +1,3 @@
+model_toolkit_path: "./conf/"
+model_toolkit_file: "model_toolkit.prototxt.9"
+cube_config_file: "./conf/cube.conf"
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/service.prototxt b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/service.prototxt
new file mode 100644
index 00000000..e630669e
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/service.prototxt
@@ -0,0 +1,4 @@
+services {
+  name: "BertService"
+  workflows: "workflow9"
+}
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/workflow.prototxt b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/workflow.prototxt
new file mode 100644
index 00000000..06f3ac40
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/conf/workflow.prototxt
@@ -0,0 +1,94 @@
+workflows {
+  name: "workflow1"
+  workflow_type: "Sequence"
+  nodes {
+    name: "dense_echo_op"
+    type: "DenseEchoOp"
+  }
+}
+
+workflows {
+  name: "workflow2"
+  workflow_type: "Sequence"
+  nodes {
+    name: "sparse_echo_op"
+    type: "SparseEchoOp"
+    dependencies {
+      name: "startup_op"
+      mode: "RO"
+    }
+  }
+}
+workflows {
+  name: "workflow3"
+  workflow_type: "Sequence"
+  nodes {
+    name: "echo_op"
+    type: "CommonEchoOp"
+  }
+}
+workflows {
+  name: "workflow4"
+  workflow_type: "Sequence"
+  nodes {
+    name: "image_reader_op"
+    type: "ReaderOp"
+  }
+  nodes {
+    name: "image_classify_op"
+    type: "ClassifyOp"
+    dependencies {
+      name: "image_reader_op"
+      mode: "RO"
+    }
+  }
+  nodes {
+    name: "write_json_op"
+    type: "WriteJsonOp"
+    dependencies {
+      name: "image_classify_op"
+      mode: "RO"
+    }
+  }
+}
+workflows {
+  name: "workflow5"
+  workflow_type: "Sequence"
+  nodes {
+    name: "int64tensor_echo_op"
+    type: "Int64TensorEchoOp"
+  }
+}
+workflows {
+  name: "workflow6"
+  workflow_type: "Sequence"
+  nodes {
+    name: "text_classify_op"
+    type: "TextClassificationOp"
+  }
+}
+workflows {
+  name: "workflow7"
+  workflow_type: "Sequence"
+  nodes {
+    name: "echo_kvdb_service_op"
+    type: "KVDBEchoOp"
+  }
+}
+workflows {
+  name: "workflow8"
+  workflow_type: "Sequence"
+  nodes {
+    name: "ctr_prediction_service_op"
+    type: "CTRPredictionOp"
+  }
+}
+workflows {
+  name: "workflow9"
+  workflow_type: "Sequence"
+  nodes {
+    name: "bert_service_op"
+    type: "BertServiceOp"
+  }
+}
+
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/data/model/paddle/fluid_reload_flag b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/data/model/paddle/fluid_reload_flag
new file mode 100644
index 00000000..a1866984
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/data/model/paddle/fluid_reload_flag
@@ -0,0 +1,2 @@
+paddle fluid model
+time:20180531
diff --git a/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/data/model/paddle/fluid_time_file b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/data/model/paddle/fluid_time_file
new file mode 100644
index 00000000..4d9422cd
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/paddle_gpu_serving/server/data/model/paddle/fluid_time_file
@@ -0,0 +1,2 @@
+201805311000
+model paddle fluid
diff --git a/paddle-gpu-serving/paddle-gpu-serving/setup.py b/paddle-gpu-serving/paddle-gpu-serving/setup.py
new file mode 100644
index 00000000..7e3b46bb
--- /dev/null
+++ b/paddle-gpu-serving/paddle-gpu-serving/setup.py
@@ -0,0 +1,50 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import setuptools
+
+with open("README.md", "r") as fh:
+    long_description = fh.read()
+
+# Read the version string out of the package's __init__.py.
+info_py = './paddle_gpu_serving/__init__.py'
+info_content = open(info_py, 'r').readlines()
+version_line = [
+    l.strip() for l in info_content if l.startswith('__version__')
+][0]
+exec(version_line)  # defines __version__
+
+setuptools.setup(
+    name="paddle-gpu-serving",
+    version=__version__,
+    author="MRXLT",
+    author_email="xlt2024@gmail.com",
+    description="package for paddle serving with bert",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/PaddlePaddle/Serving",
+    packages=setuptools.find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: Apache Software License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires='>=3.5',
+    package_data={
+        'paddle_gpu_serving': [
+            'server/conf/*',
+            'server/data/model/paddle/fluid_reload_flag',
+            'server/data/model/paddle/fluid_time_file',
+        ]
+    })
-- 
GitLab
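
Note for reviewers: the handshake between `BertService.check_server` (client) and `BertServer.build_server` (server) in this patch is a single TCP round trip — the client sends the literal string `pending server`, and the main server replies `status:0\tmodel name:<name>\tserving port:<port>`. Below is a minimal, self-contained sketch of that probe; the host/port values are examples and `probe_bert_server` is a hypothetical helper, not part of the patch.

```python
import socket


def probe_bert_server(host='127.0.0.1', port=8010):
    """Ask a paddle-gpu-serving main server which model it serves and where.

    Mirrors BertService.check_server: send the literal 'pending server'
    request, then parse the tab-separated reply
    'status:0\tmodel name:<name>\tserving port:<port>'.
    """
    client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    client.connect((host, port))
    client.send('pending server'.encode())
    response = client.recv(1024).decode()
    client.close()

    # Each field is 'key:value'; split on the first ':' only.
    fields = dict(item.split(':', 1) for item in response.split('\t'))
    if int(fields['status']) != 0:
        raise RuntimeError('server error: ' + response)
    return fields['model name'], int(fields['serving port'])


if __name__ == '__main__':
    model, serving_port = probe_bert_server()
    print('{} is served on port {}'.format(model, serving_port))
```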
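The per-GPU config files above are templates: before launching `serving-gpu`, `BertServer.modify_conf` rewrites `model_toolkit.prototxt.<gpu_index>` in place with three `re.sub` calls. A sketch of that rewrite applied to the stock engines block, assuming the model was downloaded into the package's `data/model/paddle/fluid` directory (the model path below is illustrative):

```python
import re

# Stock engines block, as shipped in server/conf/model_toolkit.prototxt.0.
conf_str = '''engines {
  name: "bert"
  type: "FLUID_GPU_ANALYSIS_DIR"
  reloadable_meta: "./data/model/paddle/fluid_time_file"
  reloadable_type: "timestamp_ne"
  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
  runtime_thread_num: 0
  batch_infer_size: 0
  enable_batch_align: 0
  enable_memory_optimization: 0
}
'''

# Point the engine at the model actually fetched by get_model()
# (example path) and enable memory optimization, as modify_conf does.
model_path_str = 'model_data_path: "./data/model/paddle/fluid/bert_chinese_L-12_H-768_A-12"'
conf_str = re.sub('CPU', 'GPU', conf_str)  # no-op here; restores GPU type on a previously CPU-ified conf
conf_str = re.sub('model_data_path.*"', model_path_str, conf_str)
conf_str = re.sub('enable_memory_optimization: 0',
                  'enable_memory_optimization: 1', conf_str)
print(conf_str)
```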
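Finally, the request body that `BertService.prepare_data` POSTs to `/BertService/inference`: a JSON object with one dict per batch item, each carrying four id lists padded to `max_seq_len`, plus the feed variable names joined with `;`. A hand-rolled sketch with placeholder ids — real ids and variable names come from the PaddleHub tokenizer and module, so every value below is illustrative only:

```python
import ujson

max_seq_len = 20
feed_var_names = 'input_ids;position_ids;segment_ids;input_mask'  # example names

# One instance per text in the batch; all four lists padded to max_seq_len.
instance = {
    "token_ids": [101] + [0] * (max_seq_len - 1),       # placeholder ids
    "sentence_type_ids": [0] * max_seq_len,
    "position_ids": list(range(max_seq_len)),
    "input_masks": [1] + [0] * (max_seq_len - 1),
}
request = {
    "instances": [instance],
    "max_seq_len": max_seq_len,
    "feed_var_names": feed_var_names,
}
request_msg = ujson.dumps(request)
# POST request_msg to http://<serving_ip>:<serving_port>/BertService/inference
# with Content-Type: application/json, as BertService.request_server does.
print(request_msg)
```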