Commit 0af4cfdb authored by MRXLT

add paddle-gpu-serving

Parent f7b2bc29
@@ -35,7 +35,6 @@ using baidu::paddle_serving::predictor::bert_service::EmbeddingValues;
 extern int batch_size = 1;
 extern int max_seq_len = 128;
 extern int layer_num = 12;
-extern int emb_size = 768;
 extern int thread_num = 1;
 extern int max_turn = 1000;
@@ -95,7 +94,6 @@ int create_req(Request* req,
   }
 }
 req->set_max_seq_len(max_seq_len);
-req->set_emb_size(emb_size);
 return 0;
 }
......
@@ -59,7 +59,7 @@ int BertServiceOp::inference() {
 }
 const int64_t MAX_SEQ_LEN = req->max_seq_len();
-const int64_t EMB_SIZE = req->emb_size();
+// const int64_t EMB_SIZE = req->emb_size();
 paddle::PaddleTensor src_ids;
 paddle::PaddleTensor pos_ids;
@@ -172,12 +172,13 @@ int BertServiceOp::inference() {
 LOG(INFO) << "batch_size : " << out->at(0).shape[0]
           << " emb_size : " << out->at(0).shape[1];
-float *out_data = reinterpret_cast<float *>(out->at(0).data.data());
+uint32_t emb_size = out->at(0).shape[1];
+float *out_data = reinterpret_cast<float *>(out->at(0).data.data());
 for (uint32_t bi = 0; bi < batch_size; bi++) {
   BertResInstance *res_instance = res->add_instances();
   for (uint32_t si = 0; si < 1; si++) {
     EmbeddingValues *emb_instance = res_instance->add_instances();
-    for (uint32_t ei = 0; ei < EMB_SIZE; ei++) {
-      uint32_t index = bi * EMB_SIZE + ei;
+    for (uint32_t ei = 0; ei < emb_size; ei++) {
+      uint32_t index = bi * emb_size + ei;
       emb_instance->add_values(out_data[index]);
     }
......
# coding:utf-8
import sys
import numpy as np
import paddlehub as hub
import ujson
import random
from paddlehub.common.logger import logger
import socket
_ver = sys.version_info
is_py2 = (_ver[0] == 2)
is_py3 = (_ver[0] == 3)
if is_py2:
import httplib
if is_py3:
import http.client as httplib
class BertService():
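    # Client for a remote BertService deployment: tokenizes input text with a
    # PaddleHub reader, packs it into a JSON request, and dispatches it over
    # HTTP to one of the registered serving endpoints.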
def __init__(self,
profile=False,
max_seq_len=128,
model_name="bert_uncased_L-12_H-768_A-12",
show_ids=False,
do_lower_case=True,
process_id=0,
retry=3,
load_balance='round_robin'):
self.process_id = process_id
self.reader_flag = False
self.batch_size = 0
self.max_seq_len = max_seq_len
self.profile = profile
self.model_name = model_name
self.show_ids = show_ids
self.do_lower_case = do_lower_case
self.con_list = []
self.con_index = 0
self.load_balance = load_balance
self.server_list = []
self.serving_list = []
self.feed_var_names = ''
self.retry = retry
module = hub.Module(name=self.model_name)
inputs, outputs, program = module.context(
trainable=True, max_seq_len=self.max_seq_len)
input_ids = inputs["input_ids"]
position_ids = inputs["position_ids"]
segment_ids = inputs["segment_ids"]
input_mask = inputs["input_mask"]
        self.feed_var_names = ';'.join([
            input_ids.name, position_ids.name, segment_ids.name,
            input_mask.name
        ])
self.reader = hub.reader.ClassifyReader(
vocab_path=module.get_vocab_path(),
dataset=None,
max_seq_len=self.max_seq_len,
do_lower_case=self.do_lower_case)
self.reader_flag = True
def add_server(self, server='127.0.0.1:8010'):
self.server_list.append(server)
self.check_server()
def add_server_list(self, server_list):
for server_str in server_list:
self.server_list.append(server_str)
self.check_server()
def check_server(self):
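        # Handshake with each management server: send a probe and expect a
        # tab-separated reply of the form
        #   status:0\tmodel name:<name>\tserving port:<port>
        # (produced by BertServer.build_server). On status 0 with a matching
        # model name, the serving endpoint is appended to serving_list.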
for server in self.server_list:
client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_ip = server.split(':')[0]
server_port = int(server.split(':')[1])
            client.connect((server_ip, server_port))
            client.send(b'pending server')  # bytes for Python 3 compatibility
            response = client.recv(1024).decode()
            client.close()
            response_list = response.split('\t')
status_code = int(response_list[0].split(':')[1])
if status_code == 0:
server_model = response_list[1].split(':')[1]
if server_model == self.model_name:
serving_port = response_list[2].split(':')[1]
serving_ip = server_ip
self.serving_list.append(serving_ip + ':' + serving_port)
else:
                    logger.error('model_name does not match, server {} is using: {}'.
                                 format(server, server_model))
else:
error_msg = response_list[1]
                logger.error('failed to connect to server {}. {}'.format(
                    server, error_msg))
def request_server(self, request_msg):
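        # Dispatch the request according to the configured strategy:
        # 'round_robin' cycles through serving_list, 'random' picks an index
        # at random, and 'bind' pins this process to index
        # process_id % len(serving_list). On failure the caller gets 'retry'
        # (or 'fail' when no servers are left).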
if self.load_balance == 'round_robin':
try:
cur_con = httplib.HTTPConnection(self.serving_list[
self.con_index])
cur_con.request('POST', "/BertService/inference", request_msg,
{"Content-Type": "application/json"})
response = cur_con.getresponse()
response_msg = response.read()
response_msg = ujson.loads(response_msg)
self.con_index += 1
self.con_index = self.con_index % len(self.serving_list)
return response_msg
except BaseException as err:
logger.warning("Infer Error with server {} : {}".format(
self.serving_list[self.con_index], err))
if len(self.serving_list) == 0:
                    logger.error('All servers failed, process will exit')
return 'fail'
else:
self.con_index += 1
return 'retry'
elif self.load_balance == 'random':
try:
random.seed()
self.con_index = random.randint(0, len(self.serving_list) - 1)
                logger.info('Randomly selected server index: {}'.format(
                    self.con_index))
cur_con = httplib.HTTPConnection(self.serving_list[
self.con_index])
cur_con.request('POST', "/BertService/inference", request_msg,
{"Content-Type": "application/json"})
response = cur_con.getresponse()
response_msg = response.read()
response_msg = ujson.loads(response_msg)
return response_msg
except BaseException as err:
logger.warning("Infer Error with server {} : {}".format(
self.serving_list[self.con_index], err))
if len(self.serving_list) == 0:
                    logger.error('All servers failed, process will exit')
return 'fail'
else:
self.con_index = random.randint(0,
len(self.serving_list) - 1)
return 'retry'
elif self.load_balance == 'bind':
try:
self.con_index = int(self.process_id) % len(self.serving_list)
cur_con = httplib.HTTPConnection(self.serving_list[
self.con_index])
cur_con.request('POST', "/BertService/inference", request_msg,
{"Content-Type": "application/json"})
response = cur_con.getresponse()
response_msg = response.read()
response_msg = ujson.loads(response_msg)
return response_msg
except BaseException as err:
logger.warning("Infer Error with server {} : {}".format(
self.serving_list[self.con_index], err))
if len(self.serving_list) == 0:
                    logger.error('All servers failed, process will exit')
return 'fail'
else:
self.con_index = int(self.process_id) % len(
self.serving_list)
return 'retry'
def prepare_data(self, text):
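        # Tokenize the batch with the PaddleHub ClassifyReader, then flatten
        # token/position/sentence-type/mask ids into per-instance slices of
        # length max_seq_len and serialize everything as a JSON request.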
self.batch_size = len(text)
data_generator = self.reader.data_generator(
batch_size=self.batch_size, phase='predict', data=text)
result = []
for run_step, batch in enumerate(data_generator(), start=1):
request = []
token_list = batch[0][0].reshape(-1).tolist()
pos_list = batch[0][1].reshape(-1).tolist()
sent_list = batch[0][2].reshape(-1).tolist()
mask_list = batch[0][3].reshape(-1).tolist()
for si in range(self.batch_size):
instance_dict = {}
instance_dict["token_ids"] = token_list[si * self.max_seq_len:(
si + 1) * self.max_seq_len]
instance_dict["sentence_type_ids"] = sent_list[
si * self.max_seq_len:(si + 1) * self.max_seq_len]
instance_dict["position_ids"] = pos_list[si * self.max_seq_len:(
si + 1) * self.max_seq_len]
instance_dict["input_masks"] = mask_list[si * self.max_seq_len:(
si + 1) * self.max_seq_len]
request.append(instance_dict)
request = {"instances": request}
request["max_seq_len"] = self.max_seq_len
request["feed_var_names"] = self.feed_var_names
request_msg = ujson.dumps(request)
if self.show_ids:
logger.info(request_msg)
return request_msg
def encode(self, text):
if len(self.serving_list) == 0:
            logger.error('No matching server available.')
return -1
        if not isinstance(text, list):
            raise TypeError('Only lists are supported')
request_msg = self.prepare_data(text)
response_msg = self.request_server(request_msg)
retry = 0
while type(response_msg) == str and response_msg == 'retry':
if retry < self.retry:
retry += 1
                logger.info('Trying to connect to another server')
response_msg = self.request_server(request_msg)
else:
logger.error('Request failed after {} times retry'.format(
self.retry))
break
retry = 0
result = []
for msg in response_msg["instances"]:
for sample in msg["instances"]:
result.append(sample["values"])
#request end
return result
def test():
bc = BertService(
model_name='bert_chinese_L-12_H-768_A-12',
max_seq_len=20,
show_ids=False,
do_lower_case=True)
bc.add_server('127.0.0.1:8010')
result = bc.encode([["远上寒山石径斜"], ])
print(result[0])
if __name__ == '__main__':
test()
# paddle-gpu-serving
paddle-gpu-serving is a model inference serving library built on the Paddle Serving framework: with only a few lines of code you can deploy a given model and query it remotely. It currently supports BERT-family models from the PaddleHub semantic-understanding model library for obtaining vector representations of text. The serving-side code is based on the [BERT model service demo server](../../demo-serving/op/bert_service_op.cpp).
## Installation
### Server side
Requirements: Python 3, paddlepaddle >= 1.6, paddlehub >= 1.4
```bash
pip install paddle-gpu-serving
```
### Client side
Requirements: ujson, Python 2 or Python 3
Download the [bert_service.py script](../client/bert_service/bert_service.py) and use it directly.
## Usage
### Server side
```python
from paddle_gpu_serving.run import BertServer
bs = BertServer(with_gpu=True)
bs.with_model('bert_chinese_L-12_H-768_A-12')
bs.run(gpu_index=0, port=8010)
```
### Client side
```python
from bert_service import BertService  # assuming the downloaded bert_service.py is importable

bc = BertService(
model_name='bert_chinese_L-12_H-768_A-12',
max_seq_len=20,
show_ids=False,
do_lower_case=True)
bc.add_server('127.0.0.1:8010')
result = bc.encode([["远上寒山石径斜"], ])
print(result[0])
```
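For multi-GPU deployment, the server side also provides `run_multi`, which pairs GPU indices with ports one-to-one; a minimal sketch based on its signature:

```python
from paddle_gpu_serving.run import BertServer

bs = BertServer(with_gpu=True)
bs.with_model('bert_chinese_L-12_H-768_A-12')
# one serving process per GPU; gpu_index_list and port_list must match in length
bs.run_multi(gpu_index_list=[0, 1], port_list=[8010, 8011])
```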
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = '0.8.2'
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
import tarfile
import shutil
import paddle_gpu_serving
import subprocess
import imp
import time
import socket
from contextlib import closing
class BertServer():
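    # Server-side launcher: downloads the serving binary and model, rewrites
    # the prototxt configuration, starts the serving process, and answers
    # client handshakes on the management port.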
def __init__(self, with_gpu=True):
try:
imp.find_module('paddlehub')
self.paddlehub_found = True
print('Working with paddlehub')
except ImportError:
self.paddlehub_found = False
os.chdir(self.get_path())
self.with_gpu_flag = with_gpu
self.p_list = []
self.use_other_model = False
self.run_m = False
self.model_url = 'https://paddle-serving.bj.bcebos.com/data/bert'
self.bin_url = 'https://paddle-serving.bj.bcebos.com/paddle-gpu-serving/bin'
self.cpu_run_cmd = './bin/serving-cpu --logtostderr=true '
self.gpu_run_cmd = './bin/serving-gpu --bthread_min_concurrency=4 --bthread_concurrency=4 --logtostderr=true '
self.model_path_str = ''
self.get_exe()
def get_exe(self):
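        # Download the prebuilt serving binaries whose major.minor version
        # matches this package, skipping the download when ./bin already
        # holds the right version.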
exe_path = './bin'
module_version = paddle_gpu_serving.__version__
target_version_list = module_version.strip().split('.')
target_version = target_version_list[0] + '.' + target_version_list[1]
need_download = False
if os.path.exists(exe_path):
with open('./bin/serving-version.txt') as f:
serving_version = f.read().strip()
if serving_version != target_version:
need_download = True
else:
need_download = True
if need_download:
tar_name = 'paddle-gpu-serving-' + target_version + '-bin.tar.gz'
bin_url = self.bin_url + '/' + tar_name
            print('First run: downloading PaddleServing components ...')
r = os.system('wget ' + bin_url + ' --no-check-certificate')
if r != 0:
print('Download failed')
if os.path.exists(tar_name):
os.remove(tar_name)
            else:
                try:
                    print('Decompressing files ...')
                    tar = tarfile.open(tar_name)
                    tar.extractall()
                    tar.close()
                except Exception:
                    # a half-extracted ./bin directory is unusable; drop it
                    if os.path.exists(exe_path):
                        shutil.rmtree(exe_path)
                finally:
                    os.remove(tar_name)
def build_server(self):
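        # Management-port loop: accept client probes and reply with the model
        # name and the actual serving port (the handshake parsed by
        # BertService.check_server).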
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind(('127.0.0.1', self.port))
sock.listen(5)
print('Main server serving on {} port.'.format(self.port))
while True:
con, addr = sock.accept()
request = con.recv(1024)
response = 'status:0\tmodel name:' + str(
self.model_name) + '\t' + 'serving port:' + str(
self.serving_port)
con.send(bytes(response, encoding='utf-8'))
con.close()
def modify_conf(self, gpu_index=0):
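        # Rewrite model_toolkit.prototxt (or its per-GPU copy) in place:
        # select the CPU/GPU engine type, point model_data_path at the local
        # model, and turn on memory optimization.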
os.chdir(self.get_path())
if not self.with_gpu_flag:
with open('./conf/model_toolkit.prototxt', 'r') as f:
conf_str = f.read()
conf_str = re.sub('GPU', 'CPU', conf_str)
conf_str = re.sub('model_data_path.*"', self.model_path_str,
conf_str)
conf_str = re.sub('enable_memory_optimization: 0',
'enable_memory_optimization: 1', conf_str)
            with open('./conf/model_toolkit.prototxt', 'w') as f:
                f.write(conf_str)
else:
conf_file = './conf/model_toolkit.prototxt.' + str(gpu_index)
with open(conf_file, 'r') as f:
conf_str = f.read()
conf_str = re.sub('CPU', 'GPU', conf_str)
conf_str = re.sub('model_data_path.*"', self.model_path_str,
conf_str)
conf_str = re.sub('enable_memory_optimization: 0',
'enable_memory_optimization: 1', conf_str)
            with open(conf_file, 'w') as f:
                f.write(conf_str)
def find_serving_port(self):
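        # Probe ports 9000-9999 and return the first one nothing is
        # listening on; -1 means no port is available.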
for i in range(1000):
port = 9000 + i
with closing(socket.socket(socket.AF_INET,
socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
result = sock.connect_ex(('127.0.0.1', port))
if result != 0:
return port
return -1
def hold(self):
try:
self.build_server()
except KeyboardInterrupt:
print("Server is going to quit")
time.sleep(5)
def run(self, gpu_index=0, port=8866):
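        # Launch one serving process: patch the config for the chosen GPU,
        # pick a free serving port, start the binary, then (unless run_multi
        # drives several instances) block in the management-port loop.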
self.port = port
os.chdir(self.get_path())
self.modify_conf(gpu_index)
serving_port = self.find_serving_port()
if serving_port < 0:
print('No port available.')
return -1
self.serving_port = serving_port
        if self.with_gpu_flag:
gpu_msg = '--gpuid=' + str(gpu_index) + ' '
run_cmd = self.gpu_run_cmd + gpu_msg
run_cmd += '--port=' + str(
serving_port) + ' ' + '--resource_file=resource.prototxt.' + str(
gpu_index) + ' '
print('Start serving on gpu ' + str(gpu_index) + ' port = ' + str(
serving_port))
else:
            # probe for CUDA; fall back to the CPU binary when it is absent
            cuda_check = subprocess.Popen(
                'cat /usr/local/cuda/version.txt > tmp 2>&1', shell=True)
            cuda_check.wait()
            if cuda_check.returncode == 0:
                run_cmd = self.gpu_run_cmd + '--port=' + str(serving_port) + ' '
            else:
                run_cmd = self.cpu_run_cmd + '--port=' + str(serving_port) + ' '
print('Start serving on cpu port = {}'.format(serving_port))
process = subprocess.Popen(run_cmd, shell=True)
self.p_list.append(process)
if not self.run_m:
self.hold()
def run_multi(self, gpu_index_list=[], port_list=[]):
self.run_m = True
if len(port_list) < 1:
            print('Please set at least one port.')
return -1
        if self.with_gpu_flag:
if len(gpu_index_list) != len(port_list):
                print('gpu_index_list and port_list must have the same length.')
return -1
for gpu_index, port in zip(gpu_index_list, port_list):
self.run(gpu_index=gpu_index, port=port)
else:
for port in port_list:
self.run(port=port)
self.hold()
def stop(self):
for p in self.p_list:
p.kill()
def with_model(self, model_name=None, model_url=None):
        if model_name is None or not isinstance(model_name, str):
            print('Please set model name string')
            return -1
        self.model_name = model_name
os.chdir(self.get_path())
self.get_model(model_name)
def get_path(self):
py_path = os.path.dirname(paddle_gpu_serving.__file__)
server_path = os.path.join(py_path, 'server')
return server_path
def get_model(self, model_name):
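        # Obtain the model either as a pre-packaged tarball from the remote
        # mirror (when paddlehub is unavailable) or by exporting an inference
        # model from the local PaddleHub module.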
server_path = self.get_path()
if not self.paddlehub_found or self.use_other_model:
tar_name = model_name + '.tar.gz'
model_url = self.model_url + '/' + tar_name
model_path = os.path.join(server_path, 'data/model/paddle/fluid')
if not os.path.exists(model_path):
os.makedirs('data/model/paddle/fluid')
os.chdir(model_path)
            if not os.path.exists(model_name):
os.system('wget ' + model_url + ' --no-check-certificate')
                print('Decompressing files ...')
tar = tarfile.open(tar_name)
tar.extractall()
tar.close()
os.remove(tar_name)
self.model_path_str = r'model_data_path: "./data/model/paddle/fluid/' + model_name + r'"'
else:
import paddlehub as hub
import paddle.fluid as fluid
paddlehub_modules_path = os.path.expanduser('~/.paddlehub')
paddlehub_bert_path = os.path.join(paddlehub_modules_path,
'bert_service')
model_path = os.path.join(paddlehub_bert_path, model_name)
self.model_path_str = r'model_data_path: "' + model_path + r'"'
if not os.path.exists(model_path):
print('Save model for serving ...')
module = hub.Module(name=model_name)
inputs, outputs, program = module.context(
trainable=True, max_seq_len=128)
                place = fluid.CPUPlace()
exe = fluid.Executor(place)
input_ids = inputs["input_ids"]
position_ids = inputs["position_ids"]
segment_ids = inputs["segment_ids"]
input_mask = inputs["input_mask"]
feed_var_names = [
input_ids.name, position_ids.name, segment_ids.name,
input_mask.name
]
target_vars = [
outputs["pooled_output"], outputs["sequence_output"]
]
os.makedirs(model_path)
fluid.io.save_inference_model(
feeded_var_names=feed_var_names,
target_vars=target_vars,
main_program=program,
executor=exe,
dirname=model_path)
os.chdir(self.get_path())
[{
"dict_name": "dict",
"shard": 2,
"dup": 1,
"timeout": 200,
"retry": 3,
"backup_request": 100,
"type": "ipport_list",
"load_balancer": "rr",
"nodes": [{
"ipport_list": "list://xxx.xxx.xxx.xxx:8000"
},{
"ipport_list": "list://xxx.xxx.xxx.xxx:8000"
}]
}]
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 1
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 0
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 1
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 1
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 1
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 1
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 1
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 1
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 1
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 1
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 0
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 1
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 1
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 1
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 1
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 1
}
engines {
name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: 1
}
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.0"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.1"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.10"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.11"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.12"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.13"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.14"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.15"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.2"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.3"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.4"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.5"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.6"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.7"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.8"
cube_config_file: "./conf/cube.conf"
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt.9"
cube_config_file: "./conf/cube.conf"
workflows {
name: "workflow1"
workflow_type: "Sequence"
nodes {
name: "dense_echo_op"
type: "DenseEchoOp"
}
}
workflows {
name: "workflow2"
workflow_type: "Sequence"
nodes {
name: "sparse_echo_op"
type: "SparseEchoOp"
dependencies {
name: "startup_op"
mode: "RO"
}
}
}
workflows {
name: "workflow3"
workflow_type: "Sequence"
nodes {
name: "echo_op"
type: "CommonEchoOp"
}
}
workflows {
name: "workflow4"
workflow_type: "Sequence"
nodes {
name: "image_reader_op"
type: "ReaderOp"
}
nodes {
name: "image_classify_op"
type: "ClassifyOp"
dependencies {
name: "image_reader_op"
mode: "RO"
}
}
nodes {
name: "write_json_op"
type: "WriteJsonOp"
dependencies {
name: "image_classify_op"
mode: "RO"
}
}
}
workflows {
name: "workflow5"
workflow_type: "Sequence"
nodes {
name: "int64tensor_echo_op"
type: "Int64TensorEchoOp"
}
}
workflows {
name: "workflow6"
workflow_type: "Sequence"
nodes {
name: "text_classify_op"
type: "TextClassificationOp"
}
}
workflows {
name: "workflow7"
workflow_type: "Sequence"
nodes {
name: "echo_kvdb_service_op"
type: "KVDBEchoOp"
}
}
workflows {
name: "workflow8"
workflow_type: "Sequence"
nodes {
name: "ctr_prediction_service_op"
type: "CTRPredictionOp"
}
}
workflows {
name: "workflow9"
workflow_type: "Sequence"
nodes {
name: "bert_service_op"
type: "BertServiceOp"
}
}
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import setuptools
with open("README.md", "r") as fh:
long_description = fh.read()
# read version info from the package __init__
info_py = './paddle_gpu_serving/__init__.py'
with open(info_py, 'r') as f:
    info_content = f.readlines()
version_line = [
    l.strip() for l in info_content if l.startswith('__version__')
][0]
exec(version_line)  # defines __version__
setuptools.setup(
name="paddle-gpu-serving",
version=__version__,
author="MRXLT",
author_email="xlt2024@gmail.com",
description="package for paddle serving with bert",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/PaddlePaddle/Serving",
packages=setuptools.find_packages(),
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
],
python_requires='>=3.5',
package_data={
'paddle_gpu_serving': [
'server/conf/*',
'server/data/model/paddle/fluid_reload_flag',
'server/data/model/paddle/fluid_time_file',
]
})