Commit 9d047503 authored by MRXLT, committed by GitHub

Merge pull request #193 from guru4elephant/refine_serve

Refine serve
@@ -36,8 +36,9 @@ using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralInferOp::inference() {
VLOG(2) << "Going to run inference";
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name());
VLOG(2) << "Get precedent op name: " << pre_name();
GeneralBlob *output_blob = mutable_data<GeneralBlob>();
if (!input_blob) {
@@ -48,6 +49,8 @@ int GeneralInferOp::inference() {
const TensorVector *in = &input_blob->tensor_vector;
TensorVector *out = &output_blob->tensor_vector;
int batch_size = input_blob->GetBatchSize();
VLOG(2) << "input batch size: " << batch_size;
output_blob->SetBatchSize(batch_size);
VLOG(2) << "infer batch size: " << batch_size;
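The two hunks above show the contract every general op follows: fetch the predecessor's GeneralBlob by name via get_depend_argument(pre_name()), allocate its own output blob with mutable_data, and propagate the input batch size to the output before running inference. A schematic Python sketch of that data flow (class and method names here are illustrative, not the actual C++ API):

# Schematic sketch of the op-chaining contract shown in the hunks above.
# Blob / GeneralInferOpSketch and their members are illustrative, not the real C++ API.
class Blob:
    def __init__(self):
        self.tensor_vector = []
        self.batch_size = 0

class GeneralInferOpSketch:
    def __init__(self, pre_name, depend_blobs):
        self.pre_name = pre_name          # name of the preceding op
        self.depend_blobs = depend_blobs  # op name -> Blob produced by that op

    def inference(self):
        input_blob = self.depend_blobs.get(self.pre_name)  # get_depend_argument(pre_name())
        if input_blob is None:
            raise RuntimeError("Failed to get input blob from " + self.pre_name)
        output_blob = Blob()                                # mutable_data<GeneralBlob>()
        output_blob.batch_size = input_blob.batch_size      # SetBatchSize(batch_size)
        # ... run the forward pass over input_blob.tensor_vector into output_blob ...
        return output_blob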
@@ -45,7 +45,9 @@ int GeneralTextReaderOp::inference() {
std::vector<int64_t> capacity;
GeneralBlob *res = mutable_data<GeneralBlob>();
TensorVector *in = &res->tensor_vector;
TensorVector *out = &res->tensor_vector;
res->SetBatchSize(batch_size);
if (!res) {
LOG(ERROR) << "Failed get op tls reader object output";
@@ -103,23 +105,23 @@ int GeneralTextReaderOp::inference() {
VLOG(2) << "var[" << i << "] is tensor, capacity: " << capacity[i];
}
lod_tensor.name = model_config->_feed_name[i];
in->push_back(lod_tensor);
out->push_back(lod_tensor);
}
for (int i = 0; i < var_num; ++i) {
if (in->at(i).lod.size() == 1) {
if (out->at(i).lod.size() == 1) {
for (int j = 0; j < batch_size; ++j) {
const Tensor &tensor = req->insts(j).tensor_array(i);
int data_len = tensor.int_data_size();
int cur_len = in->at(i).lod[0].back();
in->at(i).lod[0].push_back(cur_len + data_len);
int cur_len = out->at(i).lod[0].back();
out->at(i).lod[0].push_back(cur_len + data_len);
}
in->at(i).data.Resize(in->at(i).lod[0].back() * elem_size[i]);
in->at(i).shape = {in->at(i).lod[0].back(), 1};
out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
out->at(i).shape = {out->at(i).lod[0].back(), 1};
VLOG(2) << "var[" << i
<< "] is lod_tensor and len=" << in->at(i).lod[0].back();
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
} else {
in->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
VLOG(2) << "var[" << i
<< "] is tensor and capacity=" << batch_size * capacity[i];
}
@@ -127,7 +129,7 @@ int GeneralTextReaderOp::inference() {
for (int i = 0; i < var_num; ++i) {
if (elem_type[i] == 0) {
int64_t *dst_ptr = static_cast<int64_t *>(in->at(i).data.data());
int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0;
@@ -136,14 +138,14 @@ int GeneralTextReaderOp::inference() {
dst_ptr[offset + k] =
req->insts(j).tensor_array(i).int_data(k);
}
if (in->at(i).lod.size() == 1) {
offset = in->at(i).lod[0][j + 1];
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
}
} else {
float *dst_ptr = static_cast<float *>(in->at(i).data.data());
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0;
@@ -152,8 +154,8 @@ int GeneralTextReaderOp::inference() {
dst_ptr[offset + k] =
req->insts(j).tensor_array(i).int_data(k);
}
if (in->at(i).lod.size() == 1) {
offset = in->at(i).lod[0][j + 1];
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
@@ -162,6 +164,7 @@ int GeneralTextReaderOp::inference() {
}
int64_t end = timeline.TimeStampUS();
res->p_size = 0;
AddBlobInfo(res, start);
AddBlobInfo(res, end);
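Most of the reader hunks rename the working TensorVector from in to out, but the surrounding bookkeeping is what matters: for a LoD (variable-length) variable, lod[0] accumulates the cumulative sequence lengths over the batch, the data buffer is resized to lod[0].back() * elem_size, the shape becomes {lod[0].back(), 1}, and each instance is copied in at the offset recorded for it. A small Python sketch of that bookkeeping, with plain lists standing in for the C++ tensors:

# Offset bookkeeping for one LoD variable, mirroring the loops above.
# Plain Python lists stand in for the C++ TensorVector and data buffer.
def pack_lod_variable(batch):            # batch: list of per-instance sequences
    lod = [0]
    for seq in batch:
        lod.append(lod[-1] + len(seq))   # cur_len + data_len
    total = lod[-1]
    shape = [total, 1]                   # out->at(i).shape = {lod[0].back(), 1}
    data = [0] * total                   # data.Resize(lod[0].back() * elem_size)
    for j, seq in enumerate(batch):
        offset = lod[j]                  # instance j starts at lod[0][j]
        for k, value in enumerate(seq):
            data[offset + k] = value     # dst_ptr[offset + k] = ...int_data(k)
    return lod, shape, data

# Three instances of lengths 2, 3 and 1:
# pack_lod_variable([[1, 2], [3, 4, 5], [6]])
# -> lod == [0, 2, 5, 6], shape == [6, 1], data == [1, 2, 3, 4, 5, 6]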
@@ -126,13 +126,13 @@ func Predict(handle Handle, int_feed_map map[string][]int64, fetch []string) map
inst.TensorArray = tensor_array
var profiletime bool
profiletime = false
var profile_server bool
profile_server = false
req := &Request{
Insts: []FeedInst{inst},
FetchVarNames: fetch,
ProfileTime: profiletime}
ProfileServer: profile_server}
b, err := json.Marshal(req)
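On the Go client side, the Request now carries a ProfileServer flag (default false) in place of the old ProfileTime field. A hedged sketch of the JSON body the client marshals; the key names below simply mirror the Go field names, and the real wire format depends on the Request struct tags, which this diff does not show:

# Hedged sketch of the request body built in the Go Predict() above.
# Key names mirror the Go field names; the actual JSON keys depend on the
# Request struct tags, which are not visible in this diff.
import json

def build_request(tensor_array, fetch, profile_server=False):
    req = {
        "Insts": [{"TensorArray": tensor_array}],
        "FetchVarNames": fetch,
        "ProfileServer": profile_server,   # replaces the old ProfileTime flag
    }
    return json.dumps(req)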
@@ -98,9 +98,6 @@ class Server(object):
def set_port(self, port):
self.port = port
def set_vlog_level(self, vlog_level):
self.vlog_level = vlog_level
def set_reload_interval(self, interval):
self.reload_interval_s = interval
@@ -250,6 +247,8 @@ class Server(object):
# currently we do not load cube
if not self.use_local_bin:
self.download_bin()
else:
print("Use local bin")
command = "{} " \
"-enable_model_toolkit " \
"-inferservice_path {} " \
@@ -262,8 +261,7 @@ class Server(object):
"-resource_file {} " \
"-workflow_path {} " \
"-workflow_file {} " \
"-bthread_concurrency {} " \
"-v {} ".format(
"-bthread_concurrency {} ".format(
self.bin_path,
self.workdir,
self.infer_service_fn,
@@ -275,6 +273,7 @@ class Server(object):
self.resource_fn,
self.workdir,
self.workflow_fn,
self.num_threads,
self.vlog_level)
self.num_threads)
print("Going to Run Command")
print(command)
os.system(command)
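The last two Server hunks drop the -v {} flag together with its self.vlog_level argument (whose set_vlog_level setter is removed in the earlier hunk), so the format placeholders and the .format() arguments line up again, with -bthread_concurrency filled by self.num_threads at the end. A stripped-down sketch of that alignment; only a few of the real flags are shown, and named placeholders are used here purely to make the pairing explicit:

# Stripped-down sketch of the flag/argument alignment the hunks above restore.
# Only a handful of the real flags appear; the values are placeholders.
command_template = ("{bin} "
                    "-enable_model_toolkit "
                    "-workflow_path {workflow_path} "
                    "-workflow_file {workflow_file} "
                    "-bthread_concurrency {threads} ")   # the -v/vlog pair is gone

command = command_template.format(bin="./serving",
                                  workflow_path="workdir",
                                  workflow_file="workflow.prototxt",
                                  threads=10)

Named placeholders, as in this sketch, are one way to keep the flag list and its values from drifting apart when flags are added or removed.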
@@ -15,10 +15,31 @@
Usage:
Host a trained paddle model with one line command
Example:
python -m paddle_serving_server.serve model 9292
python -m paddle_serving_server.serve --model ./serving_server_model --port 9292
"""
import argparse
def parse_args():
parser = argparse.ArgumentParser("serve")
parser.add_argument("--thread", type=int, default=10, help="Concurrency of server")
parser.add_argument("--model", type=str, default="", help="Model for serving")
parser.add_argument("--port", type=int, default=9292, help="Port the server")
parser.add_argument("--workdir", type=str, default="workdir", help="Working dir of current service")
parser.add_argument("--device", type=str, default="cpu", help="Type of device")
return parser.parse_args()
def start_standard_model():
args = parse_args()
thread_num = args.thread
model = args.model
port = args.port
workdir = args.workdir
device = args.device
if model == "":
print("You must specify your serving model")
exit(-1)
def start_standard_model(model_folder, port, thread_num):
import paddle_serving_server as serving
op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader')
@@ -30,17 +51,13 @@ def start_standard_model(model_folder, port, thread_num):
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
server = Server()
server = serving.Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_thread(thread_num)
server.set_num_threads(thread_num)
server.load_model_config(model_folder)
port = port
server.prepare_server(workdir="workdir", port=port, device="cpu")
server.load_model_config(model)
server.prepare_server(workdir=workdir, port=port, device=device)
server.run_server()
if __name__ == "__main__":
if len(sys.argv) != 4:
print("{} model_folder port thread".format(sys.argv[0]))
sys.exit(0)
start_standard_model(sys.argv[1], int(sys.argv[2]), int(sys.argv[3]))
start_standard_model()
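The rewritten serve.py feeds the parsed CLI flags straight into the pipeline it builds. The same pipeline can be constructed programmatically; the sketch below follows start_standard_model above, with the OpSeqMaker construction and the 'general_infer' / 'general_response' op names assumed, since those lines are elided from the diff:

# Programmatic equivalent of start_standard_model() above.
# OpSeqMaker() and the 'general_infer' / 'general_response' op names are
# assumed here; the corresponding lines are elided from the diff.
import paddle_serving_server as serving

op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')        # assumed op name
general_response_op = op_maker.create('general_response')  # assumed op name

op_seq_maker = serving.OpSeqMaker()                         # assumed constructor
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)

server = serving.Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(10)
server.load_model_config("./serving_server_model")
server.prepare_server(workdir="workdir", port=9292, device="cpu")
server.run_server()

With the new entry point this is equivalent to running python -m paddle_serving_server.serve --model ./serving_server_model --port 9292 --thread 10.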