Commit 9d047503, authored by MRXLT, committed by GitHub

Merge pull request #193 from guru4elephant/refine_serve

Refine serve
@@ -36,8 +36,9 @@ using baidu::paddle_serving::predictor::InferManager;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;

 int GeneralInferOp::inference() {
+  VLOG(2) << "Going to run inference";
   const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name());
+  VLOG(2) << "Get precedent op name: " << pre_name();
   GeneralBlob *output_blob = mutable_data<GeneralBlob>();

   if (!input_blob) {
@@ -48,6 +49,8 @@ int GeneralInferOp::inference() {
   const TensorVector *in = &input_blob->tensor_vector;
   TensorVector *out = &output_blob->tensor_vector;
   int batch_size = input_blob->GetBatchSize();
+  VLOG(2) << "input batch size: " << batch_size;

   output_blob->SetBatchSize(batch_size);

   VLOG(2) << "infer batch size: " << batch_size;
...
@@ -45,7 +45,9 @@ int GeneralTextReaderOp::inference() {
   std::vector<int64_t> capacity;
   GeneralBlob *res = mutable_data<GeneralBlob>();
-  TensorVector *in = &res->tensor_vector;
+  TensorVector *out = &res->tensor_vector;
+  res->SetBatchSize(batch_size);

   if (!res) {
     LOG(ERROR) << "Failed get op tls reader object output";
@@ -103,23 +105,23 @@ int GeneralTextReaderOp::inference() {
       VLOG(2) << "var[" << i << "] is tensor, capacity: " << capacity[i];
     }
     lod_tensor.name = model_config->_feed_name[i];
-    in->push_back(lod_tensor);
+    out->push_back(lod_tensor);
   }

   for (int i = 0; i < var_num; ++i) {
-    if (in->at(i).lod.size() == 1) {
+    if (out->at(i).lod.size() == 1) {
       for (int j = 0; j < batch_size; ++j) {
         const Tensor &tensor = req->insts(j).tensor_array(i);
         int data_len = tensor.int_data_size();
-        int cur_len = in->at(i).lod[0].back();
-        in->at(i).lod[0].push_back(cur_len + data_len);
+        int cur_len = out->at(i).lod[0].back();
+        out->at(i).lod[0].push_back(cur_len + data_len);
       }
-      in->at(i).data.Resize(in->at(i).lod[0].back() * elem_size[i]);
-      in->at(i).shape = {in->at(i).lod[0].back(), 1};
+      out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
+      out->at(i).shape = {out->at(i).lod[0].back(), 1};
       VLOG(2) << "var[" << i
-              << "] is lod_tensor and len=" << in->at(i).lod[0].back();
+              << "] is lod_tensor and len=" << out->at(i).lod[0].back();
     } else {
-      in->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
+      out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
       VLOG(2) << "var[" << i
               << "] is tensor and capacity=" << batch_size * capacity[i];
     }
@@ -127,7 +129,7 @@ int GeneralTextReaderOp::inference() {
   for (int i = 0; i < var_num; ++i) {
     if (elem_type[i] == 0) {
-      int64_t *dst_ptr = static_cast<int64_t *>(in->at(i).data.data());
+      int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
       int offset = 0;
       for (int j = 0; j < batch_size; ++j) {
         for (int k = 0;
@@ -136,14 +138,14 @@ int GeneralTextReaderOp::inference() {
           dst_ptr[offset + k] =
               req->insts(j).tensor_array(i).int_data(k);
         }
-        if (in->at(i).lod.size() == 1) {
-          offset = in->at(i).lod[0][j + 1];
+        if (out->at(i).lod.size() == 1) {
+          offset = out->at(i).lod[0][j + 1];
         } else {
           offset += capacity[i];
         }
       }
     } else {
-      float *dst_ptr = static_cast<float *>(in->at(i).data.data());
+      float *dst_ptr = static_cast<float *>(out->at(i).data.data());
       int offset = 0;
       for (int j = 0; j < batch_size; ++j) {
         for (int k = 0;
@@ -152,8 +154,8 @@ int GeneralTextReaderOp::inference() {
           dst_ptr[offset + k] =
               req->insts(j).tensor_array(i).int_data(k);
         }
-        if (in->at(i).lod.size() == 1) {
-          offset = in->at(i).lod[0][j + 1];
+        if (out->at(i).lod.size() == 1) {
+          offset = out->at(i).lod[0][j + 1];
         } else {
           offset += capacity[i];
         }
@@ -162,6 +164,7 @@ int GeneralTextReaderOp::inference() {
   }

   int64_t end = timeline.TimeStampUS();
+  res->p_size = 0;
   AddBlobInfo(res, start);
   AddBlobInfo(res, end);
...
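Reviewer note: for readers unfamiliar with the LoD bookkeeping in the hunk above, here is a small, self-contained Python sketch (illustration only, not part of the PR) of the same offset computation the reader op performs for a variable-length feed.

# Pure-Python illustration of GeneralTextReaderOp's LoD offsets: each batch item
# appends its cumulative length, and the flattened buffer is sized from the final offset.
def build_lod(batch):
    lod = [0]
    for seq in batch:
        lod.append(lod[-1] + len(seq))
    return lod

batch = [[3, 1, 4], [1, 5], [9, 2, 6, 5]]
print(build_lod(batch))  # [0, 3, 5, 9] -> flattened data has shape [9, 1]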
@@ -126,13 +126,13 @@ func Predict(handle Handle, int_feed_map map[string][]int64, fetch []string) map
     inst.TensorArray = tensor_array

-    var profiletime bool
-    profiletime = false
+    var profile_server bool
+    profile_server = false
     req := &Request{
         Insts:         []FeedInst{inst},
         FetchVarNames: fetch,
-        ProfileTime:   profiletime}
+        ProfileServer: profile_server}

     b, err := json.Marshal(req)
...
@@ -98,9 +98,6 @@ class Server(object):
     def set_port(self, port):
         self.port = port

-    def set_vlog_level(self, vlog_level):
-        self.vlog_level = vlog_level
-
     def set_reload_interval(self, interval):
         self.reload_interval_s = interval
@@ -250,6 +247,8 @@ class Server(object):
         # currently we do not load cube
         if not self.use_local_bin:
             self.download_bin()
+        else:
+            print("Use local bin")
         command = "{} " \
                   "-enable_model_toolkit " \
                   "-inferservice_path {} " \
@@ -262,8 +261,7 @@ class Server(object):
                   "-resource_file {} " \
                   "-workflow_path {} " \
                   "-workflow_file {} " \
-                  "-bthread_concurrency {} " \
-                  "-v {} ".format(
+                  "-bthread_concurrency {} ".format(
                       self.bin_path,
                       self.workdir,
                       self.infer_service_fn,
@@ -275,6 +273,7 @@ class Server(object):
                       self.resource_fn,
                       self.workdir,
                       self.workflow_fn,
-                      self.num_threads,
-                      self.vlog_level)
+                      self.num_threads)
+        print("Going to Run Comand")
+        print(command)
         os.system(command)
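Reviewer note: with set_vlog_level() and the "-v {}" flag removed, verbosity is no longer set through the Python Server API. A minimal sketch of one way to surface the new VLOG(2) traces, assuming the serving binary honors glog's GLOG_v environment variable (not something this diff states):

# Assumption: raising glog verbosity via GLOG_v before launch exposes the VLOG(2) lines
# added in this PR; the model path below is a placeholder.
import os
import subprocess

env = dict(os.environ, GLOG_v="2")
subprocess.check_call(
    ["python", "-m", "paddle_serving_server.serve",
     "--model", "./serving_server_model", "--port", "9292"],
    env=env)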
@@ -15,10 +15,31 @@
 Usage:
     Host a trained paddle model with one line command
     Example:
-        python -m paddle_serving_server.serve model 9292
+        python -m paddle_serving_server.serve --model ./serving_server_model --port 9292
 """
+import argparse
+
+
+def parse_args():
+    parser = argparse.ArgumentParser("serve")
+    parser.add_argument("--thread", type=int, default=10, help="Concurrency of server")
+    parser.add_argument("--model", type=str, default="", help="Model for serving")
+    parser.add_argument("--port", type=int, default=9292, help="Port the server")
+    parser.add_argument("--workdir", type=str, default="workdir", help="Working dir of current service")
+    parser.add_argument("--device", type=str, default="cpu", help="Type of device")
+    return parser.parse_args()
+
+
+def start_standard_model():
+    args = parse_args()
+    thread_num = args.thread
+    model = args.model
+    port = args.port
+    workdir = args.workdir
+    device = args.device
+
+    if model == "":
+        print("You must specify your serving model")
+        exit(-1)
+
-def start_standard_model(model_folder, port, thread_num):
     import paddle_serving_server as serving
     op_maker = serving.OpMaker()
     read_op = op_maker.create('general_reader')
@@ -30,17 +51,13 @@ def start_standard_model(model_folder, port, thread_num):
     op_seq_maker.add_op(general_infer_op)
     op_seq_maker.add_op(general_response_op)

-    server = Server()
+    server = serving.Server()
     server.set_op_sequence(op_seq_maker.get_op_sequence())
-    server.set_num_thread(thread_num)
-    server.load_model_config(model_folder)
-    port = port
-    server.prepare_server(workdir="workdir", port=port, device="cpu")
+    server.set_num_threads(thread_num)
+    server.load_model_config(model)
+    server.prepare_server(workdir=workdir, port=port, device=device)
     server.run_server()

 if __name__ == "__main__":
-    if len(sys.argv) != 4:
-        print("{} model_folder port thread".format(sys.argv[0]))
-        sys.exit(0)
-    start_standard_model(sys.argv[1], int(sys.argv[2]), int(sys.argv[3]))
+    start_standard_model()
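Reviewer note: for context, the refactored entry point is invoked as shown in the updated docstring, e.g. python -m paddle_serving_server.serve --model ./serving_server_model --port 9292 --thread 10. A rough programmatic equivalent is sketched below; serving.OpSeqMaker and the 'general_infer'/'general_response' op type strings are assumptions inferred from the surrounding hunks, not shown verbatim in this diff, and the model path is a placeholder.

# Sketch of what start_standard_model() now wires together, usable outside the CLI.
import paddle_serving_server as serving

op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')        # assumed op type string
general_response_op = op_maker.create('general_response')  # assumed op type string

op_seq_maker = serving.OpSeqMaker()  # assumed class name, matching op_seq_maker above
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)

server = serving.Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(10)                           # --thread
server.load_model_config("./serving_server_model")   # --model (placeholder path)
server.prepare_server(workdir="workdir", port=9292, device="cpu")
server.run_server()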