diff --git a/core/general-client/src/elog b/core/general-client/src/elog
deleted file mode 100644
index 33edf65aab6e7a4778f160dde6585216181c4cfb..0000000000000000000000000000000000000000
--- a/core/general-client/src/elog
+++ /dev/null
@@ -1 +0,0 @@
-make: *** No targets specified and no makefile found. Stop.
diff --git a/core/general-server/op/general_infer_op.cpp b/core/general-server/op/general_infer_op.cpp
index 81358bf6c12244e8dd9a8f3d27161f9d86d46800..6cec9d1cb4d87e13c566c24c90cef11f148749c8 100644
--- a/core/general-server/op/general_infer_op.cpp
+++ b/core/general-server/op/general_infer_op.cpp
@@ -36,8 +36,9 @@ using baidu::paddle_serving::predictor::InferManager;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 
 int GeneralInferOp::inference() {
+  VLOG(2) << "Going to run inference";
   const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name());
-
+  VLOG(2) << "Get precedent op name: " << pre_name();
   GeneralBlob *output_blob = mutable_data<GeneralBlob>();
 
   if (!input_blob) {
@@ -48,6 +49,8 @@ int GeneralInferOp::inference() {
   const TensorVector *in = &input_blob->tensor_vector;
   TensorVector *out = &output_blob->tensor_vector;
   int batch_size = input_blob->GetBatchSize();
+  VLOG(2) << "input batch size: " << batch_size;
+
   output_blob->SetBatchSize(batch_size);
 
   VLOG(2) << "infer batch size: " << batch_size;
diff --git a/core/general-server/op/general_text_reader_op.cpp b/core/general-server/op/general_text_reader_op.cpp
index 77dd299c818cf2af2cee05b7cf14e7b490f79e19..81df8171d30d7ebd2c208ad31cda5aea0564c50e 100644
--- a/core/general-server/op/general_text_reader_op.cpp
+++ b/core/general-server/op/general_text_reader_op.cpp
@@ -45,7 +45,9 @@ int GeneralTextReaderOp::inference() {
   std::vector<int64_t> capacity;
 
   GeneralBlob *res = mutable_data<GeneralBlob>();
-  TensorVector *in = &res->tensor_vector;
+  TensorVector *out = &res->tensor_vector;
+
+  res->SetBatchSize(batch_size);
 
   if (!res) {
     LOG(ERROR) << "Failed get op tls reader object output";
@@ -103,23 +105,23 @@ int GeneralTextReaderOp::inference() {
       VLOG(2) << "var[" << i << "] is tensor, capacity: " << capacity[i];
     }
     lod_tensor.name = model_config->_feed_name[i];
-    in->push_back(lod_tensor);
+    out->push_back(lod_tensor);
   }
 
   for (int i = 0; i < var_num; ++i) {
-    if (in->at(i).lod.size() == 1) {
+    if (out->at(i).lod.size() == 1) {
       for (int j = 0; j < batch_size; ++j) {
         const Tensor &tensor = req->insts(j).tensor_array(i);
         int data_len = tensor.int_data_size();
-        int cur_len = in->at(i).lod[0].back();
-        in->at(i).lod[0].push_back(cur_len + data_len);
+        int cur_len = out->at(i).lod[0].back();
+        out->at(i).lod[0].push_back(cur_len + data_len);
       }
-      in->at(i).data.Resize(in->at(i).lod[0].back() * elem_size[i]);
-      in->at(i).shape = {in->at(i).lod[0].back(), 1};
+      out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
+      out->at(i).shape = {out->at(i).lod[0].back(), 1};
       VLOG(2) << "var[" << i
-              << "] is lod_tensor and len=" << in->at(i).lod[0].back();
+              << "] is lod_tensor and len=" << out->at(i).lod[0].back();
     } else {
-      in->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
+      out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
       VLOG(2) << "var[" << i
               << "] is tensor and capacity=" << batch_size * capacity[i];
     }
@@ -127,7 +129,7 @@ int GeneralTextReaderOp::inference() {
 
   for (int i = 0; i < var_num; ++i) {
     if (elem_type[i] == 0) {
-      int64_t *dst_ptr = static_cast<int64_t *>(in->at(i).data.data());
+      int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
       int offset = 0;
       for (int j = 0; j < batch_size; ++j) {
         for (int k = 0;
@@ -136,14 +138,14 @@ int GeneralTextReaderOp::inference() {
          dst_ptr[offset + k] = req->insts(j).tensor_array(i).int_data(k);
         }
-        if (in->at(i).lod.size() == 1) {
-          offset = in->at(i).lod[0][j + 1];
+        if (out->at(i).lod.size() == 1) {
+          offset = out->at(i).lod[0][j + 1];
         } else {
           offset += capacity[i];
         }
       }
     } else {
-      float *dst_ptr = static_cast<float *>(in->at(i).data.data());
+      float *dst_ptr = static_cast<float *>(out->at(i).data.data());
       int offset = 0;
       for (int j = 0; j < batch_size; ++j) {
         for (int k = 0;
@@ -152,8 +154,8 @@ int GeneralTextReaderOp::inference() {
          dst_ptr[offset + k] = req->insts(j).tensor_array(i).int_data(k);
         }
-        if (in->at(i).lod.size() == 1) {
-          offset = in->at(i).lod[0][j + 1];
+        if (out->at(i).lod.size() == 1) {
+          offset = out->at(i).lod[0][j + 1];
         } else {
           offset += capacity[i];
         }
@@ -162,6 +164,7 @@ int GeneralTextReaderOp::inference() {
   }
 
   int64_t end = timeline.TimeStampUS();
+  res->p_size = 0;
   AddBlobInfo(res, start);
   AddBlobInfo(res, end);
diff --git a/go/serving_client/serving_client_api.go b/go/serving_client/serving_client_api.go
index bb77897f08dce2f57f39d9f5c9f183c6d7c323c1..3d4b95f28c869143f6a97e7fd060fc1ee84c7540 100644
--- a/go/serving_client/serving_client_api.go
+++ b/go/serving_client/serving_client_api.go
@@ -126,13 +126,13 @@ func Predict(handle Handle, int_feed_map map[string][]int64, fetch []string) map
 	inst.TensorArray = tensor_array
 
-	var profiletime bool
-	profiletime = false
+	var profile_server bool
+	profile_server = false
 
 	req := &Request{
 		Insts:         []FeedInst{inst},
 		FetchVarNames: fetch,
-		ProfileTime:   profiletime}
+		ProfileServer: profile_server}
 
 	b, err := json.Marshal(req)
diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py
index b7458e931a3c33ea3289ff3df9cd464ceb52fbac..6b454320136fc20eb4b6cee92c4ad607a80a41f8 100644
--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -98,9 +98,6 @@ class Server(object):
     def set_port(self, port):
         self.port = port
 
-    def set_vlog_level(self, vlog_level):
-        self.vlog_level = vlog_level
-
     def set_reload_interval(self, interval):
         self.reload_interval_s = interval
 
@@ -250,6 +247,8 @@ class Server(object):
         # currently we do not load cube
         if not self.use_local_bin:
             self.download_bin()
+        else:
+            print("Use local bin")
         command = "{} " \
                   "-enable_model_toolkit " \
                   "-inferservice_path {} " \
@@ -262,8 +261,7 @@ class Server(object):
                   "-resource_file {} " \
                   "-workflow_path {} " \
                   "-workflow_file {} " \
-                  "-bthread_concurrency {} " \
-                  "-v {} ".format(
+                  "-bthread_concurrency {} ".format(
                       self.bin_path,
                       self.workdir,
                       self.infer_service_fn,
@@ -275,6 +273,7 @@ class Server(object):
                       self.resource_fn,
                       self.workdir,
                       self.workflow_fn,
-                      self.num_threads,
-                      self.vlog_level)
+                      self.num_threads)
+        print("Going to Run Command")
+        print(command)
         os.system(command)
diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py
index eb40cd7c3dec1464c042545040eb20e75d415230..fe9dcd4dc8cb7cea51a0ac180796c340a22ef8ae 100644
--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -15,10 +15,31 @@
 Usage:
     Host a trained paddle model with one line command
     Example:
-        python -m paddle_serving_server.serve model 9292
+        python -m paddle_serving_server.serve --model ./serving_server_model --port 9292
 """
+import argparse
+
+def parse_args():
+    parser = argparse.ArgumentParser("serve")
+    parser.add_argument("--thread", type=int, default=10, help="Concurrency of server")
+    parser.add_argument("--model", type=str, default="", help="Model for serving")
+    parser.add_argument("--port", type=int, default=9292, help="Port of the server")
+    parser.add_argument("--workdir", type=str, default="workdir", help="Working dir of current service")
+    parser.add_argument("--device", type=str, default="cpu", help="Type of device")
+    return parser.parse_args()
+
+def start_standard_model():
+    args = parse_args()
+    thread_num = args.thread
+    model = args.model
+    port = args.port
+    workdir = args.workdir
+    device = args.device
+
+    if model == "":
+        print("You must specify your serving model")
+        exit(-1)
 
-def start_standard_model(model_folder, port, thread_num):
     import paddle_serving_server as serving
     op_maker = serving.OpMaker()
     read_op = op_maker.create('general_reader')
@@ -30,17 +51,13 @@ def start_standard_model(model_folder, port, thread_num):
     op_seq_maker.add_op(general_infer_op)
     op_seq_maker.add_op(general_response_op)
 
-    server = Server()
+    server = serving.Server()
     server.set_op_sequence(op_seq_maker.get_op_sequence())
-    server.set_num_thread(thread_num)
+    server.set_num_threads(thread_num)
 
-    server.load_model_config(model_folder)
-    port = port
-    server.prepare_server(workdir="workdir", port=port, device="cpu")
+    server.load_model_config(model)
+    server.prepare_server(workdir=workdir, port=port, device=device)
     server.run_server()
 
 if __name__ == "__main__":
-    if len(sys.argv) != 4:
-        print("{} model_folder port thread".format(sys.argv[0]))
-        sys.exit(0)
-    start_standard_model(sys.argv[1], int(sys.argv[2]), int(sys.argv[3]))
+    start_standard_model()
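For reviewers, a minimal sketch of what the reworked `serve.py` entry point now assembles, pieced together from the hunks above. The `python -m paddle_serving_server.serve --model ./serving_server_model --port 9292` invocation comes from the new docstring; the `'general_infer'`/`'general_response'` op names, the `OpSeqMaker()` constructor, and the model directory are assumptions not shown in this diff.

```python
# Sketch of the pipeline start_standard_model() builds after this change.
# Assumed (not visible in the diff): serving.OpSeqMaker() and the
# 'general_infer' / 'general_response' op names.
import paddle_serving_server as serving

op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')

op_seq_maker = serving.OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)

server = serving.Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(10)  # matches the new --thread default
server.load_model_config("./serving_server_model")  # path from the docstring example
server.prepare_server(workdir="workdir", port=9292, device="cpu")
server.run_server()
```

Note that with `set_vlog_level` removed from `__init__.py`, the launched command no longer passes `-v`, so the sketch does not set a verbosity level either.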