diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h
index 704319644cb68c31b980b14ea8e5b193f3dd22e9..d353ccad36eddf194ba964d64d284ac92a637e18 100644
--- a/core/general-client/include/general_model.h
+++ b/core/general-client/include/general_model.h
@@ -27,6 +27,9 @@
 #include "core/sdk-cpp/general_model_service.pb.h"
 #include "core/sdk-cpp/include/common.h"
 #include "core/sdk-cpp/include/predictor_sdk.h"
+#define BLOG(fmt, ...) \
+  printf(              \
+      "[%s:%s]:%d " fmt "\n", __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__)
 
 using baidu::paddle_serving::sdk_cpp::Predictor;
 using baidu::paddle_serving::sdk_cpp::PredictorApi;
@@ -42,8 +45,19 @@ namespace general_model {
 class ModelRes {
  public:
   ModelRes() {}
+  ModelRes(const ModelRes& res) {
+    _engine_name = res._engine_name;
+    _int64_map.insert(res._int64_map.begin(), res._int64_map.end());
+    _float_map.insert(res._float_map.begin(), res._float_map.end());
+  }
+  ModelRes(ModelRes&& res) {
+    _engine_name = std::move(res._engine_name);
+    _int64_map.insert(std::make_move_iterator(std::begin(res._int64_map)),
+                      std::make_move_iterator(std::end(res._int64_map)));
+    _float_map.insert(std::make_move_iterator(std::begin(res._float_map)),
+                      std::make_move_iterator(std::end(res._float_map)));
+  }
   ~ModelRes() {}
- public:
   const std::vector<std::vector<int64_t>>& get_int64_by_name(
       const std::string& name) {
     return _int64_map[name];
@@ -55,19 +69,18 @@ class ModelRes {
   void set_engine_name(const std::string& engine_name) {
     _engine_name = engine_name;
   }
-  const std::string& engine_name() {
-    return engine_name;
-  }
-  ModelRes& operator = (ModelRes&& res) {
-    std::cout << "move ++++++++>";
+  const std::string& engine_name() { return _engine_name; }
+  ModelRes& operator=(ModelRes&& res) {
     if (this != &res) {
-      _int64_map = res._int64_map;
-      _float_map = res._float_map;
-      res._int64_map = nullptr;
-      res._float_map = nullptr;
+      _engine_name = std::move(res._engine_name);
+      _int64_map.insert(std::make_move_iterator(std::begin(res._int64_map)),
+                        std::make_move_iterator(std::end(res._int64_map)));
+      _float_map.insert(std::make_move_iterator(std::begin(res._float_map)),
+                        std::make_move_iterator(std::end(res._float_map)));
     }
     return *this;
   }
+
  public:
   std::string _engine_name;
   std::map<std::string, std::vector<std::vector<int64_t>>> _int64_map;
@@ -82,7 +95,7 @@ class PredictorRes {
  public:
   void clear() {
     _models.clear();
-    _engine_names.clear();
+    _engine_names.clear();
   }
   const std::vector<std::vector<int64_t>>& get_int64_by_name(
       const int model_idx, const std::string& name) {
@@ -94,16 +107,13 @@ class PredictorRes {
   }
   void add_model_res(ModelRes&& res) {
     _engine_names.push_back(res.engine_name());
-    _models.emplace_back(res);
+    _models.emplace_back(std::move(res));
   }
   void set_variant_tag(const std::string& variant_tag) {
     _variant_tag = variant_tag;
   }
   const std::string& variant_tag() { return _variant_tag; }
-  int model_num() {return _models.size();}
-  const std::vector<std::string>& get_engine_names() {
-    return _engine_names;
-  }
+  const std::vector<std::string>& get_engine_names() { return _engine_names; }
 
  private:
   std::vector<ModelRes> _models;
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index 461037ba38eb367fd069047b1e7a43717e307acf..effb4afb5bc56e6e137d6f4e9fd60947b30b5298 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -219,35 +219,39 @@ int PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
     postprocess_start = client_infer_end;
     // multi-model output
     uint32_t model_num = res.outputs_size();
-    predict_res._models.resize(model_num);
+    // predict_res._models.resize(model_num);
     for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
       VLOG(2) << "process model output index: " << m_idx;
       auto output = res.outputs(m_idx);
+      ModelRes model;
+      model.set_engine_name(output.engine_name());
       for (auto &name : fetch_name) {
         int idx = _fetch_name_to_idx[name];
         VLOG(2) << "fetch name: " << name;
         if (_fetch_name_to_type[name] == 0) {
           int len = output.insts(0).tensor_array(idx).int64_data_size();
           VLOG(2) << "fetch tensor : " << name << " type: int64 len : " << len;
-          predict_res._models[m_idx]._int64_map[name].resize(1);
-          predict_res._models[m_idx]._int64_map[name][0].resize(len);
+          model._int64_map[name].resize(1);
+          model._int64_map[name][0].resize(len);
           for (int i = 0; i < len; ++i) {
-            predict_res._models[m_idx]._int64_map[name][0][i] =
+            model._int64_map[name][0][i] =
                 output.insts(0).tensor_array(idx).int64_data(i);
           }
         } else if (_fetch_name_to_type[name] == 1) {
           int len = output.insts(0).tensor_array(idx).float_data_size();
-          VLOG(2) << "fetch tensor : " << name << " type: float32 len : " << len;
-          predict_res._models[m_idx]._float_map[name].resize(1);
-          predict_res._models[m_idx]._float_map[name][0].resize(len);
+          VLOG(2) << "fetch tensor : " << name
+                  << " type: float32 len : " << len;
+          model._float_map[name].resize(1);
+          model._float_map[name][0].resize(len);
           for (int i = 0; i < len; ++i) {
-            predict_res._models[m_idx]._float_map[name][0][i] =
+            model._float_map[name][0][i] =
                 output.insts(0).tensor_array(idx).float_data(i);
           }
         }
-        //TODO
+        // TODO
         postprocess_end = timeline.TimeStampUS();
       }
+      predict_res.add_model_res(std::move(model));
     }
   }
@@ -259,7 +263,7 @@ int PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
             << "prepro_1:" << preprocess_end << " "
             << "client_infer_0:" << client_infer_start << " "
             << "client_infer_1:" << client_infer_end << " ";
-    //TODO: multi-model
+    // TODO: multi-model
     if (FLAGS_profile_server) {
       int op_num = res.profile_time_size() / 2;
       for (int i = 0; i < op_num; ++i) {
@@ -303,7 +307,7 @@ int PredictorClient::batch_predict(
   for (auto &name : fetch_name) {
     req.add_fetch_var_names(name);
   }
-
+
   for (int bi = 0; bi < batch_size; bi++) {
     VLOG(2) << "prepare batch " << bi;
     std::vector<Tensor *> tensor_vec;
@@ -382,13 +386,15 @@ int PredictorClient::batch_predict(
     postprocess_start = client_infer_end;
 
     uint32_t model_num = res.outputs_size();
-    predict_res_batch._models.resize(model_num);
+    // predict_res_batch._models.resize(model_num);
     for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
       VLOG(2) << "process model output index: " << m_idx;
       auto output = res.outputs(m_idx);
+      ModelRes model;
+      model.set_engine_name(output.engine_name());
       for (auto &name : fetch_name) {
-        predict_res_batch._models[m_idx]._int64_map[name].resize(batch_size);
-        predict_res_batch._models[m_idx]._float_map[name].resize(batch_size);
+        model._int64_map[name].resize(batch_size);
+        model._float_map[name].resize(batch_size);
       }
       VLOG(2) << "response batch size " << output.insts_size();
       VLOG(2) << "response var nmae " << output.insts(0).tensor_array_size();
@@ -398,29 +404,33 @@ int PredictorClient::batch_predict(
           int len = output.insts(bi).tensor_array(idx).data_size();
           if (_fetch_name_to_type[name] == 0) {
             int len = output.insts(bi).tensor_array(idx).int64_data_size();
-            VLOG(2) << "fetch tensor : " << name << " type: int64 len : " << len;
-            predict_res_batch._models[m_idx]._int64_map[name][bi].resize(len);
-            VLOG(2) << "fetch name " << name << " index " << idx << " first data "
+            VLOG(2) << "fetch tensor : " << name
+                    << " type: int64 len : " << len;
+            model._int64_map[name][bi].resize(len);
+            VLOG(2) << "fetch name " << name << " index " << idx
+                    << " first data "
                     << output.insts(bi).tensor_array(idx).int64_data(0);
             for (int i = 0; i < len; ++i) {
-              predict_res_batch._models[m_idx]._int64_map[name][bi][i] =
+              model._int64_map[name][bi][i] =
                   output.insts(bi).tensor_array(idx).int64_data(i);
             }
           } else if (_fetch_name_to_type[name] == 1) {
            int len = output.insts(bi).tensor_array(idx).float_data_size();
            VLOG(2) << "fetch tensor : " << name
                    << " type: float32 len : " << len;
-            predict_res_batch._models[m_idx]._float_map[name][bi].resize(len);
-            VLOG(2) << "fetch name " << name << " index " << idx << " first data "
+            model._float_map[name][bi].resize(len);
+            VLOG(2) << "fetch name " << name << " index " << idx
+                    << " first data "
                    << output.insts(bi).tensor_array(idx).float_data(0);
            for (int i = 0; i < len; ++i) {
-              predict_res_batch._models[m_idx]._float_map[name][bi][i] =
+              model._float_map[name][bi][i] =
                  output.insts(bi).tensor_array(idx).float_data(i);
            }
          }
          idx += 1;
        }
      }
+      predict_res_batch.add_model_res(std::move(model));
      postprocess_end = timeline.TimeStampUS();
    }
  }
@@ -433,7 +443,7 @@ int PredictorClient::batch_predict(
             << "prepro_1:" << preprocess_end << " "
             << "client_infer_0:" << client_infer_start << " "
             << "client_infer_1:" << client_infer_end << " ";
-    //TODO: multi-models
+    // TODO: multi-models
     if (FLAGS_profile_server) {
       int op_num = res.profile_time_size() / 2;
       for (int i = 0; i < op_num; ++i) {
diff --git a/core/general-client/src/pybind_general_model.cpp b/core/general-client/src/pybind_general_model.cpp
index ce5a40cf94dea77f0236015634fc7ea25c4d1d81..abb43dad5e9136906923950d56554f7471ed99e8 100644
--- a/core/general-client/src/pybind_general_model.cpp
+++ b/core/general-client/src/pybind_general_model.cpp
@@ -40,10 +40,9 @@ PYBIND11_MODULE(serving_client, m) {
              return self.get_float_by_name(model_idx, name);
            },
            py::return_value_policy::reference)
-      .def("variant_tag",
-           [](PredictorRes &self) { return self.variant_tag(); })
-      .def("model_num",
-           [](PredictorRes &self) {return self.model_num(); });
+      .def("variant_tag", [](PredictorRes &self) { return self.variant_tag(); })
+      .def("get_engine_names",
+           [](PredictorRes &self) { return self.get_engine_names(); });
 
   py::class_<PredictorClient>(m, "PredictorClient", py::buffer_protocol())
       .def(py::init<>())
diff --git a/core/general-server/op/general_infer_op.cpp b/core/general-server/op/general_infer_op.cpp
index ca00834cf8de0b01ada4a7996b9617d2370a2cc5..a9ff2e7226b25842889e391d82217b3b6a140170 100644
--- a/core/general-server/op/general_infer_op.cpp
+++ b/core/general-server/op/general_infer_op.cpp
@@ -37,8 +37,11 @@ using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 int GeneralInferOp::inference() {
   VLOG(2) << "Going to run inference";
+
   const std::vector<std::string> pre_node_names = pre_names();
   if (pre_node_names.size() != 1) {
-    LOG(ERROR) << "This op(" << op_name() <<") can only have one predecessor op, but received " << pre_node_names.size();
+    LOG(ERROR) << "This op(" << op_name()
+               << ") can only have one predecessor op, but received "
+               << pre_node_names.size();
     return -1;
   }
   const std::string pre_name = pre_node_names[0];
@@ -65,8 +68,9 @@ int GeneralInferOp::inference() {
   int64_t start = timeline.TimeStampUS();
   timeline.Start();
 
-  if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) {
-    LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
+  if (InferManager::instance().infer(
+          engine_name().c_str(), in, out, batch_size)) {
+    LOG(ERROR) << "Failed do infer in fluid model: " << engine_name().c_str();
     return -1;
   }
 
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index ddfeeb656ecad82084b9d118bbaa490249cdf7af..9b7c20d369ab08ba49e80c0f182e976043f2a954 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -40,7 +40,7 @@ using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 int GeneralResponseOp::inference() {
   const std::vector<std::string> pre_node_names = pre_names();
   VLOG(2) << "pre node names size: " << pre_node_names.size();
-
+
   const Request *req = dynamic_cast<const Request *>(get_request_message());
   // response inst with only fetch_var_names
   Response *res = mutable_data<Response>();
@@ -67,13 +67,14 @@ int GeneralResponseOp::inference() {
 
   const GeneralBlob *input_blob;
   for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
-    VLOG(2) << "pre names[" << pi << "]: "
-            << pre_node_names[pi] << " ("
+    const std::string &pre_name = pre_node_names[pi];
+    VLOG(2) << "pre names[" << pi << "]: " << pre_name << " ("
             << pre_node_names.size() << ")";
-    input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
-    fprintf(stderr, "input(%s) blob address %x\n", pre_node_names[pi].c_str(), input_blob);
+    input_blob = get_depend_argument<GeneralBlob>(pre_name);
+    // fprintf(stderr, "input(%s) blob address %x\n", pre_names.c_str(),
+    //         input_blob);
     if (!input_blob) {
-      LOG(ERROR) << "Failed mutable depended argument, op: " << pre_node_names[pi];
+      LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name;
       return -1;
     }
 
@@ -82,6 +83,8 @@ int GeneralResponseOp::inference() {
     VLOG(2) << "input batch size: " << batch_size;
 
     ModelOutput *output = res->add_outputs();
+    output->set_engine_name(
+        pre_name);  // To get the order of model return values
     for (int i = 0; i < batch_size; ++i) {
       FetchInst *fetch_inst = output->add_insts();
       for (auto &idx : fetch_index) {
@@ -114,7 +117,8 @@ int GeneralResponseOp::inference() {
             for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
                  k++) {
               FetchInst *fetch_p = output->mutable_insts(j);
-              fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[k]);
+              fetch_p->mutable_tensor_array(var_idx)->add_int64_data(
+                  data_ptr[k]);
             }
           }
         } else {
@@ -130,7 +134,8 @@ int GeneralResponseOp::inference() {
         } else {
           for (int j = 0; j < batch_size; ++j) {
             FetchInst *fetch_p = output->mutable_insts(j);
-            fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[0]);
+            fetch_p->mutable_tensor_array(var_idx)->add_int64_data(
+                data_ptr[0]);
           }
         }
       }
@@ -142,7 +147,8 @@ int GeneralResponseOp::inference() {
             for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
                  k++) {
               FetchInst *fetch_p = output->mutable_insts(j);
-              fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[k]);
+              fetch_p->mutable_tensor_array(var_idx)->add_float_data(
+                  data_ptr[k]);
             }
           }
         } else {
@@ -158,7 +164,8 @@ int GeneralResponseOp::inference() {
         } else {
           for (int j = 0; j < batch_size; ++j) {
             FetchInst *fetch_p = output->mutable_insts(j);
-            fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[0]);
+            fetch_p->mutable_tensor_array(var_idx)->add_float_data(
+                data_ptr[0]);
           }
         }
       }
@@ -169,10 +176,10 @@ int GeneralResponseOp::inference() {
 
   if (req->profile_server()) {
     int64_t end = timeline.TimeStampUS();
-    for (uint32_t i = 0; i< pre_node_names.size(); ++i) {
+    for (uint32_t i = 0; i < pre_node_names.size(); ++i) {
       input_blob = get_depend_argument<GeneralBlob>(pre_node_names[i]);
       VLOG(2) << "p size for input blob: " << input_blob->p_size;
-      ModelOutput* output = res->mutable_outputs(i);
+      ModelOutput *output = res->mutable_outputs(i);
       for (int i = 0; i < input_blob->p_size; ++i) {
         output->add_profile_time(input_blob->time_stamp[i]);
       }
diff --git a/core/general-server/op/general_text_response_op.cpp b/core/general-server/op/general_text_response_op.cpp
index 40243c6df565ed9b3cee226e6c32cc425cae1203..a97eb397d484f457a893b5b86d73dfce91fa95f6 100644
--- a/core/general-server/op/general_text_response_op.cpp
+++ b/core/general-server/op/general_text_response_op.cpp
@@ -37,9 +37,11 @@ using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 int GeneralTextResponseOp::inference() {
   VLOG(2) << "Going to run inference";
-  //TODO: multi-predecessor
+
   const std::vector<std::string> pre_node_names = pre_names();
   if (pre_node_names.size() != 1) {
-    LOG(ERROR) << "This op(" << op_name() <<") can only have one predecessor op, but received " << pre_node_names.size();
+    LOG(ERROR) << "This op(" << op_name()
+               << ") can only have one predecessor op, but received "
+               << pre_node_names.size();
     return -1;
   }
   const std::string pre_name = pre_node_names[0];
@@ -51,6 +53,8 @@ int GeneralTextResponseOp::inference() {
     return -1;
   }
 
+  // TODO: multi-predecessor
+  /*
   const TensorVector *in = &input_blob->tensor_vector;
   int batch_size = input_blob->GetBatchSize();
 
@@ -133,7 +137,7 @@ int GeneralTextResponseOp::inference() {
     res->add_profile_time(start);
     res->add_profile_time(end);
   }
-
+  */
   return 0;
 }
 DEFINE_OP(GeneralTextResponseOp);
diff --git a/core/general-server/proto/general_model_service.proto b/core/general-server/proto/general_model_service.proto
index 0a55db2bfd1786f1627402ef6941e5d11ef52742..07e1a056191b03c07de1f49eba595a4348e474ed 100644
--- a/core/general-server/proto/general_model_service.proto
+++ b/core/general-server/proto/general_model_service.proto
@@ -46,6 +46,7 @@ message Response {
 message ModelOutput {
   repeated FetchInst insts = 1;
   repeated int64 profile_time = 2;
+  optional string engine_name = 3;
 }
 
 service GeneralModelService {
diff --git a/core/sdk-cpp/proto/general_model_service.proto b/core/sdk-cpp/proto/general_model_service.proto
index 003455964cfb59e16e7bc816e8d30bf96a48c904..e88c9af4f0dfcfe893be54928edc32e637488804 100644
--- a/core/sdk-cpp/proto/general_model_service.proto
+++ b/core/sdk-cpp/proto/general_model_service.proto
@@ -46,6 +46,7 @@ message Response {
 message ModelOutput {
   repeated FetchInst insts = 1;
   repeated int64 profile_time = 2;
+  optional string engine_name = 3;
 }
 
 service GeneralModelService {
diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py
index 611883246437fbc48df8da501ff0185d1505250b..6bff36d1186470dd747fab264b836f4ac4ee8458 100644
--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -247,8 +247,8 @@ class Client(object):
             return None
 
         multi_result_map_batch = []
-        model_num = result_batch.model_num()
-        for mi in range(model_num):
+        model_engine_names = result_batch.get_engine_names()
+        for mi, engine_name in enumerate(model_engine_names):
             result_map_batch = []
             result_map = {}
             for i, name in enumerate(fetch_names):
@@ -263,21 +263,23 @@
                 result_map_batch.append(single_result)
             multi_result_map_batch.append(result_map_batch)
 
-        if model_num == 1:
+        ret = None
+        if len(model_engine_names) == 1:
             if batch_size == 1:
-                return [multi_result_map_batch[0][0], self.result_handle_.variant_tag()
-                        ] if need_variant_tag else multi_result_map_batch[0][0]
+                ret = multi_result_map_batch[0][0]
             else:
-                return [multi_result_map_batch[0], self.result_handle_.variant_tag()
-                        ] if need_variant_tag else multi_result_map_batch[0]
+                ret = multi_result_map_batch[0]
         else:
+            ret = {}
             if batch_size == 1:
-                multi_result_map = [result_map_batch[0] for result_map_batch in multi_result_map_batch]
-                return [multi_result_map, self.result_handle_.variant_tag()
-                        ] if need_variant_tag else multi_result_map
+                for mi, result_map_batch in enumerate(multi_result_map_batch):
+                    ret[model_engine_names[mi]] = result_map_batch[0]
             else:
-                return [multi_result_map_batch, self.result_handle_.variant_tag()
-                        ] if need_variant_tag else multi_result_map_batch
+                for mi, result_map_batch in enumerate(multi_result_map_batch):
+                    ret[model_engine_names[mi]] = result_map_batch
+        return [ret, self.result_handle_.variant_tag()
+                ] if need_variant_tag else ret
+
     def release(self):
         self.client_handle_.destroy_predictor()
         self.client_handle_ = None
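Editor's note: after the client change above, the returned value keeps its old shape for a single-model workflow (one fetch map) but becomes a dict keyed by engine name when the workflow holds several models. The sketch below shows how a caller might read both shapes; it is only an illustration under assumptions, and the config path, endpoint, feed/fetch names and engine names are examples, not part of this patch.

from paddle_serving_client import Client

# Illustrative config path, endpoint and feed/fetch names.
client = Client()
client.load_client_config("serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])

fetch_map = client.predict(feed={"words": [1, 2, 3]}, fetch=["prob"])

# Single-model workflow: {"prob": ...}, same as before this patch.
# Multi-model workflow: one fetch map per engine, keyed by engine name, e.g.
#   {"general_infer_0": {"prob": ...}, "general_infer_1": {"prob": ...}}
if "prob" in fetch_map:
    print(fetch_map["prob"])
else:
    for engine_name, engine_result in fetch_map.items():
        print(engine_name, engine_result["prob"])
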
diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py
index c4e4aaf2d182ecbf7d5cf30de1455d744b2b7428..8eb7ca3ddea7e0ad341edf339c6e0b0a94402a73 100644
--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -21,6 +21,7 @@ import socket
 import paddle_serving_server as paddle_serving_server
 from .version import serving_server_version
 from contextlib import closing
+import collections
 
 
 class OpMaker(object):
@@ -36,18 +37,37 @@ class OpMaker(object):
             "general_dist_kv_quant_infer": "GeneralDistKVQuantInferOp",
             "general_copy": "GeneralCopyOp"
         }
+        self.node_name_suffix_ = collections.defaultdict(int)
 
     # currently, inputs and outputs are not used
     # when we have OpGraphMaker, inputs and outputs are necessary
-    def create(self, node_type, node_name=None, inputs=[], outputs=[]):
+    def create(self, node_type, engine_name=None, inputs=[], outputs=[]):
         if node_type not in self.op_dict:
             raise Exception("Op type {} is not supported right now".format(
                 node_type))
         node = server_sdk.DAGNode()
-        node.name = node_name if node_name is not None else "{}_op".format(
-            node_type)
+        # node.name will be used as the infer engine name
+        if engine_name:
+            node.name = engine_name
+        else:
+            node.name = '{}_{}'.format(node_type,
+                                       self.node_name_suffix_[node_type])
+            self.node_name_suffix_[node_type] += 1
+
         node.type = self.op_dict[node_type]
-        return node
+        if inputs:
+            for dep_node_str in inputs:
+                dep_node = server_sdk.DAGNode()
+                google.protobuf.text_format.Parse(dep_node_str, dep_node)
+                dep = server_sdk.DAGNodeDependency()
+                dep.name = dep_node.name
+                dep.mode = "RO"
+                node.dependencies.extend([dep])
+        # Because the return value will be used as the key value of the
+        # dict, and the proto object is variable which cannot be hashed,
+        # so it is processed into a string. This has little effect on
+        # overall efficiency.
+        return google.protobuf.text_format.MessageToString(node)
 
 
 class OpSeqMaker(object):
     def __init__(self):
         self.workflow = server_sdk.Workflow()
         self.workflow.name = "workflow1"
         self.workflow.workflow_type = "Sequence"
 
-    def add_op(self, node, dependent_nodes=None):
-        if dependent_nodes is None:
-            if len(self.workflow.nodes) >= 1:
+    def add_op(self, node_str):
+        node = server_sdk.DAGNode()
+        google.protobuf.text_format.Parse(node_str, node)
+        if len(node.dependencies) > 1:
+            raise Exception(
+                'Set more than one predecessor for op in OpSeqMaker is not allowed.'
+            )
+        if len(self.workflow.nodes) >= 1:
+            if len(node.dependencies) == 0:
                 dep = server_sdk.DAGNodeDependency()
                 dep.name = self.workflow.nodes[-1].name
                 dep.mode = "RO"
                 node.dependencies.extend([dep])
-        else:
-            for dep_node in dependent_nodes:
-                dep = server_sdk.DAGNodeDependency()
-                dep.name = dep_node.name
-                dep.mode = "RO"
-                node.dependencies.extend([dep])
+            elif len(node.dependencies) == 1:
+                if node.dependencies[0].name != self.workflow.nodes[-1].name:
+                    raise Exception(
+                        'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.'.
+                        format(node.dependencies[0].name, self.workflow.nodes[
+                            -1].name))
         self.workflow.nodes.extend([node])
 
     def get_op_sequence(self):
@@ -77,6 +103,24 @@
         workflow_conf = server_sdk.WorkflowConf()
         workflow_conf.workflows.extend([self.workflow])
         return workflow_conf
 
 
+class OpGraphMaker(object):
+    def __init__(self):
+        self.workflow = server_sdk.Workflow()
+        self.workflow.name = "workflow1"
+        # Currently, SDK only supports "Sequence"
+        self.workflow.workflow_type = "Sequence"
+
+    def add_op(self, node_str):
+        node = server_sdk.DAGNode()
+        google.protobuf.text_format.Parse(node_str, node)
+        self.workflow.nodes.extend([node])
+
+    def get_op_graph(self):
+        workflow_conf = server_sdk.WorkflowConf()
+        workflow_conf.workflows.extend([self.workflow])
+        return workflow_conf
+
+
 class Server(object):
     def __init__(self):
         self.server_handle_ = None
@@ -100,7 +144,7 @@ class Server(object):
         self.cur_path = os.getcwd()
         self.use_local_bin = False
         self.mkl_flag = False
-        self.model_config_paths = None
+        self.model_config_paths = None  # for multi-model in a workflow
 
     def set_max_concurrency(self, concurrency):
         self.max_concurrency = concurrency
@@ -117,6 +161,9 @@ class Server(object):
     def set_op_sequence(self, op_seq):
         self.workflow_conf = op_seq
 
+    def set_op_graph(self, op_graph):
+        self.workflow_conf = op_graph
+
     def set_memory_optimize(self, flag=False):
         self.memory_optimization = flag
 
@@ -129,12 +176,6 @@ class Server(object):
         if self.model_toolkit_conf == None:
             self.model_toolkit_conf = server_sdk.ModelToolkitConf()
 
-        if isinstance(model_config_paths, str):
-            model_config_paths = {"general_infer_op": model_config_paths}
-        elif not isinstance(model_config_paths, dict):
-            raise Exception("model_config_paths can not be {}".format(
-                type(model_config_paths)))
-
         for engine_name, model_config_path in model_config_paths.items():
             engine = server_sdk.EngineDesc()
             engine.name = engine_name
@@ -188,11 +229,33 @@ class Server(object):
                 fout.write(str(pb_obj))
 
     def load_model_config(self, model_config_paths):
-        self.model_config_paths = model_config_paths
-        path = model_config_paths.items()[0][1]
-        self.model_config_path = path
+        # At present, Serving needs to configure the model path in
+        # the resource.prototxt file to determine the input and output
+        # format of the workflow. To ensure that the input and output
+        # of multiple models are the same
+        workflow_oi_config_path = None
+        if isinstance(model_config_paths, str):
+            # the default engine name is "general_infer"
+            self.model_config_paths = {"general_infer_0": model_config_paths}
+            workflow_oi_config_path = self.model_config_paths["general_infer_0"]
+        elif isinstance(model_config_paths, dict):
+            self.model_config_paths = {}
+            for node_str, path in model_config_paths.items():
+                node = server_sdk.DAGNode()
+                google.protobuf.text_format.Parse(node_str, node)
+                self.model_config_paths[node.name] = path
+            print("You have specified multiple model paths, please ensure "
+                  "that the input and output of multiple models are the same.")
+            workflow_oi_config_path = self.model_config_paths.items()[0][1]
+        else:
+            raise Exception("The type of model_config_paths must be str or "
+                            "dict({op: model_path}), not {}.".format(
+                                type(model_config_paths)))
+
         self.model_conf = m_config.GeneralModelConfig()
-        f = open("{}/serving_server_conf.prototxt".format(path), 'r')
+        f = open(
+            "{}/serving_server_conf.prototxt".format(workflow_oi_config_path),
+            'r')
         self.model_conf = google.protobuf.text_format.Merge(
             str(f.read()), self.model_conf)
         # check config here
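Editor's note: with the server-side pieces above (per-op engine names from OpMaker.create, OpSeqMaker/OpGraphMaker consuming serialized nodes, and load_model_config accepting a dict), a two-model workflow could be wired roughly as sketched below. This is only a minimal sketch under assumptions: the general_reader/general_response op types, the engine names, model paths, workdir and port are illustrative and not taken from this patch.

from paddle_serving_server import OpMaker, OpGraphMaker, Server

op_maker = OpMaker()
read_op = op_maker.create('general_reader')
# engine_name becomes node.name, i.e. the infer engine name the client sees
infer_a = op_maker.create('general_infer', engine_name='model_a', inputs=[read_op])
infer_b = op_maker.create('general_infer', engine_name='model_b', inputs=[read_op])
response_op = op_maker.create('general_response', inputs=[infer_a, infer_b])

op_graph_maker = OpGraphMaker()
for op in [read_op, infer_a, infer_b, response_op]:
    op_graph_maker.add_op(op)

server = Server()
server.set_op_graph(op_graph_maker.get_op_graph())
# keys are the node strings returned by OpMaker.create, values are model paths
server.load_model_config({infer_a: "./model_a_serving", infer_b: "./model_b_serving"})
server.prepare_server(workdir="workdir", port=9292, device="cpu")
server.run_server()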