diff --git a/core/configure/proto/general_model_config.proto b/core/configure/proto/general_model_config.proto
index 29753cbc798154bcbb3c3ca3e0cb7935d772c7f3..31e2fa2c89c2981a2178bb55eb5211a49397b533 100644
--- a/core/configure/proto/general_model_config.proto
+++ b/core/configure/proto/general_model_config.proto
@@ -25,7 +25,8 @@ message FeedVar {
 message FetchVar {
   optional string name = 1;
   optional string alias_name = 2;
-  repeated int32 shape = 3;
+  optional bool is_lod_tensor = 3 [ default = false ];
+  repeated int32 shape = 4;
 }
 message GeneralModelConfig {
   repeated FeedVar feed_var = 1;
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index b37d2dc7f03095e8af6aaa79f78eb094b021bcba..71ba4f139e0f807aa91322122c77a788ff5212e9 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -108,7 +108,11 @@ std::vector<std::vector<float>> PredictorClient::predict(
   VLOG(2) << "fetch general model predictor done.";
   VLOG(2) << "float feed name size: " << float_feed_name.size();
   VLOG(2) << "int feed name size: " << int_feed_name.size();
+  VLOG(2) << "fetch name size: " << fetch_name.size();
   Request req;
+  for (auto &name : fetch_name) {
+    req.add_fetch_var_names(name);
+  }
   std::vector<Tensor *> tensor_vec;
   FeedInst *inst = req.add_insts();
   for (auto &name : float_feed_name) {
diff --git a/core/general-server/op/general_infer_op.cpp b/core/general-server/op/general_infer_op.cpp
index cea48dee645abf78ae0d795f9818f4a552bb407a..c12ca25c048d1bd7f80aff31eefe8ed1ac9eeadb 100644
--- a/core/general-server/op/general_infer_op.cpp
+++ b/core/general-server/op/general_infer_op.cpp
@@ -30,8 +30,10 @@ namespace serving {
 using baidu::paddle_serving::predictor::MempoolWrapper;
 using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Response;
+using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::FetchInst;
 using baidu::paddle_serving::predictor::InferManager;
+using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 
 int GeneralInferOp::inference() {
   const GeneralReaderOutput *reader_out =
@@ -57,40 +59,65 @@ int GeneralInferOp::inference() {
     return -1;
   }
 
+  const Request *req = dynamic_cast<const Request *>(get_request_message());
+
+  VLOG(2) << "start to call load general model_conf op";
+  baidu::paddle_serving::predictor::Resource &resource =
+      baidu::paddle_serving::predictor::Resource::instance();
+
+  VLOG(2) << "get resource pointer done.";
+  std::shared_ptr<PaddleGeneralModelConfig> model_config =
+      resource.get_general_model_config();
+
+  std::vector<int> fetch_index;
+  fetch_index.resize(req->fetch_var_names_size());
+  for (int i = 0; i < req->fetch_var_names_size(); ++i) {
+    fetch_index[i] =
+        model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
+  }
+
+  // response inst with only fetch_var_names
   Response *res = mutable_data<Response>();
   for (int i = 0; i < batch_size; ++i) {
     FetchInst *fetch_inst = res->add_insts();
-    for (int j = 0; j < out->size(); ++j) {
+    for (auto &idx : fetch_index) {
       Tensor *tensor = fetch_inst->add_tensor_array();
+      // currently only response float tensor or lod_tensor
       tensor->set_elem_type(1);
-      if (out->at(j).lod.size() == 1) {
+      if (model_config->_is_lod_fetch[idx]) {
+        VLOG(2) << "out[" << idx << "] is lod_tensor";
         tensor->add_shape(-1);
       } else {
-        for (int k = 1; k < out->at(j).shape.size(); ++k) {
-          tensor->add_shape(out->at(j).shape[k]);
+        VLOG(2) << "out[" << idx << "] is tensor";
+        for (int k = 1; k < out->at(idx).shape.size(); ++k) {
+          VLOG(2) << "shape[" << k - 1 << "]: "
+                  << out->at(idx).shape[k];
+          tensor->add_shape(out->at(idx).shape[k]);
         }
       }
     }
   }
 
-  for (int i = 0; i < out->size(); ++i) {
-    float *data_ptr = static_cast<float *>(out->at(i).data.data());
+  int var_idx = 0;
+  for (auto &idx : fetch_index) {
+    float *data_ptr = static_cast<float *>(out->at(idx).data.data());
     int cap = 1;
-    for (int j = 1; j < out->at(i).shape.size(); ++j) {
-      cap *= out->at(i).shape[j];
+    for (int j = 1; j < out->at(idx).shape.size(); ++j) {
+      cap *= out->at(idx).shape[j];
     }
-    if (out->at(i).lod.size() == 1) {
+    if (model_config->_is_lod_fetch[idx]) {
       for (int j = 0; j < batch_size; ++j) {
-        for (int k = out->at(i).lod[0][j]; k < out->at(i).lod[0][j + 1]; k++) {
-          res->mutable_insts(j)->mutable_tensor_array(i)->add_data(
+        for (int k = out->at(idx).lod[0][j];
+             k < out->at(idx).lod[0][j + 1]; k++) {
+          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data(
              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
         }
       }
     } else {
       for (int j = 0; j < batch_size; ++j) {
         for (int k = j * cap; k < (j + 1) * cap; ++k) {
-          res->mutable_insts(j)->mutable_tensor_array(i)->add_data(
+          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data(
              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
         }
       }
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 1db8620c566f270fc4697781f9080d1bd0967fce..0ff747f8ee792b4407aabede44586c7cf1b8baf3 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -39,6 +39,9 @@ int conf_check(const Request *req,
     LOG(ERROR) << "feed var number not match.";
     return -1;
   }
+
+  VLOG(2) << "fetch var num in reader op: " << req->fetch_var_names_size();
+
   for (int i = 0; i < var_num; ++i) {
     if (model_config->_feed_type[i] !=
         req->insts(0).tensor_array(i).elem_type()) {
@@ -89,15 +92,15 @@ int GeneralReaderOp::inference() {
   VLOG(2) << "var num: " << var_num;
 
   // read config
-  LOG(INFO) << "start to call load general model_conf op";
+  VLOG(2) << "start to call load general model_conf op";
   baidu::paddle_serving::predictor::Resource &resource =
       baidu::paddle_serving::predictor::Resource::instance();
 
-  LOG(INFO) << "get resource pointer done.";
+  VLOG(2) << "get resource pointer done.";
   std::shared_ptr<PaddleGeneralModelConfig> model_config =
       resource.get_general_model_config();
 
-  LOG(INFO) << "print general model config done.";
+  VLOG(2) << "print general model config done.";
 
   // check
   res->reader_status = conf_check(req, model_config);
@@ -111,8 +114,8 @@ int GeneralReaderOp::inference() {
   elem_type.resize(var_num);
   elem_size.resize(var_num);
   capacity.resize(var_num);
-  paddle::PaddleTensor lod_tensor;
   for (int i = 0; i < var_num; ++i) {
+    paddle::PaddleTensor lod_tensor;
     elem_type[i] = req->insts(0).tensor_array(i).elem_type();
     VLOG(2) << "var[" << i << "] has elem type: " << elem_type[i];
     if (elem_type[i] == 0) {  // int64
@@ -138,11 +141,7 @@ int GeneralReaderOp::inference() {
       }
      VLOG(2) << "var[" << i << "] is tensor, capacity: " << capacity[i];
     }
-    if (i == 0) {
-      lod_tensor.name = "words";
-    } else {
-      lod_tensor.name = "label";
-    }
+    lod_tensor.name = model_config->_feed_name[i];
     in->push_back(lod_tensor);
   }
diff --git a/core/general-server/proto/general_model_service.proto b/core/general-server/proto/general_model_service.proto
index 1b9bfe380134eb494dbc104e0d241ffbf3f98c58..51e38e79ae50319b5ef29f90025a1a8e58a4429b 100644
--- a/core/general-server/proto/general_model_service.proto
+++ b/core/general-server/proto/general_model_service.proto
@@ -35,6 +35,7 @@ message FetchInst {
 
 message Request {
   repeated FeedInst insts = 1;
+  repeated string fetch_var_names = 2;
 };
 
 message Response {
diff --git a/core/predictor/framework/resource.cpp b/core/predictor/framework/resource.cpp
index e6209c26f789de2a39236076a06336b91c76391f..98571ad7fd0aeac0d82a7b3ff88b9a33ade3c65c 100644
--- a/core/predictor/framework/resource.cpp
+++ b/core/predictor/framework/resource.cpp
@@ -189,14 +189,18 @@ int Resource::general_model_initialize(const std::string& path,
   VLOG(2) << "load general model config";
   VLOG(2) << "feed var num: " << feed_var_num;
   _config->_feed_name.resize(feed_var_num);
+  _config->_feed_alias_name.resize(feed_var_num);
   _config->_feed_type.resize(feed_var_num);
   _config->_is_lod_feed.resize(feed_var_num);
   _config->_capacity.resize(feed_var_num);
   _config->_feed_shape.resize(feed_var_num);
 
   for (int i = 0; i < feed_var_num; ++i) {
     _config->_feed_name[i] = model_config.feed_var(i).name();
+    _config->_feed_alias_name[i] = model_config.feed_var(i).alias_name();
     VLOG(2) << "feed var[" << i << "]: " << _config->_feed_name[i];
+    VLOG(2) << "feed var[" << i << "]: "
+            << _config->_feed_alias_name[i];
     _config->_feed_type[i] = model_config.feed_var(i).feed_type();
     VLOG(2) << "feed type[" << i << "]: " << _config->_feed_type[i];
@@ -219,13 +223,25 @@ int Resource::general_model_initialize(const std::string& path,
   }
 
   int fetch_var_num = model_config.fetch_var_size();
+  _config->_is_lod_fetch.resize(fetch_var_num);
   _config->_fetch_name.resize(fetch_var_num);
+  _config->_fetch_alias_name.resize(fetch_var_num);
   _config->_fetch_shape.resize(fetch_var_num);
   for (int i = 0; i < fetch_var_num; ++i) {
     _config->_fetch_name[i] = model_config.fetch_var(i).name();
-    for (int j = 0; j < model_config.fetch_var(i).shape_size(); ++j) {
-      int dim = model_config.fetch_var(i).shape(j);
-      _config->_fetch_shape[i].push_back(dim);
+    _config->_fetch_alias_name[i] = model_config.fetch_var(i).alias_name();
+    _config->_fetch_name_to_index[_config->_fetch_name[i]] = i;
+    _config->_fetch_alias_name_to_index[_config->_fetch_alias_name[i]] = i;
+    if (model_config.fetch_var(i).is_lod_tensor()) {
+      VLOG(2) << "fetch var[" << i << "] is lod tensor";
+      _config->_fetch_shape[i] = {-1};
+      _config->_is_lod_fetch[i] = true;
+    } else {
+      _config->_is_lod_fetch[i] = false;
+      for (int j = 0; j < model_config.fetch_var(i).shape_size(); ++j) {
+        int dim = model_config.fetch_var(i).shape(j);
+        _config->_fetch_shape[i].push_back(dim);
+      }
     }
   }
   return 0;
diff --git a/core/predictor/framework/resource.h b/core/predictor/framework/resource.h
index f688681f7e8f7666acc493f0ba7be6dba72dcb3c..05820f9f8b0282260ce3d81d4634ddd20bd360a8 100644
--- a/core/predictor/framework/resource.h
+++ b/core/predictor/framework/resource.h
@@ -16,6 +16,7 @@
 #include <memory>
 #include <string>
 #include <vector>
+#include <map>
 #include "core/cube/cube-api/include/cube_api.h"
 #include "core/kvdb/include/kvdb/paddle_rocksdb.h"
 #include "core/predictor/common/inner_common.h"
@@ -34,8 +35,10 @@ class PaddleGeneralModelConfig {
  public:
   std::vector<std::string> _feed_name;
+  std::vector<std::string> _feed_alias_name;
   std::vector<int> _feed_type;      // 0 int64, 1 float
   std::vector<bool> _is_lod_feed;   // true lod tensor
+  std::vector<bool> _is_lod_fetch;  // whether a fetch var is lod_tensor
   std::vector<int> _capacity;       // capacity for each tensor
   /* feed_shape_ for feeded variable
@@ -45,7 +48,10 @@ class PaddleGeneralModelConfig {
   std::vector<std::vector<int>> _feed_shape;
 
   std::vector<std::string> _fetch_name;
+  std::vector<std::string> _fetch_alias_name;
   std::vector<std::vector<int>> _fetch_shape;
+  std::map<std::string, int> _fetch_name_to_index;
+  std::map<std::string, int> _fetch_alias_name_to_index;
 };
 
 class BaseRdDict;
diff --git a/core/sdk-cpp/proto/general_model_service.proto b/core/sdk-cpp/proto/general_model_service.proto
index d68e8d64aadae03dc397afcd7380f37cfeb46379..0852e2a0fec85df430e489de9189a9c1343717da 100644
--- a/core/sdk-cpp/proto/general_model_service.proto
+++ b/core/sdk-cpp/proto/general_model_service.proto
@@ -35,6 +35,7 @@ message FetchInst {
 
 message Request {
   repeated FeedInst insts = 1;
+  repeated string fetch_var_names = 2;
 };
 
 message Response {
diff --git a/python/paddle_serving_client/io/__init__.py b/python/paddle_serving_client/io/__init__.py
index 8faab294ac33f18d257a5b13ca8e68326c342418..0f660116be0bef13de43d2bc8d67ba37fd23088b 100644
--- a/python/paddle_serving_client/io/__init__.py
+++ b/python/paddle_serving_client/io/__init__.py
@@ -41,7 +41,7 @@ def save_model(server_model_folder,
         feed_var = model_conf.FeedVar()
         feed_var.alias_name = key
         feed_var.name = feed_var_dict[key].name
-        feed_var.is_lod_tensor = feed_var_dict[key].lod_level == 1
+        feed_var.is_lod_tensor = feed_var_dict[key].lod_level >= 1
         if feed_var_dict[key].dtype == core.VarDesc.VarType.INT32 or \
                 feed_var_dict[key].dtype == core.VarDesc.VarType.INT64:
             feed_var.feed_type = 0
@@ -61,7 +61,15 @@ def save_model(server_model_folder,
         fetch_var = model_conf.FetchVar()
         fetch_var.alias_name = key
         fetch_var.name = fetch_var_dict[key].name
-        fetch_var.shape.extend(fetch_var_dict[key].shape)
+        fetch_var.is_lod_tensor = fetch_var_dict[key].lod_level >= 1
+        if fetch_var.is_lod_tensor:
+            fetch_var.shape.extend([-1])
+        else:
+            tmp_shape = []
+            for v in fetch_var_dict[key].shape:
+                if v >= 0:
+                    tmp_shape.append(v)
+            fetch_var.shape.extend(tmp_shape)
         config.fetch_var.extend([fetch_var])
 
     cmd = "mkdir -p {}".format(client_config_folder)
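
For context, a minimal sketch (not part of the patch) of how the new fetch_var_names field is exercised end to end from the Python client. It assumes the paddle_serving_client.Client API (load_client_config, connect, predict) and uses placeholder feed/fetch alias names ("words", "prediction"), input data, and endpoint.

# Sketch only: the fetch list passed to predict() is carried to the server as
# Request.fetch_var_names; GeneralInferOp maps each alias through
# _fetch_alias_name_to_index and returns only those variables, with lod fetch
# vars reported as shape [-1]. Names and endpoint below are placeholders.
from paddle_serving_client import Client

client = Client()
client.load_client_config("serving_client_conf.prototxt")  # written by save_model()
client.connect(["127.0.0.1:9292"])  # placeholder endpoint

fetch_map = client.predict(feed={"words": [1, 2, 3]}, fetch=["prediction"])
print(fetch_map["prediction"])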