diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index f8e7634a0a8ae2b90ccb2851491624d6ba0c058e..5ed0361822e372f76b51e0d507eab56e169e8cb6 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -38,26 +38,25 @@ using configure::GeneralModelConfig;
 void PredictorClient::init_gflags(std::vector<std::string> argv) {
   std::call_once(gflags_init_flag, [&]() {
-    FLAGS_logtostderr = true;
-    argv.insert(argv.begin(), "dummy");
-    int argc = argv.size();
-    char **arr = new char *[argv.size()];
-    std::string line;
-    for (size_t i = 0; i < argv.size(); i++) {
-      arr[i] = &argv[i][0];
-      line += argv[i];
-      line += ' ';
-    }
-    google::ParseCommandLineFlags(&argc, &arr, true);
-    VLOG(2) << "Init commandline: " << line;
-  });
+    FLAGS_logtostderr = true;
+    argv.insert(argv.begin(), "dummy");
+    int argc = argv.size();
+    char **arr = new char *[argv.size()];
+    std::string line;
+    for (size_t i = 0; i < argv.size(); i++) {
+      arr[i] = &argv[i][0];
+      line += argv[i];
+      line += ' ';
+    }
+    google::ParseCommandLineFlags(&argc, &arr, true);
+    VLOG(2) << "Init commandline: " << line;
+  });
 }
 
 int PredictorClient::init(const std::string &conf_file) {
   try {
     GeneralModelConfig model_config;
-    if (configure::read_proto_conf(conf_file.c_str(),
-                                   &model_config) != 0) {
+    if (configure::read_proto_conf(conf_file.c_str(), &model_config) != 0) {
       LOG(ERROR) << "Failed to load general model config"
                  << ", file path: " << conf_file;
       return -1;
@@ -75,26 +74,27 @@ int PredictorClient::init(const std::string &conf_file) {
       VLOG(2) << "feed alias name: " << model_config.feed_var(i).alias_name()
               << " index: " << i;
       std::vector<int> tmp_feed_shape;
-      VLOG(2) << "feed" << "[" << i << "] shape:";
+      VLOG(2) << "feed"
+              << "[" << i << "] shape:";
       for (int j = 0; j < model_config.feed_var(i).shape_size(); ++j) {
         tmp_feed_shape.push_back(model_config.feed_var(i).shape(j));
-        VLOG(2) << "shape[" << j << "]: "
-                << model_config.feed_var(i).shape(j);
+        VLOG(2) << "shape[" << j << "]: " << model_config.feed_var(i).shape(j);
       }
       _type.push_back(model_config.feed_var(i).feed_type());
-      VLOG(2) << "feed" << "[" << i << "] feed type: "
-              << model_config.feed_var(i).feed_type();
+      VLOG(2) << "feed"
+              << "[" << i
+              << "] feed type: " << model_config.feed_var(i).feed_type();
       _shape.push_back(tmp_feed_shape);
     }
     for (int i = 0; i < fetch_var_num; ++i) {
       _fetch_name_to_idx[model_config.fetch_var(i).alias_name()] = i;
-      VLOG(2) << "fetch [" << i << "]" << " alias name: "
-              << model_config.fetch_var(i).alias_name();
+      VLOG(2) << "fetch [" << i << "]"
+              << " alias name: " << model_config.fetch_var(i).alias_name();
       _fetch_name_to_var_name[model_config.fetch_var(i).alias_name()] =
           model_config.fetch_var(i).name();
     }
-  } catch (std::exception& e) {
+  } catch (std::exception &e) {
     LOG(ERROR) << "Failed load general model config" << e.what();
     return -1;
   }
@@ -112,7 +112,7 @@ int PredictorClient::destroy_predictor() {
   _api.destroy();
 }
-int PredictorClient::create_predictor_by_desc(const std::string & sdk_desc) {
+int PredictorClient::create_predictor_by_desc(const std::string &sdk_desc) {
   if (_api.create(sdk_desc) != 0) {
     LOG(ERROR) << "Predictor Creation Failed";
     return -1;
   }
@@ -156,7 +156,7 @@ std::vector<std::vector<float>> PredictorClient::predict(
   VLOG(2) << "fetch name size: " << fetch_name.size();
 
   Request req;
-  for (auto & name : fetch_name) {
+  for (auto &name : fetch_name) {
     req.add_fetch_var_names(name);
   }
   std::vector<Tensor *> tensor_vec;
@@ -247,7 +247,7 @@ std::vector<std::vector<float>> PredictorClient::predict(
"prepro_1:" << preprocess_end << " " << "client_infer_0:" << client_infer_start << " " << "client_infer_1:" << client_infer_end << " "; - + if (FLAGS_profile_server) { int op_num = res.profile_time_size() / 2; for (int i = 0; i < op_num; ++i) { @@ -255,7 +255,7 @@ std::vector> PredictorClient::predict( oss << "op" << i << "_1:" << res.profile_time(i * 2 + 1) << " "; } } - + oss << "postpro_0:" << postprocess_start << " "; oss << "postpro_1:" << postprocess_end; @@ -288,7 +288,7 @@ std::vector>> PredictorClient::batch_predict( VLOG(2) << "float feed name size: " << float_feed_name.size(); VLOG(2) << "int feed name size: " << int_feed_name.size(); Request req; - for (auto & name : fetch_name) { + for (auto &name : fetch_name) { req.add_fetch_var_names(name); } // @@ -324,7 +324,8 @@ std::vector>> PredictorClient::batch_predict( vec_idx++; } - VLOG(2) << "batch [" << bi << "] " << "float feed value prepared"; + VLOG(2) << "batch [" << bi << "] " + << "float feed value prepared"; vec_idx = 0; for (auto &name : int_feed_name) { @@ -344,7 +345,8 @@ std::vector>> PredictorClient::batch_predict( vec_idx++; } - VLOG(2) << "batch [" << bi << "] " << "itn feed value prepared"; + VLOG(2) << "batch [" << bi << "] " + << "itn feed value prepared"; } Response res; diff --git a/core/general-server/op/general_infer_helper.h b/core/general-server/op/general_infer_helper.h index 0fdda98dfdbb1c274f9106982190ef1ca527eeec..416c9bd6e6bf134b4521d61cfe4e15da7d6de01c 100644 --- a/core/general-server/op/general_infer_helper.h +++ b/core/general-server/op/general_infer_helper.h @@ -38,6 +38,8 @@ struct GeneralBlob { int64_t time_stamp[20]; int p_size = 0; + int _batch_size; + void Clear() { size_t tensor_count = tensor_vector.size(); for (size_t ti = 0; ti < tensor_count; ++ti) { @@ -45,31 +47,21 @@ struct GeneralBlob { } tensor_vector.clear(); } - - int GetBatchSize() const { - if (tensor_vector.size() > 0) { - if (tensor_vector[0].lod.size() == 1) { - return tensor_vector[0].lod[0].size() - 1; - } else { - return tensor_vector[0].shape[0]; - } - } else { - return -1; - } - } + int SetBatchSize(int batch_size) { _batch_size = batch_size; } + + int GetBatchSize() const { return _batch_size; } std::string ShortDebugString() const { return "Not implemented!"; } }; -static void AddBlobInfo(GeneralBlob * blob, - int64_t init_value) { +static void AddBlobInfo(GeneralBlob* blob, int64_t init_value) { blob->time_stamp[blob->p_size] = init_value; blob->p_size++; } -static void CopyBlobInfo(const GeneralBlob * src, - GeneralBlob * tgt) { - memcpy(&(tgt->time_stamp[0]), &(src->time_stamp[0]), +static void CopyBlobInfo(const GeneralBlob* src, GeneralBlob* tgt) { + memcpy(&(tgt->time_stamp[0]), + &(src->time_stamp[0]), src->p_size * sizeof(int64_t)); } diff --git a/core/general-server/op/general_infer_op.cpp b/core/general-server/op/general_infer_op.cpp index 3ae84b2047a04ec6c76d8de20d15cde7fbb8b68b..81358bf6c12244e8dd9a8f3d27161f9d86d46800 100644 --- a/core/general-server/op/general_infer_op.cpp +++ b/core/general-server/op/general_infer_op.cpp @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "core/general-server/op/general_infer_op.h" #include #include #include #include -#include "core/general-server/op/general_infer_op.h" #include "core/predictor/framework/infer.h" #include "core/predictor/framework/memory.h" #include "core/predictor/framework/resource.h" @@ -36,25 +36,26 @@ using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; int GeneralInferOp::inference() { - const GeneralBlob * input_blob = - get_depend_argument(pre_name()); + const GeneralBlob *input_blob = get_depend_argument(pre_name()); - GeneralBlob * output_blob = mutable_data(); + GeneralBlob *output_blob = mutable_data(); if (!input_blob) { - LOG(ERROR) << "Failed mutable depended argument, op:" - << pre_name(); + LOG(ERROR) << "Failed mutable depended argument, op:" << pre_name(); return -1; } const TensorVector *in = &input_blob->tensor_vector; TensorVector *out = &output_blob->tensor_vector; int batch_size = input_blob->GetBatchSize(); + output_blob->SetBatchSize(batch_size); VLOG(2) << "infer batch size: " << batch_size; + Timer timeline; int64_t start = timeline.TimeStampUS(); timeline.Start(); + if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) { LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME; return -1; diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp index 8db186eaca989e533a825a57802343b6cbc0fb43..4352411c4e1afa162bffadb68ea374325dfcb4de 100644 --- a/core/general-server/op/general_reader_op.cpp +++ b/core/general-server/op/general_reader_op.cpp @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "core/general-server/op/general_reader_op.h" #include #include #include #include #include "core/general-server/op/general_infer_helper.h" -#include "core/general-server/op/general_reader_op.h" #include "core/predictor/framework/infer.h" #include "core/predictor/framework/memory.h" #include "core/util/include/timer.h" @@ -75,7 +75,6 @@ int GeneralReaderOp::inference() { int batch_size = req->insts_size(); int input_var_num = 0; - std::vector elem_type; std::vector elem_size; std::vector capacity; @@ -83,6 +82,8 @@ int GeneralReaderOp::inference() { GeneralBlob *res = mutable_data(); TensorVector *out = &res->tensor_vector; + res->SetBatchSize(batch_size); + if (!res) { LOG(ERROR) << "Failed get op tls reader object output"; } diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp index caa0185efd7c1794f12fc896777e249308ce5647..2c759f922bca8652c85a1952a4d2e694117e2e82 100644 --- a/core/general-server/op/general_response_op.cpp +++ b/core/general-server/op/general_response_op.cpp @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "core/general-server/op/general_response_op.h" #include #include #include #include #include "core/general-server/op/general_infer_helper.h" -#include "core/general-server/op/general_response_op.h" #include "core/predictor/framework/infer.h" #include "core/predictor/framework/memory.h" #include "core/predictor/framework/resource.h" @@ -37,12 +37,10 @@ using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; int GeneralResponseOp::inference() { - const GeneralBlob *input_blob = - get_depend_argument(pre_name()); + const GeneralBlob *input_blob = get_depend_argument(pre_name()); if (!input_blob) { - LOG(ERROR) << "Failed mutable depended argument, op: " - << pre_name(); + LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name(); return -1; } @@ -61,7 +59,7 @@ int GeneralResponseOp::inference() { VLOG(2) << "start to call load general model_conf op"; baidu::paddle_serving::predictor::Resource &resource = baidu::paddle_serving::predictor::Resource::instance(); - + VLOG(2) << "get resource pointer done."; std::shared_ptr model_config = resource.get_general_model_config(); @@ -73,11 +71,12 @@ int GeneralResponseOp::inference() { model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)]; } + // response inst with only fetch_var_names Response *res = mutable_data(); for (int i = 0; i < batch_size; ++i) { FetchInst *fetch_inst = res->add_insts(); - for (auto & idx : fetch_index) { + for (auto &idx : fetch_index) { Tensor *tensor = fetch_inst->add_tensor_array(); // currently only response float tensor or lod_tensor tensor->set_elem_type(1); @@ -87,8 +86,7 @@ int GeneralResponseOp::inference() { } else { VLOG(2) << "out[" << idx << "] is tensor"; for (int k = 1; k < in->at(idx).shape.size(); ++k) { - VLOG(2) << "shape[" << k - 1 << "]: " - << in->at(idx).shape[k]; + VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k]; tensor->add_shape(in->at(idx).shape[k]); } } @@ -96,7 +94,7 @@ int GeneralResponseOp::inference() { } int var_idx = 0; - for (auto & idx : fetch_index) { + for (auto &idx : fetch_index) { float *data_ptr = static_cast(in->at(idx).data.data()); int cap = 1; for (int j = 1; j < in->at(idx).shape.size(); ++j) { @@ -104,17 +102,25 @@ int GeneralResponseOp::inference() { } if (model_config->_is_lod_fetch[idx]) { for (int j = 0; j < batch_size; ++j) { - for (int k = in->at(idx).lod[0][j]; - k < in->at(idx).lod[0][j + 1]; k++) { + for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1]; + k++) { res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data( reinterpret_cast(&(data_ptr[k])), sizeof(float)); } } } else { - for (int j = 0; j < batch_size; ++j) { - for (int k = j * cap; k < (j + 1) * cap; ++k) { + int var_size = in->at(idx).shape[0]; + if (var_size == batch_size) { + for (int j = 0; j < batch_size; ++j) { + for (int k = j * cap; k < (j + 1) * cap; ++k) { + res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data( + reinterpret_cast(&(data_ptr[k])), sizeof(float)); + } + } + } else { + for (int j = 0; j < batch_size; ++j) { res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data( - reinterpret_cast(&(data_ptr[k])), sizeof(float)); + reinterpret_cast(&(data_ptr[0])), sizeof(float)); } } } diff --git a/core/general-server/op/general_text_response_op.cpp b/core/general-server/op/general_text_response_op.cpp index a60aba14aafcad7594fc17527642e88a24d71e2c..43c7af774fd939a8fa1ca14456285cc75dbd7f8d 100644 --- a/core/general-server/op/general_text_response_op.cpp 
+++ b/core/general-server/op/general_text_response_op.cpp
@@ -12,11 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "core/general-server/op/general_text_response_op.h"
 #include
 #include
 #include
 #include
-#include "core/general-server/op/general_text_response_op.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
 #include "core/predictor/framework/resource.h"
@@ -36,12 +36,10 @@ using baidu::paddle_serving::predictor::InferManager;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 
 int GeneralTextResponseOp::inference() {
-  const GeneralBlob *input_blob =
-      get_depend_argument<GeneralBlob>(pre_name());
+  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name());
 
   if (!input_blob) {
-    LOG(ERROR) << "Failed mutable depended argument, op: "
-               << pre_name();
+    LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name();
     return -1;
   }
@@ -68,13 +66,13 @@ int GeneralTextResponseOp::inference() {
     fetch_index[i] =
         model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
   }
-
+
   // response inst with only fetch_var_names
   Response *res = mutable_data<Response>();
 
   for (int i = 0; i < batch_size; ++i) {
     FetchInst *fetch_inst = res->add_insts();
-    for (auto & idx : fetch_index) {
+    for (auto &idx : fetch_index) {
       Tensor *tensor = fetch_inst->add_tensor_array();
       // currently only response float tensor or lod_tensor
       tensor->set_elem_type(1);
@@ -84,8 +82,7 @@
       } else {
         VLOG(2) << "out[" << idx << "] is tensor";
         for (int k = 1; k < in->at(idx).shape.size(); ++k) {
-          VLOG(2) << "shape[" << k - 1 << "]: "
-                  << in->at(idx).shape[k];
+          VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k];
           tensor->add_shape(in->at(idx).shape[k]);
         }
       }
@@ -93,7 +90,7 @@
   }
 
   int var_idx = 0;
-  for (auto & idx : fetch_index) {
+  for (auto &idx : fetch_index) {
     float *data_ptr = static_cast<float *>(in->at(idx).data.data());
     int cap = 1;
     for (int j = 1; j < in->at(idx).shape.size(); ++j) {
@@ -101,8 +98,8 @@
    }
    if (model_config->_is_lod_fetch[idx]) {
      for (int j = 0; j < batch_size; ++j) {
-        for (int k = in->at(idx).lod[0][j];
-             k < in->at(idx).lod[0][j + 1]; k++) {
+        for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
+             k++) {
          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_float_data(
              data_ptr[k]);
        }
      }
@@ -117,10 +114,10 @@
    }
    var_idx++;
  }
-
+
  if (req->profile_server()) {
    int64_t end = timeline.TimeStampUS();
-
+
    for (int i = 0; i < input_blob->p_size; ++i) {
      res->add_profile_time(input_blob->time_stamp[i]);
    }