diff --git a/core/general-server/op/general_infer_helper.h b/core/general-server/op/general_infer_helper.h
index b5d6d7f23f6474cd1b77889ce6192d890920f85e..6e905a2e1170496c93d80c25e30b47877bf1f3ec 100644
--- a/core/general-server/op/general_infer_helper.h
+++ b/core/general-server/op/general_infer_helper.h
@@ -37,6 +37,8 @@ struct GeneralBlob {
   double infer_time;
   std::vector fetch_name_vector;
 
+  int _batch_size;
+
   void Clear() {
     size_t tensor_count = tensor_vector.size();
     for (size_t ti = 0; ti < tensor_count; ++ti) {
@@ -44,7 +46,11 @@ struct GeneralBlob {
     }
     tensor_vector.clear();
   }
-
+
+  int SetBatchSize(int batch_size) { _batch_size = batch_size; }
+
+  int GetBatchSize() const { return _batch_size; }
+  /*
   int GetBatchSize() const {
     if (tensor_vector.size() > 0) {
       if (tensor_vector[0].lod.size() == 1) {
@@ -56,7 +62,7 @@ struct GeneralBlob {
       return -1;
     }
   }
-
+  */
   std::string ShortDebugString() const { return "Not implemented!"; }
 };
 
diff --git a/core/general-server/op/general_infer_op.cpp b/core/general-server/op/general_infer_op.cpp
index d3761000fba5ce0663b9c9b1759cbf6a64c912e8..c813a1cbdc746c33fbb39810a8e4ed15f93bf014 100644
--- a/core/general-server/op/general_infer_op.cpp
+++ b/core/general-server/op/general_infer_op.cpp
@@ -12,11 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "core/general-server/op/general_infer_op.h"
 #include
 #include
 #include
 #include
-#include "core/general-server/op/general_infer_op.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
 #include "core/predictor/framework/resource.h"
@@ -36,33 +36,31 @@ using baidu::paddle_serving::predictor::InferManager;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 
 int GeneralInferOp::inference() {
-  const GeneralBlob * input_blob =
-      get_depend_argument<GeneralBlob>(pre_name());
+  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name());
 
-  GeneralBlob * output_blob = mutable_data<GeneralBlob>();
+  GeneralBlob *output_blob = mutable_data<GeneralBlob>();
 
   if (!input_blob) {
-    LOG(ERROR) << "Failed mutable depended argument, op:"
-               << pre_name();
+    LOG(ERROR) << "Failed mutable depended argument, op:" << pre_name();
     return -1;
   }
 
   const TensorVector *in = &input_blob->tensor_vector;
   TensorVector *out = &output_blob->tensor_vector;
   int batch_size = input_blob->GetBatchSize();
+  output_blob->SetBatchSize(batch_size);
   VLOG(2) << "infer batch size: " << batch_size;
   // infer
-  Timer timeline;
-  double infer_time = 0.0;
-  timeline.Start();
+  // Timer timeline;
+  // double infer_time = 0.0;
+  // timeline.Start();
   if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) {
     LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
     return -1;
   }
 
-  timeline.Pause();
-  infer_time = timeline.ElapsedUS();
-
+  // timeline.Pause();
+  // infer_time = timeline.ElapsedUS();
   return 0;
 }
 DEFINE_OP(GeneralInferOp);
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 3020dac3bde5ed484b3db4870f35d025ae6a2396..bb6dc8886a032f797ef45fb5c785a10679c13bd3 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -12,12 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "core/general-server/op/general_reader_op.h"
 #include
 #include
 #include
 #include
 #include "core/general-server/op/general_infer_helper.h"
-#include "core/general-server/op/general_reader_op.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
 
@@ -73,7 +73,6 @@ int GeneralReaderOp::inference() {
   int batch_size = req->insts_size();
   int input_var_num = 0;
-
   std::vector elem_type;
   std::vector elem_size;
   std::vector capacity;
 
@@ -81,6 +80,8 @@ int GeneralReaderOp::inference() {
   GeneralBlob *res = mutable_data<GeneralBlob>();
   TensorVector *out = &res->tensor_vector;
 
+  res->SetBatchSize(batch_size);
+
   if (!res) {
     LOG(ERROR) << "Failed get op tls reader object output";
   }
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index 39624dcd7bb9455f1268c5d2d464b02fdeb16be4..f2fdcedd7d97cf401e426daab0c23c46324319e0 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -12,12 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "core/general-server/op/general_response_op.h"
 #include
 #include
 #include
 #include
 #include "core/general-server/op/general_infer_helper.h"
-#include "core/general-server/op/general_response_op.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
 #include "core/predictor/framework/resource.h"
@@ -37,12 +37,10 @@ using baidu::paddle_serving::predictor::InferManager;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 
 int GeneralResponseOp::inference() {
-  const GeneralBlob *input_blob =
-      get_depend_argument<GeneralBlob>(pre_name());
+  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name());
 
   if (!input_blob) {
-    LOG(ERROR) << "Failed mutable depended argument, op: "
-               << pre_name();
+    LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name();
     return -1;
   }
 
@@ -56,7 +54,7 @@ int GeneralResponseOp::inference() {
   VLOG(2) << "start to call load general model_conf op";
   baidu::paddle_serving::predictor::Resource &resource =
       baidu::paddle_serving::predictor::Resource::instance();
-
+
   VLOG(2) << "get resource pointer done.";
   std::shared_ptr<PaddleGeneralModelConfig> model_config =
       resource.get_general_model_config();
@@ -67,7 +65,7 @@ int GeneralResponseOp::inference() {
     fetch_index[i] =
         model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
   }
-
+
   // response inst with only fetch_var_names
   Response *res = mutable_data<Response>();
 
@@ -75,7 +73,7 @@ int GeneralResponseOp::inference() {
 
   for (int i = 0; i < batch_size; ++i) {
     FetchInst *fetch_inst = res->add_insts();
-    for (auto & idx : fetch_index) {
+    for (auto &idx : fetch_index) {
       Tensor *tensor = fetch_inst->add_tensor_array();
       // currently only response float tensor or lod_tensor
      tensor->set_elem_type(1);
@@ -85,8 +83,7 @@ int GeneralResponseOp::inference() {
       } else {
         VLOG(2) << "out[" << idx << "] is tensor";
         for (int k = 1; k < in->at(idx).shape.size(); ++k) {
-          VLOG(2) << "shape[" << k - 1 << "]: "
-                  << in->at(idx).shape[k];
+          VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k];
           tensor->add_shape(in->at(idx).shape[k]);
         }
       }
@@ -94,7 +91,7 @@ int GeneralResponseOp::inference() {
   }
 
   int var_idx = 0;
-  for (auto & idx : fetch_index) {
+  for (auto &idx : fetch_index) {
     float *data_ptr = static_cast<float *>(in->at(idx).data.data());
     int cap = 1;
     for (int j = 1; j < in->at(idx).shape.size(); ++j) {
@@ -102,8 +99,8 @@ int GeneralResponseOp::inference() {
     }
     if (model_config->_is_lod_fetch[idx]) {
       for (int j = 0; j < batch_size; ++j) {
-        for (int k = in->at(idx).lod[0][j];
-             k < in->at(idx).lod[0][j + 1]; k++) {
+        for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
+             k++) {
           res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data(
               reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
         }