diff --git a/core/general-server/op/general_infer_helper.h b/core/general-server/op/general_infer_helper.h
new file mode 100644
index 0000000000000000000000000000000000000000..415c888f6880553b8dced11d60ede4961e41fc96
--- /dev/null
+++ b/core/general-server/op/general_infer_helper.h
@@ -0,0 +1,65 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <vector>
+#ifdef BCLOUD
+#ifdef WITH_GPU
+#include "paddle/paddle_inference_api.h"
+#else
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#endif
+#else
+#include "paddle_inference_api.h"  // NOLINT
+#endif
+#include <string>
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+static const char* GENERAL_MODEL_NAME = "general_model";
+
+struct GeneralBlob {
+  std::vector<paddle::PaddleTensor> tensor_vector;
+  double infer_time;
+  std::vector<std::string> fetch_name_vector;
+
+  void Clear() {
+    size_t tensor_count = tensor_vector.size();
+    for (size_t ti = 0; ti < tensor_count; ++ti) {
+      tensor_vector[ti].shape.clear();
+    }
+    tensor_vector.clear();
+  }
+
+  int GetBatchSize() const {
+    if (tensor_vector.size() > 0) {
+      if (tensor_vector[0].lod.size() == 1) {
+        return tensor_vector[0].lod[0].size() - 1;
+      } else {
+        return tensor_vector[0].shape[0];
+      }
+    } else {
+      return -1;
+    }
+  }
+
+  std::string ShortDebugString() const { return "Not implemented!"; }
+};
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/core/general-server/op/general_infer_op.cpp b/core/general-server/op/general_infer_op.cpp
index 6755e1cc4502aca70314166038453c5013e582f9..163d7269747d099fc21f6eac63c4a3e3cd61d8d6 100644
--- a/core/general-server/op/general_infer_op.cpp
+++ b/core/general-server/op/general_infer_op.cpp
@@ -16,8 +16,8 @@
 #include <iostream>
 #include <memory>
 #include <sstream>
+#include "core/general-server/op/general_infer_helper.h"
 #include "core/general-server/op/general_infer_op.h"
-#include "core/general-server/op/general_reader_op.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
 #include "core/predictor/framework/resource.h"
@@ -37,23 +37,20 @@ using baidu::paddle_serving::predictor::InferManager;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 
 int GeneralInferOp::inference() {
-  const GeneralReaderOutput *reader_out =
-      get_depend_argument<GeneralReaderOutput>("general_reader_op");
-  if (!reader_out) {
-    LOG(ERROR) << "Failed mutable depended argument, op:"
-               << "general_reader_op";
-    return -1;
-  }
+  const GeneralBlob *input_blob =
+      get_depend_argument<GeneralBlob>(pre_name());
 
-  int reader_status = reader_out->reader_status;
-  if (reader_status != 0) {
-    LOG(ERROR) << "Read request wrong.";
+  if (!input_blob) {
+    LOG(ERROR) << "Failed mutable depended argument, op:"
+               << pre_name();
     return -1;
   }
 
-  const TensorVector *in = &reader_out->tensor_vector;
+  const TensorVector *in = &input_blob->tensor_vector;
   TensorVector *out = butil::get_object<TensorVector>();
-  int batch_size = (*in)[0].shape[0];
+  int batch_size = input_blob->GetBatchSize();
+
+  VLOG(2) << "infer batch size: " << batch_size;
   // infer
   Timer timeline;
   double infer_time = 0.0;
@@ -65,73 +62,6 @@ int GeneralInferOp::inference() {
   timeline.Pause();
   infer_time = timeline.ElapsedUS();
 
-  const Request *req = dynamic_cast<const Request *>(get_request_message());
-
-  VLOG(2) << "start to call load general model_conf op";
-  baidu::paddle_serving::predictor::Resource &resource =
-      baidu::paddle_serving::predictor::Resource::instance();
-
-  VLOG(2) << "get resource pointer done.";
-  std::shared_ptr<PaddleGeneralModelConfig> model_config =
-      resource.get_general_model_config();
-
-  std::vector<int> fetch_index;
-  fetch_index.resize(req->fetch_var_names_size());
-  for (int i = 0; i < req->fetch_var_names_size(); ++i) {
-    fetch_index[i] =
-        model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
-  }
-
-  // response inst with only fetch_var_names
-  Response *res = mutable_data<Response>();
-
-  res->set_mean_infer_us(infer_time);
-
-  for (int i = 0; i < batch_size; ++i) {
-    FetchInst *fetch_inst = res->add_insts();
-    for (auto & idx : fetch_index) {
-      Tensor *tensor = fetch_inst->add_tensor_array();
-      // currently only response float tensor or lod_tensor
-      tensor->set_elem_type(1);
-      if (model_config->_is_lod_fetch[idx]) {
-        VLOG(2) << "out[" << idx << " is lod_tensor";
-        tensor->add_shape(-1);
-      } else {
-        VLOG(2) << "out[" << idx << "] is tensor";
-        for (int k = 1; k < out->at(idx).shape.size(); ++k) {
-          VLOG(2) << "shape[" << k - 1 << "]: "
-                  << out->at(idx).shape[k];
-          tensor->add_shape(out->at(idx).shape[k]);
-        }
-      }
-    }
-  }
-
-  int var_idx = 0;
-  for (auto & idx : fetch_index) {
-    float *data_ptr = static_cast<float *>(out->at(idx).data.data());
-    int cap = 1;
-    for (int j = 1; j < out->at(idx).shape.size(); ++j) {
-      cap *= out->at(idx).shape[j];
-    }
-    if (model_config->_is_lod_fetch[idx]) {
-      for (int j = 0; j < batch_size; ++j) {
-        for (int k = out->at(idx).lod[0][j];
-             k < out->at(idx).lod[0][j + 1]; k++) {
-          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data(
-              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
-        }
-      }
-    } else {
-      for (int j = 0; j < batch_size; ++j) {
-        for (int k = j * cap; k < (j + 1) * cap; ++k) {
-          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data(
-              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
-        }
-      }
-    }
-    var_idx++;
-  }
   return 0;
 }
 DEFINE_OP(GeneralInferOp);
diff --git a/core/general-server/op/general_infer_op.h b/core/general-server/op/general_infer_op.h
index f6860f0941afb941623bf9b876e128c06f5a0911..70b13cf3cc13d05cb87191aed8a1018f29cdbcdf 100644
--- a/core/general-server/op/general_infer_op.h
+++ b/core/general-server/op/general_infer_op.h
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #pragma once
+#include <string>
 #include <vector>
 #ifdef BCLOUD
 #ifdef WITH_GPU
@@ -29,17 +30,15 @@
 namespace baidu {
 namespace paddle_serving {
 namespace serving {
 
-static const char* GENERAL_MODEL_NAME = "general_model";
-
 class GeneralInferOp
-    : public baidu::paddle_serving::predictor::OpWithChannel<
-          baidu::paddle_serving::predictor::general_model::Response> {
+    : public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
  public:
   typedef std::vector<paddle::PaddleTensor> TensorVector;
 
   DECLARE_OP(GeneralInferOp);
 
   int inference();
+
 };
 
 }  // namespace serving
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 3a4aa72baac753423bf126358c72b50352a2531d..a8b0e52df61c15bae197f3368b60ecffb89b398c 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -16,6 +16,7 @@
 #include <iostream>
 #include <memory>
 #include <sstream>
+#include "core/general-server/op/general_infer_helper.h"
 #include "core/general-server/op/general_reader_op.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
@@ -77,16 +78,12 @@ int GeneralReaderOp::inference() {
   std::vector<int> elem_size;
   std::vector<int> capacity;
 
-  GeneralReaderOutput *res = mutable_data<GeneralReaderOutput>();
-  TensorVector *in = &res->tensor_vector;
+  GeneralBlob *res = mutable_data<GeneralBlob>();
+  TensorVector *out = &res->tensor_vector;
 
   if (!res) {
     LOG(ERROR) << "Failed get op tls reader object output";
   }
-  if (batch_size <= 0) {
-    res->reader_status = -1;
-    return 0;
-  }
 
   int var_num = req->insts(0).tensor_array_size();
   VLOG(2) << "var num: " << var_num;
@@ -102,7 +99,7 @@
   VLOG(2) << "print general model config done.";
 
-  // check
+  // TODO(guru4elephant): how to do conditional check?
   res->reader_status = conf_check(req, model_config);
   if (res->reader_status != 0) {
     LOG(INFO) << "model conf of server:";
@@ -142,26 +139,26 @@
       VLOG(2) << "var[" << i << "] is tensor, capacity: " << capacity[i];
     }
     lod_tensor.name = model_config->_feed_name[i];
-    in->push_back(lod_tensor);
+    out->push_back(lod_tensor);
   }
 
   for (int i = 0; i < var_num; ++i) {
-    if (in->at(i).lod.size() == 1) {
+    if (out->at(i).lod.size() == 1) {
       for (int j = 0; j < batch_size; ++j) {
         const Tensor &tensor = req->insts(j).tensor_array(i);
         int data_len = tensor.data_size();
         VLOG(2) << "tensor size for var[" << i << "]: " << tensor.data_size();
-        int cur_len = in->at(i).lod[0].back();
+        int cur_len = out->at(i).lod[0].back();
         VLOG(2) << "current len: " << cur_len;
-        in->at(i).lod[0].push_back(cur_len + data_len);
+        out->at(i).lod[0].push_back(cur_len + data_len);
         VLOG(2) << "new len: " << cur_len + data_len;
       }
-      in->at(i).data.Resize(in->at(i).lod[0].back() * elem_size[i]);
-      in->at(i).shape = {in->at(i).lod[0].back(), 1};
+      out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
+      out->at(i).shape = {out->at(i).lod[0].back(), 1};
       VLOG(2) << "var[" << i
-              << "] is lod_tensor and len=" << in->at(i).lod[0].back();
+              << "] is lod_tensor and len=" << out->at(i).lod[0].back();
     } else {
-      in->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
+      out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
       VLOG(2) << "var[" << i
               << "] is tensor and capacity=" << batch_size * capacity[i];
     }
@@ -169,29 +166,29 @@
   for (int i = 0; i < var_num; ++i) {
     if (elem_type[i] == 0) {
-      int64_t *dst_ptr = static_cast<int64_t *>(in->at(i).data.data());
+      int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
       int offset = 0;
       for (int j = 0; j < batch_size; ++j) {
         for (int k = 0; k < req->insts(j).tensor_array(i).data_size(); ++k) {
           dst_ptr[offset + k] =
               *(const int64_t *)req->insts(j).tensor_array(i).data(k).c_str();
         }
-        if (in->at(i).lod.size() == 1) {
-          offset = in->at(i).lod[0][j + 1];
+        if (out->at(i).lod.size() == 1) {
+          offset = out->at(i).lod[0][j + 1];
         } else {
           offset += capacity[i];
         }
       }
     } else {
-      float *dst_ptr = static_cast<float *>(in->at(i).data.data());
+      float *dst_ptr = static_cast<float *>(out->at(i).data.data());
       int offset = 0;
       for (int j = 0; j < batch_size; ++j) {
         for (int k = 0; k < req->insts(j).tensor_array(i).data_size(); ++k) {
           dst_ptr[offset + k] =
               *(const float *)req->insts(j).tensor_array(i).data(k).c_str();
         }
-        if (in->at(i).lod.size() == 1) {
-          offset = in->at(i).lod[0][j + 1];
+        if (out->at(i).lod.size() == 1) {
+          offset = out->at(i).lod[0][j + 1];
         } else {
           offset += capacity[i];
         }
diff --git a/core/general-server/op/general_reader_op.h b/core/general-server/op/general_reader_op.h
index 4c68d70c37e79bf86838551d899f6cc25b2be923..137fad98083ff6e136ea0446bd0c985a556b62de 100644
--- a/core/general-server/op/general_reader_op.h
+++ b/core/general-server/op/general_reader_op.h
@@ -25,6 +25,7 @@
 #endif
 #include <string>
 #include "core/predictor/framework/resource.h"
+#include "core/general-server/op/general_infer_helper.h"
 #include "core/general-server/general_model_service.pb.h"
 #include "core/general-server/load_general_model_service.pb.h"
 
@@ -32,28 +33,15 @@
 namespace baidu {
 namespace paddle_serving {
 namespace serving {
 
-struct GeneralReaderOutput {
-  std::vector<paddle::PaddleTensor> tensor_vector;
-  int reader_status = 0;
-
-  void Clear() {
-    size_t tensor_count = tensor_vector.size();
-    for (size_t ti = 0; ti < tensor_count; ++ti) {
-      tensor_vector[ti].shape.clear();
-    }
-    tensor_vector.clear();
-  }
-  std::string ShortDebugString() const { return "Not implemented!"; }
-};
-
 class GeneralReaderOp
     : public baidu::paddle_serving::predictor::OpWithChannel<
-          GeneralReaderOutput> {
+          GeneralBlob> {
  public:
   typedef std::vector<paddle::PaddleTensor> TensorVector;
 
   DECLARE_OP(GeneralReaderOp);
 
   int inference();
+
 };
 
 }  // namespace serving
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..377a993dd65d0bcd666c6d767407f80c276cd35a
--- /dev/null
+++ b/core/general-server/op/general_response_op.cpp
@@ -0,0 +1,129 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include "core/general-server/op/general_infer_helper.h"
+#include "core/general-server/op/general_response_op.h"
+#include "core/predictor/framework/infer.h"
+#include "core/predictor/framework/memory.h"
+#include "core/predictor/framework/resource.h"
+#include "core/util/include/timer.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+using baidu::paddle_serving::Timer;
+using baidu::paddle_serving::predictor::MempoolWrapper;
+using baidu::paddle_serving::predictor::general_model::Tensor;
+using baidu::paddle_serving::predictor::general_model::Response;
+using baidu::paddle_serving::predictor::general_model::Request;
+using baidu::paddle_serving::predictor::general_model::FetchInst;
+using baidu::paddle_serving::predictor::InferManager;
+using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
+
+int GeneralResponseOp::inference() {
+  const GeneralBlob *input_blob =
+      get_depend_argument<GeneralBlob>(pre_name());
+
+  if (!input_blob) {
+    LOG(ERROR) << "Failed mutable depended argument, op: "
+               << pre_name();
+    return -1;
+  }
+
+  const TensorVector *in = &input_blob->tensor_vector;
+  int batch_size = input_blob->GetBatchSize();
+  double infer_time = input_blob->infer_time;
+
+  VLOG(2) << "input batch size: " << batch_size;
+
+  const Request *req = dynamic_cast<const Request *>(get_request_message());
+
+  VLOG(2) << "start to call load general model_conf op";
+  baidu::paddle_serving::predictor::Resource &resource =
+      baidu::paddle_serving::predictor::Resource::instance();
+
+  VLOG(2) << "get resource pointer done.";
+  std::shared_ptr<PaddleGeneralModelConfig> model_config =
+      resource.get_general_model_config();
+
+  std::vector<int> fetch_index;
+  fetch_index.resize(req->fetch_var_names_size());
+  for (int i = 0; i < req->fetch_var_names_size(); ++i) {
+    fetch_index[i] =
+        model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
+  }
+
+  // response inst with only fetch_var_names
+  Response *res = mutable_data<Response>();
+
+  res->set_mean_infer_us(infer_time);
+
+  for (int i = 0; i < batch_size; ++i) {
+    FetchInst *fetch_inst = res->add_insts();
+    for (auto & idx : fetch_index) {
+      Tensor *tensor = fetch_inst->add_tensor_array();
+      // currently only response float tensor or lod_tensor
+      tensor->set_elem_type(1);
+      if (model_config->_is_lod_fetch[idx]) {
+        VLOG(2) << "out[" << idx << "] is lod_tensor";
+        tensor->add_shape(-1);
+      } else {
+        VLOG(2) << "out[" << idx << "] is tensor";
+        for (int k = 1; k < in->at(idx).shape.size(); ++k) {
+          VLOG(2) << "shape[" << k - 1 << "]: "
+                  << in->at(idx).shape[k];
+          tensor->add_shape(in->at(idx).shape[k]);
+        }
+      }
+    }
+  }
+
+  int var_idx = 0;
+  for (auto & idx : fetch_index) {
+    float *data_ptr = static_cast<float *>(in->at(idx).data.data());
+    int cap = 1;
+    for (int j = 1; j < in->at(idx).shape.size(); ++j) {
+      cap *= in->at(idx).shape[j];
+    }
+    if (model_config->_is_lod_fetch[idx]) {
+      for (int j = 0; j < batch_size; ++j) {
+        for (int k = in->at(idx).lod[0][j];
+             k < in->at(idx).lod[0][j + 1]; k++) {
+          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data(
+              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
+        }
+      }
+    } else {
+      for (int j = 0; j < batch_size; ++j) {
+        for (int k = j * cap; k < (j + 1) * cap; ++k) {
+          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data(
+              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
+        }
+      }
+    }
+    var_idx++;
+  }
+  return 0;
+}
+
+DEFINE_OP(GeneralResponseOp);
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/core/general-server/op/general_text_infer_op.h b/core/general-server/op/general_response_op.h
similarity index 94%
rename from core/general-server/op/general_text_infer_op.h
rename to core/general-server/op/general_response_op.h
index 5c94a205f8fc03cb8b4d23bb556a8755b7cb55ea..95b9f7708df82dc1ce750cb4067b79c8ca60fd05 100644
--- a/core/general-server/op/general_text_infer_op.h
+++ b/core/general-server/op/general_response_op.h
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #pragma once
+#include <string>
 #include <vector>
 #ifdef BCLOUD
 #ifdef WITH_GPU
@@ -29,15 +30,16 @@
 namespace baidu {
 namespace paddle_serving {
 namespace serving {
 
-class GeneralTextInferOp
+class GeneralResponseOp
     : public baidu::paddle_serving::predictor::OpWithChannel<
           baidu::paddle_serving::predictor::general_model::Response> {
  public:
   typedef std::vector<paddle::PaddleTensor> TensorVector;
 
-  DECLARE_OP(GeneralTextInferOp);
+  DECLARE_OP(GeneralResponseOp);
 
   int inference();
+
 };
 
 }  // namespace serving
diff --git a/core/general-server/op/general_text_reader_op.cpp b/core/general-server/op/general_text_reader_op.cpp
index 9e3c489f6557d6d5e9931ca317718e5f4e3e3683..7f13c1a1e27206ba68e81720c8a4d13c9fad6339 100644
--- a/core/general-server/op/general_text_reader_op.cpp
+++ b/core/general-server/op/general_text_reader_op.cpp
@@ -17,6 +17,7 @@
 #include <memory>
 #include <sstream>
 #include "core/general-server/op/general_text_reader_op.h"
+#include "core/general-server/op/general_infer_helper.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
@@ -42,7 +43,7 @@ int GeneralTextReaderOp::inference() {
   std::vector<int> elem_size;
   std::vector<int> capacity;
 
-  GeneralTextReaderOutput *res = mutable_data<GeneralTextReaderOutput>();
+  GeneralBlob *res = mutable_data<GeneralBlob>();
   TensorVector *in = &res->tensor_vector;
 
   if (!res) {
diff --git a/core/general-server/op/general_text_reader_op.h b/core/general-server/op/general_text_reader_op.h
index e7f484a9915609887c2a6593ff1ad0655fc4789c..3f9eb5deb1a48789c6a85674de73c333aa08f1af 100644
--- a/core/general-server/op/general_text_reader_op.h
+++ b/core/general-server/op/general_text_reader_op.h
@@ -32,23 +32,8 @@
 namespace baidu {
 namespace paddle_serving {
 namespace serving {
 
-struct GeneralTextReaderOutput {
-  std::vector<paddle::PaddleTensor> tensor_vector;
-  int reader_status = 0;
-
-  void Clear() {
-    size_t tensor_count = tensor_vector.size();
-    for (size_t ti = 0; ti < tensor_count; ++ti) {
-      tensor_vector[ti].shape.clear();
-    }
-    tensor_vector.clear();
-  }
-  std::string ShortDebugString() const { return "Not implemented!"; }
-};
-
 class GeneralTextReaderOp :
-    public baidu::paddle_serving::predictor::OpWithChannel<
-        GeneralTextReaderOutput> {
+    public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
  public:
   typedef std::vector<paddle::PaddleTensor> TensorVector;
diff --git a/core/general-server/op/general_text_infer_op.cpp b/core/general-server/op/general_text_response_op.cpp
similarity index 66%
rename from core/general-server/op/general_text_infer_op.cpp
rename to core/general-server/op/general_text_response_op.cpp
index 7bc43545ebbf3c206e6d2495af284f8c284b896c..7e67ce86e8f1616e320167a833d9a98f70ad3d99 100644
--- a/core/general-server/op/general_text_infer_op.cpp
+++ b/core/general-server/op/general_text_response_op.cpp
@@ -16,10 +16,8 @@
 #include <iostream>
 #include <memory>
 #include <sstream>
-#include "core/general-server/op/general_text_infer_op.h"
-#include "core/general-server/op/general_infer_op.h"
-#include "core/general-server/op/general_text_reader_op.h"
-#include "core/general-server/op/general_reader_op.h"
+#include "core/general-server/op/general_text_response_op.h"
+#include "core/general-server/op/general_infer_helper.h"
 #include "core/predictor/framework/infer.h"
"core/predictor/framework/infer.h" #include "core/predictor/framework/memory.h" #include "core/predictor/framework/resource.h" @@ -29,7 +27,6 @@ namespace baidu { namespace paddle_serving { namespace serving { -using baidu::paddle_serving::serving::GENERAL_MODEL_NAME; using baidu::paddle_serving::Timer; using baidu::paddle_serving::predictor::MempoolWrapper; using baidu::paddle_serving::predictor::general_model::Tensor; @@ -40,42 +37,20 @@ using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; int GeneralTextInferOp::inference() { - const GeneralTextReaderOutput *reader_out = - get_depend_argument(pre_name()); + const GeneralBlob *blob_input = + get_depend_argument(pre_name()); - VLOG(2) << "Going to get previous node name: " << pre_name(); - - if (!reader_out) { - LOG(ERROR) << "Failed mutable depended argument, op:" - << "general_text_reader_op"; + if (!blob_input) { + LOG(ERROR) << "Failed mutable depended argument, op: " + << pre_name(); return -1; } - int reader_status = reader_out->reader_status; - if (reader_status != 0) { - LOG(ERROR) << "Read request wrong."; - return -1; - } + const TensorVector *in = &blob_input->tensor_vector; + int batch_size = in->GetBatchSize(); - const TensorVector *in = &reader_out->tensor_vector; - TensorVector *out = butil::get_object(); - int batch_size = 0; - if (in->at(0).lod.size() == 1) { - batch_size = in->at(0).lod[0].size() - 1; - } else { - batch_size = in->at(0).shape[0]; - } VLOG(2) << "infer batch size: " << batch_size; // infer - Timer timeline; - double infer_time = 0.0; - timeline.Start(); - if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) { - LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME; - return -1; - } - timeline.Pause(); - infer_time = timeline.ElapsedUS(); const Request *req = dynamic_cast(get_request_message()); @@ -110,10 +85,10 @@ int GeneralTextInferOp::inference() { tensor->add_shape(-1); } else { VLOG(2) << "out[" << idx << "] is tensor"; - for (int k = 1; k < out->at(idx).shape.size(); ++k) { + for (int k = 1; k < in->at(idx).shape.size(); ++k) { VLOG(2) << "shape[" << k - 1 << "]: " - << out->at(idx).shape[k]; - tensor->add_shape(out->at(idx).shape[k]); + << in->at(idx).shape[k]; + tensor->add_shape(in->at(idx).shape[k]); } } } @@ -121,15 +96,15 @@ int GeneralTextInferOp::inference() { int var_idx = 0; for (auto & idx : fetch_index) { - float *data_ptr = static_cast(out->at(idx).data.data()); + float *data_ptr = static_cast(in->at(idx).data.data()); int cap = 1; - for (int j = 1; j < out->at(idx).shape.size(); ++j) { - cap *= out->at(idx).shape[j]; + for (int j = 1; j < in->at(idx).shape.size(); ++j) { + cap *= in->at(idx).shape[j]; } if (model_config->_is_lod_fetch[idx]) { for (int j = 0; j < batch_size; ++j) { - for (int k = out->at(idx).lod[0][j]; - k < out->at(idx).lod[0][j + 1]; k++) { + for (int k = in->at(idx).lod[0][j]; + k < in->at(idx).lod[0][j + 1]; k++) { res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_float_data( data_ptr[k]); } @@ -146,7 +121,7 @@ int GeneralTextInferOp::inference() { } return 0; } -DEFINE_OP(GeneralTextInferOp); +DEFINE_OP(GeneralTextResponseOp); } // namespace serving } // namespace paddle_serving diff --git a/core/general-server/op/general_text_response_op.h b/core/general-server/op/general_text_response_op.h new file mode 100644 index 0000000000000000000000000000000000000000..a062ea3f6553bbf3fffa175277b28e3a5c837143 --- /dev/null +++ 
b/core/general-server/op/general_text_response_op.h @@ -0,0 +1,47 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#ifdef BCLOUD +#ifdef WITH_GPU +#include "paddle/paddle_inference_api.h" +#else +#include "paddle/fluid/inference/api/paddle_inference_api.h" +#endif +#else +#include "paddle_inference_api.h" // NOLINT +#endif +#include "core/general-server/general_model_service.pb.h" + +namespace baidu { +namespace paddle_serving { +namespace serving { + +class GeneralTextResponseOp + : public baidu::paddle_serving::predictor::OpWithChannel< + baidu::paddle_serving::predictor::general_model::Response> { + public: + typedef std::vector TensorVector; + + DECLARE_OP(GeneralTextResponseOp); + + int inference(); + +}; + +} // namespace serving +} // namespace paddle_serving +} // namespace baidu