diff --git a/core/general-server/op/general_infer_helper.h b/core/general-server/op/general_infer_helper.h
new file mode 100644
index 0000000000000000000000000000000000000000..b5d6d7f23f6474cd1b77889ce6192d890920f85e
--- /dev/null
+++ b/core/general-server/op/general_infer_helper.h
@@ -0,0 +1,65 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#ifdef BCLOUD
+#ifdef WITH_GPU
+#include "paddle/paddle_inference_api.h"
+#else
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#endif
+#else
+#include "paddle_inference_api.h"  // NOLINT
+#endif
+#include
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+static const char* GENERAL_MODEL_NAME = "general_model";
+
+struct GeneralBlob {
+  std::vector<paddle::PaddleTensor> tensor_vector;
+  double infer_time;
+  std::vector<std::string> fetch_name_vector;
+
+  void Clear() {
+    size_t tensor_count = tensor_vector.size();
+    for (size_t ti = 0; ti < tensor_count; ++ti) {
+      tensor_vector[ti].shape.clear();
+    }
+    tensor_vector.clear();
+  }
+
+  int GetBatchSize() const {
+    if (tensor_vector.size() > 0) {
+      if (tensor_vector[0].lod.size() == 1) {
+        return tensor_vector[0].lod[0].size() - 1;
+      } else {
+        return tensor_vector[0].shape[0];
+      }
+    } else {
+      return -1;
+    }
+  }
+
+  std::string ShortDebugString() const { return "Not implemented!"; }
+};
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/core/general-server/op/general_infer_op.cpp b/core/general-server/op/general_infer_op.cpp
index 6755e1cc4502aca70314166038453c5013e582f9..d3761000fba5ce0663b9c9b1759cbf6a64c912e8 100644
--- a/core/general-server/op/general_infer_op.cpp
+++ b/core/general-server/op/general_infer_op.cpp
@@ -17,7 +17,6 @@
 #include
 #include
 #include "core/general-server/op/general_infer_op.h"
-#include "core/general-server/op/general_reader_op.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
 #include "core/predictor/framework/resource.h"
@@ -37,23 +36,22 @@
 using baidu::paddle_serving::predictor::InferManager;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 
 int GeneralInferOp::inference() {
-  const GeneralReaderOutput *reader_out =
-      get_depend_argument<GeneralReaderOutput>("general_reader_op");
-  if (!reader_out) {
+  const GeneralBlob * input_blob =
+      get_depend_argument<GeneralBlob>(pre_name());
+
+  GeneralBlob * output_blob = mutable_data<GeneralBlob>();
+
+  if (!input_blob) {
     LOG(ERROR) << "Failed mutable depended argument, op:"
-               << "general_reader_op";
+               << pre_name();
     return -1;
   }
 
-  int reader_status = reader_out->reader_status;
-  if (reader_status != 0) {
-    LOG(ERROR) << "Read request wrong.";
-    return -1;
-  }
+  const TensorVector *in = &input_blob->tensor_vector;
+  TensorVector *out = &output_blob->tensor_vector;
+  int batch_size = input_blob->GetBatchSize();
 
-  const TensorVector *in = &reader_out->tensor_vector;
-  TensorVector *out = butil::get_object<TensorVector>();
-  int batch_size = (*in)[0].shape[0];
+  VLOG(2) << "infer batch size: " << batch_size;
   // infer
   Timer timeline;
   double infer_time = 0.0;
@@ -65,73 +63,6 @@ int GeneralInferOp::inference() {
   timeline.Pause();
   infer_time = timeline.ElapsedUS();
 
-  const Request *req = dynamic_cast<const Request *>(get_request_message());
-
-  VLOG(2) << "start to call load general model_conf op";
-  baidu::paddle_serving::predictor::Resource &resource =
-      baidu::paddle_serving::predictor::Resource::instance();
-
-  VLOG(2) << "get resource pointer done.";
-  std::shared_ptr<PaddleGeneralModelConfig> model_config =
-      resource.get_general_model_config();
-
-  std::vector<int> fetch_index;
-  fetch_index.resize(req->fetch_var_names_size());
-  for (int i = 0; i < req->fetch_var_names_size(); ++i) {
-    fetch_index[i] =
-        model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
-  }
-
-  // response inst with only fetch_var_names
-  Response *res = mutable_data<Response>();
-
-  res->set_mean_infer_us(infer_time);
-
-  for (int i = 0; i < batch_size; ++i) {
-    FetchInst *fetch_inst = res->add_insts();
-    for (auto & idx : fetch_index) {
-      Tensor *tensor = fetch_inst->add_tensor_array();
-      // currently only response float tensor or lod_tensor
-      tensor->set_elem_type(1);
-      if (model_config->_is_lod_fetch[idx]) {
-        VLOG(2) << "out[" << idx << " is lod_tensor";
-        tensor->add_shape(-1);
-      } else {
-        VLOG(2) << "out[" << idx << "] is tensor";
-        for (int k = 1; k < out->at(idx).shape.size(); ++k) {
-          VLOG(2) << "shape[" << k - 1 << "]: "
-                  << out->at(idx).shape[k];
-          tensor->add_shape(out->at(idx).shape[k]);
-        }
-      }
-    }
-  }
-
-  int var_idx = 0;
-  for (auto & idx : fetch_index) {
-    float *data_ptr = static_cast<float *>(out->at(idx).data.data());
-    int cap = 1;
-    for (int j = 1; j < out->at(idx).shape.size(); ++j) {
-      cap *= out->at(idx).shape[j];
-    }
-    if (model_config->_is_lod_fetch[idx]) {
-      for (int j = 0; j < batch_size; ++j) {
-        for (int k = out->at(idx).lod[0][j];
-             k < out->at(idx).lod[0][j + 1]; k++) {
-          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data(
-              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
-        }
-      }
-    } else {
-      for (int j = 0; j < batch_size; ++j) {
-        for (int k = j * cap; k < (j + 1) * cap; ++k) {
-          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data(
-              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
-        }
-      }
-    }
-    var_idx++;
-  }
   return 0;
 }
 DEFINE_OP(GeneralInferOp);
diff --git a/core/general-server/op/general_infer_op.h b/core/general-server/op/general_infer_op.h
index f6860f0941afb941623bf9b876e128c06f5a0911..6c8d9fdc415122baf3eb94aaea5167579835737e 100644
--- a/core/general-server/op/general_infer_op.h
+++ b/core/general-server/op/general_infer_op.h
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #pragma once
+#include
 #include
 #ifdef BCLOUD
 #ifdef WITH_GPU
@@ -24,22 +25,21 @@
 #include "paddle_inference_api.h"  // NOLINT
 #endif
 #include "core/general-server/general_model_service.pb.h"
+#include "core/general-server/op/general_infer_helper.h"
 
 namespace baidu {
 namespace paddle_serving {
 namespace serving {
 
-static const char* GENERAL_MODEL_NAME = "general_model";
-
 class GeneralInferOp
-    : public baidu::paddle_serving::predictor::OpWithChannel<
-          baidu::paddle_serving::predictor::general_model::Response> {
+    : public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
  public:
   typedef std::vector<paddle::PaddleTensor> TensorVector;
 
   DECLARE_OP(GeneralInferOp);
 
   int inference();
+
 };
 
 }  // namespace serving
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 3a4aa72baac753423bf126358c72b50352a2531d..3020dac3bde5ed484b3db4870f35d025ae6a2396 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include "core/general-server/op/general_infer_helper.h"
 #include "core/general-server/op/general_reader_op.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
@@ -77,16 +78,12 @@ int GeneralReaderOp::inference() {
   std::vector<int> elem_size;
   std::vector<int> capacity;
 
-  GeneralReaderOutput *res = mutable_data<GeneralReaderOutput>();
-  TensorVector *in = &res->tensor_vector;
+  GeneralBlob *res = mutable_data<GeneralBlob>();
+  TensorVector *out = &res->tensor_vector;
 
   if (!res) {
     LOG(ERROR) << "Failed get op tls reader object output";
   }
 
-  if (batch_size <= 0) {
-    res->reader_status = -1;
-    return 0;
-  }
 
   int var_num = req->insts(0).tensor_array_size();
   VLOG(2) << "var num: " << var_num;
@@ -102,9 +99,9 @@
 
   VLOG(2) << "print general model config done.";
 
-  // check
-  res->reader_status = conf_check(req, model_config);
-  if (res->reader_status != 0) {
+  // TODO(guru4elephant): how to do conditional check?
+  int ret = conf_check(req, model_config);
+  if (ret != 0) {
     LOG(INFO) << "model conf of server:";
     resource.print_general_model_config(model_config);
     return 0;
@@ -142,26 +139,26 @@
       VLOG(2) << "var[" << i << "] is tensor, capacity: " << capacity[i];
     }
     lod_tensor.name = model_config->_feed_name[i];
-    in->push_back(lod_tensor);
+    out->push_back(lod_tensor);
   }
 
   for (int i = 0; i < var_num; ++i) {
-    if (in->at(i).lod.size() == 1) {
+    if (out->at(i).lod.size() == 1) {
       for (int j = 0; j < batch_size; ++j) {
         const Tensor &tensor = req->insts(j).tensor_array(i);
         int data_len = tensor.data_size();
         VLOG(2) << "tensor size for var[" << i << "]: " << tensor.data_size();
-        int cur_len = in->at(i).lod[0].back();
+        int cur_len = out->at(i).lod[0].back();
         VLOG(2) << "current len: " << cur_len;
-        in->at(i).lod[0].push_back(cur_len + data_len);
+        out->at(i).lod[0].push_back(cur_len + data_len);
         VLOG(2) << "new len: " << cur_len + data_len;
       }
-      in->at(i).data.Resize(in->at(i).lod[0].back() * elem_size[i]);
-      in->at(i).shape = {in->at(i).lod[0].back(), 1};
+      out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
+      out->at(i).shape = {out->at(i).lod[0].back(), 1};
       VLOG(2) << "var[" << i
-              << "] is lod_tensor and len=" << in->at(i).lod[0].back();
+              << "] is lod_tensor and len=" << out->at(i).lod[0].back();
     } else {
-      in->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
+      out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
       VLOG(2) << "var[" << i
               << "] is tensor and capacity=" << batch_size * capacity[i];
     }
@@ -169,29 +166,29 @@
 
   for (int i = 0; i < var_num; ++i) {
     if (elem_type[i] == 0) {
-      int64_t *dst_ptr = static_cast<int64_t *>(in->at(i).data.data());
+      int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
       int offset = 0;
       for (int j = 0; j < batch_size; ++j) {
        for (int k = 0; k < req->insts(j).tensor_array(i).data_size(); ++k) {
          dst_ptr[offset + k] =
              *(const int64_t *)req->insts(j).tensor_array(i).data(k).c_str();
        }
-        if (in->at(i).lod.size() == 1) {
-          offset = in->at(i).lod[0][j + 1];
+        if (out->at(i).lod.size() == 1) {
+          offset = out->at(i).lod[0][j + 1];
        } else {
          offset += capacity[i];
        }
      }
     } else {
-      float *dst_ptr = static_cast<float *>(in->at(i).data.data());
+      float *dst_ptr = static_cast<float *>(out->at(i).data.data());
       int offset = 0;
       for (int j = 0; j < batch_size; ++j) {
        for (int k = 0; k < req->insts(j).tensor_array(i).data_size(); ++k) {
          dst_ptr[offset + k] =
              *(const float *)req->insts(j).tensor_array(i).data(k).c_str();
        }
-        if (in->at(i).lod.size() == 1) {
-          offset = in->at(i).lod[0][j + 1];
+        if (out->at(i).lod.size() == 1) {
+          offset = out->at(i).lod[0][j + 1];
        } else {
          offset += capacity[i];
        }
diff --git a/core/general-server/op/general_reader_op.h b/core/general-server/op/general_reader_op.h
index 4c68d70c37e79bf86838551d899f6cc25b2be923..137fad98083ff6e136ea0446bd0c985a556b62de 100644
--- a/core/general-server/op/general_reader_op.h
+++ b/core/general-server/op/general_reader_op.h
@@ -25,6 +25,7 @@
 #endif
 #include
 #include "core/predictor/framework/resource.h"
+#include "core/general-server/op/general_infer_helper.h"
 #include "core/general-server/general_model_service.pb.h"
 #include "core/general-server/load_general_model_service.pb.h"
 
@@ -32,28 +33,15 @@ namespace baidu {
 namespace paddle_serving {
 namespace serving {
 
-struct GeneralReaderOutput {
-  std::vector<paddle::PaddleTensor> tensor_vector;
-  int reader_status = 0;
-
-  void Clear() {
-    size_t tensor_count = tensor_vector.size();
-    for (size_t ti = 0; ti < tensor_count; ++ti) {
-      tensor_vector[ti].shape.clear();
-    }
-    tensor_vector.clear();
-  }
-  std::string ShortDebugString() const { return "Not implemented!"; }
-};
-
 class GeneralReaderOp : public baidu::paddle_serving::predictor::OpWithChannel<
-                            GeneralReaderOutput> {
+                            GeneralBlob> {
  public:
   typedef std::vector<paddle::PaddleTensor> TensorVector;
 
   DECLARE_OP(GeneralReaderOp);
 
   int inference();
+
 };
 
 }  // namespace serving
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..39624dcd7bb9455f1268c5d2d464b02fdeb16be4
--- /dev/null
+++ b/core/general-server/op/general_response_op.cpp
@@ -0,0 +1,128 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+#include
+#include
+#include "core/general-server/op/general_infer_helper.h"
+#include "core/general-server/op/general_response_op.h"
+#include "core/predictor/framework/infer.h"
+#include "core/predictor/framework/memory.h"
+#include "core/predictor/framework/resource.h"
+#include "core/util/include/timer.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+using baidu::paddle_serving::Timer;
+using baidu::paddle_serving::predictor::MempoolWrapper;
+using baidu::paddle_serving::predictor::general_model::Tensor;
+using baidu::paddle_serving::predictor::general_model::Response;
+using baidu::paddle_serving::predictor::general_model::Request;
+using baidu::paddle_serving::predictor::general_model::FetchInst;
+using baidu::paddle_serving::predictor::InferManager;
+using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
+
+int GeneralResponseOp::inference() {
+  const GeneralBlob *input_blob =
+      get_depend_argument<GeneralBlob>(pre_name());
+
+  if (!input_blob) {
+    LOG(ERROR) << "Failed mutable depended argument, op: "
+               << pre_name();
+    return -1;
+  }
+
+  const TensorVector *in = &input_blob->tensor_vector;
+  int batch_size = input_blob->GetBatchSize();
+
+  VLOG(2) << "input batch size: " << batch_size;
+
+  const Request *req = dynamic_cast<const Request *>(get_request_message());
+
+  VLOG(2) << "start to call load general model_conf op";
+  baidu::paddle_serving::predictor::Resource &resource =
+      baidu::paddle_serving::predictor::Resource::instance();
+
+  VLOG(2) << "get resource pointer done.";
+  std::shared_ptr<PaddleGeneralModelConfig> model_config =
+      resource.get_general_model_config();
+
+  std::vector<int> fetch_index;
+  fetch_index.resize(req->fetch_var_names_size());
+  for (int i = 0; i < req->fetch_var_names_size(); ++i) {
+    fetch_index[i] =
+        model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
+  }
+
+  // response inst with only fetch_var_names
+  Response *res = mutable_data<Response>();
+
+  // res->set_mean_infer_us(infer_time);
+
+  for (int i = 0; i < batch_size; ++i) {
+    FetchInst *fetch_inst = res->add_insts();
+    for (auto & idx : fetch_index) {
+      Tensor *tensor = fetch_inst->add_tensor_array();
+      // currently only response float tensor or lod_tensor
+      tensor->set_elem_type(1);
+      if (model_config->_is_lod_fetch[idx]) {
+        VLOG(2) << "out[" << idx << "] is lod_tensor";
+        tensor->add_shape(-1);
+      } else {
+        VLOG(2) << "out[" << idx << "] is tensor";
+        for (int k = 1; k < in->at(idx).shape.size(); ++k) {
+          VLOG(2) << "shape[" << k - 1 << "]: "
+                  << in->at(idx).shape[k];
+          tensor->add_shape(in->at(idx).shape[k]);
+        }
+      }
+    }
+  }
+
+  int var_idx = 0;
+  for (auto & idx : fetch_index) {
+    float *data_ptr = static_cast<float *>(in->at(idx).data.data());
+    int cap = 1;
+    for (int j = 1; j < in->at(idx).shape.size(); ++j) {
+      cap *= in->at(idx).shape[j];
+    }
+    if (model_config->_is_lod_fetch[idx]) {
+      for (int j = 0; j < batch_size; ++j) {
+        for (int k = in->at(idx).lod[0][j];
+             k < in->at(idx).lod[0][j + 1]; k++) {
+          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data(
+              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
+        }
+      }
+    } else {
+      for (int j = 0; j < batch_size; ++j) {
+        for (int k = j * cap; k < (j + 1) * cap; ++k) {
+          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data(
+              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
+        }
+      }
+    }
+    var_idx++;
+  }
+  return 0;
+}
+
+DEFINE_OP(GeneralResponseOp);
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/core/general-server/op/general_text_infer_op.h b/core/general-server/op/general_response_op.h
similarity index 94%
rename from core/general-server/op/general_text_infer_op.h
rename to core/general-server/op/general_response_op.h
index 5c94a205f8fc03cb8b4d23bb556a8755b7cb55ea..95b9f7708df82dc1ce750cb4067b79c8ca60fd05 100644
--- a/core/general-server/op/general_text_infer_op.h
+++ b/core/general-server/op/general_response_op.h
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #pragma once
+#include
 #include
 #ifdef BCLOUD
 #ifdef WITH_GPU
@@ -29,15 +30,16 @@ namespace baidu {
 namespace paddle_serving {
 namespace serving {
 
-class GeneralTextInferOp
+class GeneralResponseOp
     : public baidu::paddle_serving::predictor::OpWithChannel<
           baidu::paddle_serving::predictor::general_model::Response> {
  public:
   typedef std::vector<paddle::PaddleTensor> TensorVector;
 
-  DECLARE_OP(GeneralTextInferOp);
+  DECLARE_OP(GeneralResponseOp);
 
   int inference();
+
 };
 
 }  // namespace serving
diff --git a/core/general-server/op/general_text_reader_op.cpp b/core/general-server/op/general_text_reader_op.cpp
index 9e3c489f6557d6d5e9931ca317718e5f4e3e3683..bc6d96a7bb507dcc1c74dac28bfe9a908067ffed 100644
--- a/core/general-server/op/general_text_reader_op.cpp
+++ b/core/general-server/op/general_text_reader_op.cpp
@@ -42,7 +42,7 @@ int GeneralTextReaderOp::inference() {
   std::vector<int> elem_size;
   std::vector<int> capacity;
 
-  GeneralTextReaderOutput *res = mutable_data<GeneralTextReaderOutput>();
+  GeneralBlob *res = mutable_data<GeneralBlob>();
   TensorVector *in = &res->tensor_vector;
 
   if (!res) {
@@ -50,8 +50,8 @@
   }
 
   if (batch_size <= 0) {
-    res->reader_status = -1;
-    return 0;
+    LOG(ERROR) << "Batch size < 0";
+    return -1;
   }
 
   int var_num = req->insts(0).tensor_array_size();
diff --git a/core/general-server/op/general_text_reader_op.h b/core/general-server/op/general_text_reader_op.h
index e7f484a9915609887c2a6593ff1ad0655fc4789c..80573a15cbbacf0d2682ea1b225ef7732d54e9ad 100644
--- a/core/general-server/op/general_text_reader_op.h
+++ b/core/general-server/op/general_text_reader_op.h
@@ -25,6 +25,7 @@
 #endif
 #include
 #include "core/predictor/framework/resource.h"
+#include "core/general-server/op/general_infer_helper.h"
 #include "core/general-server/general_model_service.pb.h"
 #include "core/general-server/load_general_model_service.pb.h"
 
@@ -32,23 +33,8 @@ namespace baidu {
 namespace paddle_serving {
 namespace serving {
 
-struct GeneralTextReaderOutput {
-  std::vector<paddle::PaddleTensor> tensor_vector;
-  int reader_status = 0;
-
-  void Clear() {
-    size_t tensor_count = tensor_vector.size();
-    for (size_t ti = 0; ti < tensor_count; ++ti) {
-      tensor_vector[ti].shape.clear();
-    }
-    tensor_vector.clear();
-  }
-  std::string ShortDebugString() const { return "Not implemented!"; }
-};
-
 class GeneralTextReaderOp :
-    public baidu::paddle_serving::predictor::OpWithChannel<
-        GeneralTextReaderOutput> {
+    public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
  public:
   typedef std::vector<paddle::PaddleTensor> TensorVector;
diff --git a/core/general-server/op/general_text_infer_op.cpp b/core/general-server/op/general_text_response_op.cpp
similarity index 66%
rename from core/general-server/op/general_text_infer_op.cpp
rename to core/general-server/op/general_text_response_op.cpp
index 79e671047f3bdd026a1a33cfbc056e5775ed6a68..16f476353965384c0d2b0092f2a42efa9c5289e7 100644
--- a/core/general-server/op/general_text_infer_op.cpp
+++ b/core/general-server/op/general_text_response_op.cpp
@@ -16,10 +16,7 @@
 #include
 #include
 #include
-#include "core/general-server/op/general_text_infer_op.h"
-#include "core/general-server/op/general_infer_op.h"
-#include "core/general-server/op/general_text_reader_op.h"
-#include "core/general-server/op/general_reader_op.h"
+#include "core/general-server/op/general_text_response_op.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
 #include "core/predictor/framework/resource.h"
@@ -29,7 +26,6 @@ namespace baidu {
 namespace paddle_serving {
 namespace serving {
 
-using baidu::paddle_serving::serving::GENERAL_MODEL_NAME;
 using baidu::paddle_serving::Timer;
 using baidu::paddle_serving::predictor::MempoolWrapper;
 using baidu::paddle_serving::predictor::general_model::Tensor;
@@ -39,40 +35,21 @@
 using baidu::paddle_serving::predictor::general_model::FetchInst;
 using baidu::paddle_serving::predictor::InferManager;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 
-int GeneralTextInferOp::inference() {
-  const GeneralTextReaderOutput *reader_out =
-      get_depend_argument<GeneralTextReaderOutput>("general_text_reader_op");
-  if (!reader_out) {
-    LOG(ERROR) << "Failed mutable depended argument, op:"
-               << "general_text_reader_op";
-    return -1;
-  }
+int GeneralTextResponseOp::inference() {
+  const GeneralBlob *input_blob =
+      get_depend_argument<GeneralBlob>(pre_name());
 
-  int reader_status = reader_out->reader_status;
-  if (reader_status != 0) {
-    LOG(ERROR) << "Read request wrong.";
+  if (!input_blob) {
+    LOG(ERROR) << "Failed mutable depended argument, op: "
+               << pre_name();
     return -1;
   }
 
-  const TensorVector *in = &reader_out->tensor_vector;
-  TensorVector *out = butil::get_object<TensorVector>();
-  int batch_size = 0;
-  if (in->at(0).lod.size() == 1) {
-    batch_size = in->at(0).lod[0].size() - 1;
-  } else {
-    batch_size = in->at(0).shape[0];
-  }
+  const TensorVector *in = &input_blob->tensor_vector;
+  int batch_size = input_blob->GetBatchSize();
+  VLOG(2) << "infer batch size: " << batch_size;
   // infer
-  Timer timeline;
-  double infer_time = 0.0;
-  timeline.Start();
-  if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) {
-    LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
-    return -1;
-  }
-  timeline.Pause();
-  infer_time = timeline.ElapsedUS();
 
   const Request *req = dynamic_cast<const Request *>(get_request_message());
 
@@ -94,7 +71,7 @@ int GeneralTextInferOp::inference() {
   // response inst with only fetch_var_names
   Response *res = mutable_data<Response>();
 
-  res->set_mean_infer_us(infer_time);
+  // res->set_mean_infer_us(infer_time);
 
   for (int i = 0; i < batch_size; ++i) {
     FetchInst *fetch_inst = res->add_insts();
@@ -107,10 +84,10 @@
         tensor->add_shape(-1);
       } else {
         VLOG(2) << "out[" << idx << "] is tensor";
-        for (int k = 1; k < out->at(idx).shape.size(); ++k) {
+        for (int k = 1; k < in->at(idx).shape.size(); ++k) {
           VLOG(2) << "shape[" << k - 1 << "]: "
-                  << out->at(idx).shape[k];
-          tensor->add_shape(out->at(idx).shape[k]);
+                  << in->at(idx).shape[k];
+          tensor->add_shape(in->at(idx).shape[k]);
         }
       }
     }
@@ -118,15 +95,15 @@
 
   int var_idx = 0;
   for (auto & idx : fetch_index) {
-    float *data_ptr = static_cast<float *>(out->at(idx).data.data());
+    float *data_ptr = static_cast<float *>(in->at(idx).data.data());
     int cap = 1;
-    for (int j = 1; j < out->at(idx).shape.size(); ++j) {
-      cap *= out->at(idx).shape[j];
+    for (int j = 1; j < in->at(idx).shape.size(); ++j) {
+      cap *= in->at(idx).shape[j];
     }
     if (model_config->_is_lod_fetch[idx]) {
       for (int j = 0; j < batch_size; ++j) {
-        for (int k = out->at(idx).lod[0][j];
-             k < out->at(idx).lod[0][j + 1]; k++) {
+        for (int k = in->at(idx).lod[0][j];
+             k < in->at(idx).lod[0][j + 1]; k++) {
           res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_float_data(
               data_ptr[k]);
         }
@@ -143,7 +120,7 @@
   }
   return 0;
 }
-DEFINE_OP(GeneralTextInferOp);
+DEFINE_OP(GeneralTextResponseOp);
 
 }  // namespace serving
 }  // namespace paddle_serving
diff --git a/core/general-server/op/general_text_response_op.h b/core/general-server/op/general_text_response_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..5efefefb77e03f73d321cb1f2c91c7e183cb5a6b
--- /dev/null
+++ b/core/general-server/op/general_text_response_op.h
@@ -0,0 +1,48 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include
+#include
+#ifdef BCLOUD
+#ifdef WITH_GPU
+#include "paddle/paddle_inference_api.h"
+#else
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#endif
+#else
+#include "paddle_inference_api.h"  // NOLINT
+#endif
+#include "core/general-server/general_model_service.pb.h"
+#include "core/general-server/op/general_infer_helper.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+class GeneralTextResponseOp
+    : public baidu::paddle_serving::predictor::OpWithChannel<
+          baidu::paddle_serving::predictor::general_model::Response> {
+ public:
+  typedef std::vector<paddle::PaddleTensor> TensorVector;
+
+  DECLARE_OP(GeneralTextResponseOp);
+
+  int inference();
+
+};
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/core/predictor/framework/dag_view.cpp b/core/predictor/framework/dag_view.cpp
index fe68ddf98df97ede819b41c0ed6d974c1b4e31f6..42a61b0885b7ecd9c4061c8bde24879d998a1ec1 100644
--- a/core/predictor/framework/dag_view.cpp
+++ b/core/predictor/framework/dag_view.cpp
@@ -45,6 +45,8 @@ int DagView::init(Dag* dag, const std::string& service_name) {
                  << "at:" << si;
       return ERR_MEM_ALLOC_FAILURE;
     }
+    VLOG(2) << "stage[" << si << "] name: " << stage->full_name;
+    VLOG(2) << "stage[" << si << "] node size: " << stage->nodes.size();
     vstage->full_name = service_name + NAME_DELIMITER + stage->full_name;
     uint32_t node_size = stage->nodes.size();
     // create tls view node
@@ -63,16 +65,30 @@
       }
 
       // initialize a TLS op object
+      VLOG(2) << "dag view initialized: \n"
+              << "node id: " << node->id << "\n"
+              << "node name: " << node->name << "\n"
+              << "node type: " << node->type;
       if (op->init(_bus, dag, node->id, node->name, node->type,
                    node->conf) != 0) {
         LOG(WARNING) << "Failed init op, type:" << node->type;
         return ERR_INTERNAL_FAILURE;
       }
+
+      op->set_full_name(service_name + NAME_DELIMITER + node->full_name);
       vnode->conf = node;
       vnode->op = op;
       vstage->nodes.push_back(vnode);
     }
+    // TODO(guru4elephant): this seems buggy, please review later
+    if (si > 0) {
+      VLOG(2) << "set op pre name: \n"
+              << "current op name: " << vstage->nodes.back()->op->op_name()
+              << " previous op name: "
+              << _view[si-1]->nodes.back()->op->op_name();
+      vstage->nodes.back()->op->set_pre_node_name(
+          _view[si-1]->nodes.back()->op->op_name());
+    }
     _view.push_back(vstage);
   }
diff --git a/core/predictor/op/op.cpp b/core/predictor/op/op.cpp
index ed3e80fb4439ccdb844aaa72cb8c0f7839d0cff0..d2e512eb69af0b70cbe07b5bc75c3acb88fea918 100644
--- a/core/predictor/op/op.cpp
+++ b/core/predictor/op/op.cpp
@@ -127,6 +127,7 @@ int Op::process(bool debug) {
       return -1;
     }
   }*/
+
   if (debug && _timer) {
     _timer->check("depend");
   }
diff --git a/core/predictor/op/op.h b/core/predictor/op/op.h
index 279744aebd509635aa76e0e26b4f0695c370d22d..84bcf44575826a6ab00e037ce57e119ffbe4f3f3 100644
--- a/core/predictor/op/op.h
+++ b/core/predictor/op/op.h
@@ -128,10 +128,18 @@ class Op {
   const char* name() const;
 
+  const std::string& op_name() const { return _name; }
+
   const std::string& full_name() const { return _full_name; }
 
+  const std::string& pre_name() const { return _pre_node_name; }
+
   void set_full_name(const std::string full_name) { _full_name = full_name; }
 
+  void set_pre_node_name(const std::string pre_name) {
+    _pre_node_name = pre_name;
+  }
+
   const std::string& type() const;
 
   uint32_t id() const;
 
@@ -181,6 +189,7 @@ class Op {
   Bus* _bus;
   Dag* _dag;
   uint32_t _id;
+  std::string _pre_node_name;  // only for sequential execution
   std::string _name;
   std::string _full_name;  // service_workflow_stageindex_opname
   std::string _type;
diff --git a/go/client_app/imdb_client.go b/go/client_app/imdb_client.go
index f6ad12048e81bdad80b877325c17972b2481cf7a..aef823ed2c5209217d4f60f93d19006e67dca35d 100644
--- a/go/client_app/imdb_client.go
+++ b/go/client_app/imdb_client.go
@@ -21,7 +21,7 @@ import (
 	"bufio"
 	"strconv"
 	"os"
-	"serving_client"
+	serving_client "github.com/PaddlePaddle/Serving/go/serving_client"
 )
 
 func main() {
diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py
index 458a3da5e02a9799eb042674708e994c98bc6701..f8cf66d7b775eaa954a451147b811b7b29b7fe1b 100644
--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -23,12 +23,15 @@ from version import serving_server_version
 
 class OpMaker(object):
     def __init__(self):
-        self.op_dict = {"general_infer":"GeneralInferOp",
-                        "general_text_infer":"GeneralTextInferOp",
-                        "general_reader":"GeneralReaderOp",
-                        "general_text_reader":"GeneralTextReaderOp",
-                        "general_single_kv":"GeneralSingleKVOp",
-                        "general_dist_kv":"GeneralDistKVOp"}
+        self.op_dict = {
+            "general_infer":"GeneralInferOp",
+            "general_reader":"GeneralReaderOp",
+            "general_response":"GeneralResponseOp",
+            "general_text_reader":"GeneralTextReaderOp",
+            "general_text_response":"GeneralTextResponseOp",
+            "general_single_kv":"GeneralSingleKVOp",
+            "general_dist_kv":"GeneralDistKVOp"
+        }
 
     # currently, inputs and outputs are not used
     # when we have OpGraphMaker, inputs and outputs are necessary
diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py
index a5b584ba18dd9cb4fc38b8f603c6e6a835bb1444..65abb6728f9a5d9560f4c7f99c2792ed9dd9e95c 100644
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -24,10 +24,13 @@ from version import serving_server_version
 class OpMaker(object):
     def __init__(self):
         self.op_dict = {
-            "general_infer": "GeneralInferOp",
-            "general_reader": "GeneralReaderOp",
-            "general_single_kv": "GeneralSingleKVOp",
-            "general_dist_kv": "GeneralDistKVOp"
+            "general_infer":"GeneralInferOp",
+            "general_reader":"GeneralReaderOp",
+            "general_response":"GeneralResponseOp",
+            "general_text_reader":"GeneralTextReaderOp",
+            "general_text_response":"GeneralTextResponseOp",
+            "general_single_kv":"GeneralSingleKVOp",
+            "general_dist_kv":"GeneralDistKVOp"
         }
 
     # currently, inputs and outputs are not used
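
For reference, a minimal sketch of how the refactored op chain could be wired up from Python after this change. The op names come from the `op_dict` above; the `OpSeqMaker` and `Server` helpers are assumed to be the ones shipped alongside `OpMaker` in `paddle_serving_server`, and the model directory, workdir, and port are placeholders. The point is that reader, infer, and response are now three separate ops chained in sequence, each reading its predecessor's `GeneralBlob` via `pre_name()`.

```python
# Sketch only: assumes OpSeqMaker/Server from paddle_serving_server;
# model dir, workdir, and port below are placeholders.
import paddle_serving_server as serving

op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader')        # GeneralReaderOp
infer_op = op_maker.create('general_infer')        # GeneralInferOp
response_op = op_maker.create('general_response')  # GeneralResponseOp

# The sequence determines pre_name(): general_infer consumes the blob
# produced by general_reader, and general_response consumes the blob
# produced by general_infer.
op_seq_maker = serving.OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(infer_op)
op_seq_maker.add_op(response_op)

server = serving.Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.load_model_config("serving_server_model")   # placeholder model dir
server.prepare_server(workdir="workdir", port=9393, device="cpu")
server.run_server()
```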