diff --git a/core/general-server/op/general_text_infer_op.cpp b/core/general-server/op/general_text_infer_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d5acc83881b50da37c3b9526db475d4fe6e0453b
--- /dev/null
+++ b/core/general-server/op/general_text_infer_op.cpp
@@ -0,0 +1,152 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include "core/general-server/op/general_text_infer_op.h"
+#include "core/general-server/op/general_infer_op.h"
+#include "core/general-server/op/general_text_reader_op.h"
+#include "core/general-server/op/general_reader_op.h"
+#include "core/predictor/framework/infer.h"
+#include "core/predictor/framework/memory.h"
+#include "core/predictor/framework/resource.h"
+#include "core/util/include/timer.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+using baidu::paddle_serving::serving::GENERAL_MODEL_NAME;
+using baidu::paddle_serving::Timer;
+using baidu::paddle_serving::predictor::MempoolWrapper;
+using baidu::paddle_serving::predictor::general_model::Tensor;
+using baidu::paddle_serving::predictor::general_model::Response;
+using baidu::paddle_serving::predictor::general_model::Request;
+using baidu::paddle_serving::predictor::general_model::FetchInst;
+using baidu::paddle_serving::predictor::InferManager;
+using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
+
+int GeneralTextInferOp::inference() {
+  const GeneralTextReaderOutput *reader_out =
+      get_depend_argument<GeneralTextReaderOutput>("general_text_reader_op");
+  if (!reader_out) {
+    LOG(ERROR) << "Failed mutable depended argument, op: "
+               << "general_text_reader_op";
+    return -1;
+  }
+
+  int reader_status = reader_out->reader_status;
+  if (reader_status != 0) {
+    LOG(ERROR) << "Read request wrong.";
+    return -1;
+  }
+
+  const TensorVector *in = &reader_out->tensor_vector;
+  TensorVector *out = butil::get_object<TensorVector>();
+  int batch_size = (*in)[0].shape[0];
+  // infer
+  Timer timeline;
+  double infer_time = 0.0;
+  timeline.Start();
+  if (InferManager::instance().infer(
+          GENERAL_MODEL_NAME, in, out, batch_size)) {
+    LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
+    return -1;
+  }
+  timeline.Pause();
+  infer_time = timeline.ElapsedUS();
+
+  const Request *req =
+      dynamic_cast<const Request *>(get_request_message());
+
+  VLOG(2) << "start to call load general model_conf op";
+  baidu::paddle_serving::predictor::Resource &resource =
+      baidu::paddle_serving::predictor::Resource::instance();
+
+  VLOG(2) << "get resource pointer done.";
+  std::shared_ptr<PaddleGeneralModelConfig> model_config =
+      resource.get_general_model_config();
+
+  std::vector<int> fetch_index;
+  fetch_index.resize(req->fetch_var_names_size());
+  for (int i = 0; i < req->fetch_var_names_size(); ++i) {
+    fetch_index[i] =
+        model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
+  }
+
+  // response inst with only fetch_var_names
+  Response *res = mutable_data<Response>();
+
+  res->set_mean_infer_us(infer_time);
+
+  for (int i = 0; i < batch_size; ++i) {
+    FetchInst *fetch_inst = res->add_insts();
+    for (auto &idx : fetch_index) {
+      Tensor *tensor = fetch_inst->add_tensor_array();
+      // currently only response float tensor or lod_tensor
+      tensor->set_elem_type(1);
+      if (model_config->_is_lod_fetch[idx]) {
+        VLOG(2) << "out[" << idx << "] is lod_tensor";
+        tensor->add_shape(-1);
+      } else {
+        VLOG(2) << "out[" << idx << "] is tensor";
+        for (size_t k = 1; k < out->at(idx).shape.size(); ++k) {
+          VLOG(2) << "shape[" << k - 1 << "]: " << out->at(idx).shape[k];
+          tensor->add_shape(out->at(idx).shape[k]);
+        }
+      }
+    }
+  }
+
+  int var_idx = 0;
+  for (auto &idx : fetch_index) {
+    float *data_ptr = static_cast<float *>(out->at(idx).data.data());
+    int cap = 1;
+    for (size_t j = 1; j < out->at(idx).shape.size(); ++j) {
+      cap *= out->at(idx).shape[j];
+    }
+    if (model_config->_is_lod_fetch[idx]) {
+      for (int j = 0; j < batch_size; ++j) {
+        for (int k = out->at(idx).lod[0][j]; k < out->at(idx).lod[0][j + 1];
+             k++) {
+          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_float_data(
+              data_ptr[k]);
+          /*
+          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data(
+              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
+          */
+        }
+      }
+    } else {
+      for (int j = 0; j < batch_size; ++j) {
+        for (int k = j * cap; k < (j + 1) * cap; ++k) {
+          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_float_data(
+              data_ptr[k]);
+          /*
+          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_data(
+              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
+          */
+        }
+      }
+    }
+    var_idx++;
+  }
+  return 0;
+}
+DEFINE_OP(GeneralTextInferOp);
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
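The response-packing loops above slice one flat fetch buffer per instance: for a lod fetch var, instance j owns data_ptr[lod[0][j] .. lod[0][j+1]); for a dense var it owns a fixed slice of `cap` elements. The following standalone sketch (not part of the patch; data and names are illustrative) shows the lod slicing scheme in isolation:

// Minimal sketch of the lod-based slicing used by GeneralTextInferOp.
// The inner printf stands in for add_float_data(data_ptr[k]).
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> data = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};
  std::vector<size_t> lod = {0, 2, 6};  // batch of 2: lengths 2 and 4
  int batch_size = 2;
  for (int j = 0; j < batch_size; ++j) {
    std::printf("inst %d:", j);
    for (size_t k = lod[j]; k < lod[j + 1]; ++k) {
      std::printf(" %.1f", data[k]);
    }
    std::printf("\n");
  }
  return 0;
}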
diff --git a/core/general-server/op/general_text_infer_op.h b/core/general-server/op/general_text_infer_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..5c94a205f8fc03cb8b4d23bb556a8755b7cb55ea
--- /dev/null
+++ b/core/general-server/op/general_text_infer_op.h
@@ -0,0 +1,45 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <vector>
+#ifdef BCLOUD
+#ifdef WITH_GPU
+#include "paddle/paddle_inference_api.h"
+#else
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#endif
+#else
+#include "paddle_inference_api.h"  // NOLINT
+#endif
+#include "core/general-server/general_model_service.pb.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+class GeneralTextInferOp
+    : public baidu::paddle_serving::predictor::OpWithChannel<
+          baidu::paddle_serving::predictor::general_model::Response> {
+ public:
+  typedef std::vector<paddle::PaddleTensor> TensorVector;
+
+  DECLARE_OP(GeneralTextInferOp);
+
+  int inference();
+};
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
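The two ops communicate through the framework's typed channel: a producer materializes its output with mutable_data<T>() and a consumer fetches it by the producer's node name with get_depend_argument<T>(). Here is a toy rendering of that contract (invented names, not the framework's real classes) to make the data flow concrete:

// Toy sketch of the producer/consumer channel contract. "Blackboard"
// and its methods are hypothetical stand-ins for the op framework.
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct Blackboard {
  std::map<std::string, std::shared_ptr<void>> slots;
  template <typename T>
  T *mutable_data(const std::string &op_name) {
    auto p = std::make_shared<T>();
    slots[op_name] = p;  // publish under the producer's node name
    return p.get();
  }
  template <typename T>
  const T *get_depend_argument(const std::string &op_name) {
    auto it = slots.find(op_name);
    return it == slots.end() ? nullptr
                             : static_cast<const T *>(it->second.get());
  }
};

struct ReaderOut { int reader_status = 0; };

int main() {
  Blackboard bb;
  bb.mutable_data<ReaderOut>("general_text_reader_op")->reader_status = 0;
  const ReaderOut *r =
      bb.get_depend_argument<ReaderOut>("general_text_reader_op");
  std::cout << (r ? r->reader_status : -1) << "\n";
  return 0;
}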
diff --git a/core/general-server/op/general_text_reader_op.cpp b/core/general-server/op/general_text_reader_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9e3c489f6557d6d5e9931ca317718e5f4e3e3683
--- /dev/null
+++ b/core/general-server/op/general_text_reader_op.cpp
@@ -0,0 +1,166 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include "core/general-server/op/general_text_reader_op.h"
+#include "core/predictor/framework/infer.h"
+#include "core/predictor/framework/memory.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+using baidu::paddle_serving::predictor::MempoolWrapper;
+using baidu::paddle_serving::predictor::general_model::Tensor;
+using baidu::paddle_serving::predictor::general_model::Request;
+using baidu::paddle_serving::predictor::general_model::FeedInst;
+using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
+
+int GeneralTextReaderOp::inference() {
+  // read request from client
+  const Request *req =
+      dynamic_cast<const Request *>(get_request_message());
+
+  int batch_size = req->insts_size();
+  int input_var_num = 0;
+
+  std::vector<int64_t> elem_type;
+  std::vector<int64_t> elem_size;
+  std::vector<int64_t> capacity;
+
+  GeneralTextReaderOutput *res = mutable_data<GeneralTextReaderOutput>();
+  if (!res) {
+    LOG(ERROR) << "Failed get op tls reader object output";
+    return -1;
+  }
+  TensorVector *in = &res->tensor_vector;
+
+  if (batch_size <= 0) {
+    res->reader_status = -1;
+    return 0;
+  }
+
+  int var_num = req->insts(0).tensor_array_size();
+  VLOG(2) << "var num: " << var_num;
+  // read config
+
+  VLOG(2) << "start to call load general model_conf op";
+  baidu::paddle_serving::predictor::Resource &resource =
+      baidu::paddle_serving::predictor::Resource::instance();
+
+  VLOG(2) << "get resource pointer done.";
+  std::shared_ptr<PaddleGeneralModelConfig> model_config =
+      resource.get_general_model_config();
+
+  VLOG(2) << "print general model config done.";
+
+  elem_type.resize(var_num);
+  elem_size.resize(var_num);
+  capacity.resize(var_num);
+  for (int i = 0; i < var_num; ++i) {
+    paddle::PaddleTensor lod_tensor;
+    elem_type[i] = req->insts(0).tensor_array(i).elem_type();
+    VLOG(2) << "var[" << i << "] has elem type: " << elem_type[i];
+    if (elem_type[i] == 0) {  // int64
+      elem_size[i] = sizeof(int64_t);
+      lod_tensor.dtype = paddle::PaddleDType::INT64;
+    } else {
+      elem_size[i] = sizeof(float);
+      lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
+    }
+
+    if (req->insts(0).tensor_array(i).shape(0) == -1) {
+      lod_tensor.lod.resize(1);
+      lod_tensor.lod[0].push_back(0);
+      VLOG(2) << "var[" << i << "] is lod_tensor";
+    } else {
+      lod_tensor.shape.push_back(batch_size);
+      capacity[i] = 1;
+      for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
+        int dim = req->insts(0).tensor_array(i).shape(k);
+        VLOG(2) << "shape for var[" << i << "]: " << dim;
+        capacity[i] *= dim;
+        lod_tensor.shape.push_back(dim);
+      }
+      VLOG(2) << "var[" << i << "] is tensor, capacity: " << capacity[i];
+    }
+    lod_tensor.name = model_config->_feed_name[i];
+    in->push_back(lod_tensor);
+  }
+
+  for (int i = 0; i < var_num; ++i) {
+    if (in->at(i).lod.size() == 1) {
+      for (int j = 0; j < batch_size; ++j) {
+        const Tensor &tensor = req->insts(j).tensor_array(i);
+        int data_len = tensor.int_data_size();
+        int cur_len = in->at(i).lod[0].back();
+        in->at(i).lod[0].push_back(cur_len + data_len);
+      }
+      in->at(i).data.Resize(in->at(i).lod[0].back() * elem_size[i]);
+      in->at(i).shape = {in->at(i).lod[0].back(), 1};
+      VLOG(2) << "var[" << i
+              << "] is lod_tensor and len=" << in->at(i).lod[0].back();
+    } else {
+      in->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
+      VLOG(2) << "var[" << i
+              << "] is tensor and capacity=" << batch_size * capacity[i];
+    }
+  }
+
+  for (int i = 0; i < var_num; ++i) {
+    if (elem_type[i] == 0) {
+      int64_t *dst_ptr = static_cast<int64_t *>(in->at(i).data.data());
+      int offset = 0;
+      for (int j = 0; j < batch_size; ++j) {
+        for (int k = 0; k < req->insts(j).tensor_array(i).int_data_size();
+             ++k) {
+          dst_ptr[offset + k] = req->insts(j).tensor_array(i).int_data(k);
+        }
+        if (in->at(i).lod.size() == 1) {
+          offset = in->at(i).lod[0][j + 1];
+        } else {
+          offset += capacity[i];
+        }
+      }
+    } else {
+      float *dst_ptr = static_cast<float *>(in->at(i).data.data());
+      int offset = 0;
+      for (int j = 0; j < batch_size; ++j) {
+        for (int k = 0; k < req->insts(j).tensor_array(i).float_data_size();
+             ++k) {
+          dst_ptr[offset + k] = req->insts(j).tensor_array(i).float_data(k);
+        }
+        if (in->at(i).lod.size() == 1) {
+          offset = in->at(i).lod[0][j + 1];
+        } else {
+          offset += capacity[i];
+        }
+      }
+    }
+  }
+
+  VLOG(2) << "read data from client success";
+  return 0;
+}
+DEFINE_OP(GeneralTextReaderOp);
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
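The reader's feed path does the inverse of the infer op's fetch path: it first accumulates per-instance lengths into lod[0], sizes one flat buffer from lod[0].back(), then copies each instance at the offset the previous lod entry dictates. A standalone sketch of that bookkeeping, with illustrative data and no framework types:

// Minimal sketch of the lod accumulation and offset-based copy performed
// by GeneralTextReaderOp for a variable-length int64 feed var.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<std::vector<int64_t>> insts = {{7, 8}, {9}, {10, 11, 12}};
  std::vector<size_t> lod = {0};
  for (const auto &inst : insts) {
    lod.push_back(lod.back() + inst.size());  // cur_len + data_len
  }
  std::vector<int64_t> buf(lod.back());       // one flat buffer
  for (size_t j = 0; j < insts.size(); ++j) {
    size_t offset = lod[j];                   // instance j's start
    for (size_t k = 0; k < insts[j].size(); ++k) {
      buf[offset + k] = insts[j][k];
    }
  }
  for (size_t j = 0; j + 1 < lod.size(); ++j) {
    std::printf("inst %zu -> [%zu, %zu)\n", j, lod[j], lod[j + 1]);
  }
  return 0;
}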
diff --git a/core/general-server/op/general_text_reader_op.h b/core/general-server/op/general_text_reader_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..e7f484a9915609887c2a6593ff1ad0655fc4789c
--- /dev/null
+++ b/core/general-server/op/general_text_reader_op.h
@@ -0,0 +1,62 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <vector>
+#ifdef BCLOUD
+#ifdef WITH_GPU
+#include "paddle/paddle_inference_api.h"
+#else
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#endif
+#else
+#include "paddle_inference_api.h"  // NOLINT
+#endif
+#include <string>
+#include "core/predictor/framework/resource.h"
+#include "core/general-server/general_model_service.pb.h"
+#include "core/general-server/load_general_model_service.pb.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+struct GeneralTextReaderOutput {
+  std::vector<paddle::PaddleTensor> tensor_vector;
+  int reader_status = 0;
+
+  void Clear() {
+    size_t tensor_count = tensor_vector.size();
+    for (size_t ti = 0; ti < tensor_count; ++ti) {
+      tensor_vector[ti].shape.clear();
+    }
+    tensor_vector.clear();
+  }
+  std::string ShortDebugString() const { return "Not implemented!"; }
+};
+
+class GeneralTextReaderOp
+    : public baidu::paddle_serving::predictor::OpWithChannel<
+          GeneralTextReaderOutput> {
+ public:
+  typedef std::vector<paddle::PaddleTensor> TensorVector;
+
+  DECLARE_OP(GeneralTextReaderOp);
+
+  int inference();
+};
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
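For the pair to run, the serving DAG must chain the reader before the infer op, and the node name of the reader must match the string passed to get_depend_argument<GeneralTextReaderOutput>("general_text_reader_op") above. The sketch below shows how such a chain might look in a workflow config; the exact schema can differ across Paddle Serving versions, and the node/type names here simply follow the DEFINE_OP registrations in this patch:

# Hedged sketch of a workflow config wiring the two ops (schema assumed).
workflows {
  name: "workflow1"
  workflow_type: "Sequence"
  nodes {
    name: "general_text_reader_op"   # must match get_depend_argument()
    type: "GeneralTextReaderOp"
  }
  nodes {
    name: "general_text_infer_op"
    type: "GeneralTextInferOp"
    dependencies {
      name: "general_text_reader_op"
      mode: "RO"
    }
  }
}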