diff --git a/examples/demo-serving/op/general_infer_op.cpp b/examples/demo-serving/op/general_infer_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..742d27ef4612b8b201f8b21b5058dbf7525c9a9d
--- /dev/null
+++ b/examples/demo-serving/op/general_infer_op.cpp
@@ -0,0 +1,118 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "examples/demo-serving/op/general_infer_op.h"
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include "core/predictor/framework/infer.h"
+#include "core/predictor/framework/memory.h"
+#include "core/predictor/framework/resource.h"
+#include "examples/demo-serving/op/general_reader_op.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+using baidu::paddle_serving::predictor::MempoolWrapper;
+using baidu::paddle_serving::predictor::general_model::Tensor;
+using baidu::paddle_serving::predictor::general_model::Response;
+using baidu::paddle_serving::predictor::general_model::FetchInst;
+using baidu::paddle_serving::predictor::InferManager;
+
+int GeneralInferOp::inference() {
+  const GeneralReaderOutput *reader_out =
+      get_depend_argument<GeneralReaderOutput>("general_reader_op");
+  if (!reader_out) {
+    LOG(ERROR) << "Failed mutable depended argument, op:"
+               << "general_reader_op";
+    return -1;
+  }
+
+  int reader_status = reader_out->reader_status;
+  if (reader_status != 0) {
+    LOG(ERROR) << "Read request wrong.";
+    return -1;
+  }
+
+  const TensorVector *in = &reader_out->tensor_vector;
+  TensorVector *out = butil::get_object<TensorVector>();
+  int batch_size = (*in)[0].shape[0];
+  // infer
+  if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) {
+    LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
+    return -1;
+  }
+
+  Response *res = mutable_data<Response>();
+
+  for (int i = 0; i < batch_size; ++i) {
+    FetchInst *fetch_inst = res->add_insts();
+    for (int j = 0; j < out->size(); ++j) {
+      Tensor *tensor = fetch_inst->add_tensor_array();
+      tensor->set_elem_type(1);
+      if (out->at(j).lod.size() == 1) {
+        tensor->add_shape(-1);
+      } else {
+        for (int k = 1; k < out->at(j).shape.size(); ++k) {
+          tensor->add_shape(out->at(j).shape[k]);
+        }
+      }
+    }
+  }
+
+  for (int i = 0; i < out->size(); ++i) {
+    float *data_ptr = static_cast<float *>(out->at(i).data.data());
+    int cap = 1;
+    for (int j = 1; j < out->at(i).shape.size(); ++j) {
+      cap *= out->at(i).shape[j];
+    }
+    if (out->at(i).lod.size() == 1) {
+      for (int j = 0; j < batch_size; ++j) {
+        for (int k = out->at(i).lod[0][j]; k < out->at(i).lod[0][j + 1]; k++) {
+          res->mutable_insts(j)->mutable_tensor_array(i)->add_data(
+              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
+        }
+      }
+    } else {
+      for (int j = 0; j < batch_size; ++j) {
+        for (int k = j * cap; k < (j + 1) * cap; ++k) {
+          res->mutable_insts(j)->mutable_tensor_array(i)->add_data(
+              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
+        }
+      }
+    }
+  }
+  /*
+  for (size_t i = 0; i < in->size(); ++i) {
+    (*in)[i].shape.clear();
+  }
+  in->clear();
+  butil::return_object(in);
+
+  for (size_t i = 0; i < out->size(); ++i) {
+    (*out)[i].shape.clear();
+  }
+  out->clear();
+  butil::return_object(out);
+  }
+  */
+  return 0;
+}
+DEFINE_OP(GeneralInferOp);
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/examples/demo-serving/op/general_infer_op.h b/examples/demo-serving/op/general_infer_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..ca839054e0f11b40fd5f461307f3121d338028f8
--- /dev/null
+++ b/examples/demo-serving/op/general_infer_op.h
@@ -0,0 +1,47 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <vector>
+#ifdef BCLOUD
+#ifdef WITH_GPU
+#include "paddle/paddle_inference_api.h"
+#else
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#endif
+#else
+#include "paddle_inference_api.h"  // NOLINT
+#endif
+#include "examples/demo-serving/general_model_service.pb.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+static const char* GENERAL_MODEL_NAME = "general_model";
+
+class GeneralInferOp
+    : public baidu::paddle_serving::predictor::OpWithChannel<
+          baidu::paddle_serving::predictor::general_model::Response> {
+ public:
+  typedef std::vector<paddle::PaddleTensor> TensorVector;
+
+  DECLARE_OP(GeneralInferOp);
+
+  int inference();
+};
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/examples/demo-serving/op/general_reader_op.cpp b/examples/demo-serving/op/general_reader_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..995b976a3a9b7ffbdfc1685ef6bdef8c0c8fce41
--- /dev/null
+++ b/examples/demo-serving/op/general_reader_op.cpp
@@ -0,0 +1,215 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "examples/demo-serving/op/general_reader_op.h" +#include +#include +#include +#include +#include "core/predictor/framework/infer.h" +#include "core/predictor/framework/memory.h" + +namespace baidu { +namespace paddle_serving { +namespace serving { + +using baidu::paddle_serving::predictor::MempoolWrapper; +using baidu::paddle_serving::predictor::general_model::Tensor; +using baidu::paddle_serving::predictor::general_model::Request; +using baidu::paddle_serving::predictor::general_model::FeedInst; +using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; + +int conf_check(const Request *req, + const std::shared_ptr &model_config) { + int var_num = req->insts(0).tensor_array_size(); + if (var_num != model_config->_feed_type.size()) { + LOG(ERROR) << "feed var number not match."; + return -1; + } + for (int i = 0; i < var_num; ++i) { + if (model_config->_feed_type[i] != + req->insts(0).tensor_array(i).elem_type()) { + LOG(ERROR) << "feed type not match."; + return -1; + } + if (model_config->_feed_shape[i].size() == + req->insts(0).tensor_array(i).shape_size()) { + for (int j = 0; j < model_config->_feed_shape[i].size(); ++j) { + req->insts(0).tensor_array(i).shape(j); + if (model_config->_feed_shape[i][j] != + req->insts(0).tensor_array(i).shape(j)) { + LOG(ERROR) << "feed shape not match."; + return -1; + } + } + } else { + LOG(ERROR) << "feed shape not match."; + return -1; + } + } + return 0; +} + +int GeneralReaderOp::inference() { + // reade request from client + const Request *req = dynamic_cast(get_request_message()); + + int batch_size = req->insts_size(); + int input_var_num = 0; + + std::vector elem_type; + std::vector elem_size; + std::vector capacity; + + GeneralReaderOutput *res = mutable_data(); + TensorVector *in = &res->tensor_vector; + + if (!res) { + LOG(ERROR) << "Failed get op tls reader object output"; + } + if (batch_size <= 0) { + res->reader_status = -1; + return 0; + } + + int var_num = req->insts(0).tensor_array_size(); + VLOG(3) << "var num: " << var_num; + // read config + + LOG(INFO) << "start to call load general model_conf op"; + baidu::paddle_serving::predictor::Resource &resource = + baidu::paddle_serving::predictor::Resource::instance(); + + LOG(INFO) << "get resource pointer done."; + std::shared_ptr model_config = + resource.get_general_model_config(); + + LOG(INFO) << "print general model config done."; + + // check + res->reader_status = conf_check(req, model_config); + if (res->reader_status != 0) { + LOG(INFO) << "model conf of server:"; + resource.print_general_model_config(model_config); + return 0; + } + // package tensor + + elem_type.resize(var_num); + elem_size.resize(var_num); + capacity.resize(var_num); + paddle::PaddleTensor lod_tensor; + for (int i = 0; i < var_num; ++i) { + elem_type[i] = req->insts(0).tensor_array(i).elem_type(); + VLOG(3) << "var[" << i << "] has elem type: " << elem_type[i]; + if (elem_type[i] == 0) { // int64 + elem_size[i] = sizeof(int64_t); + lod_tensor.dtype = paddle::PaddleDType::INT64; + } else { + elem_size[i] = sizeof(float); + lod_tensor.dtype = paddle::PaddleDType::FLOAT32; + } + + if (req->insts(0).tensor_array(i).shape(0) == -1) { + lod_tensor.lod.resize(1); + lod_tensor.lod[0].push_back(0); + VLOG(3) << "var[" << i << "] is lod_tensor"; + } else { + lod_tensor.shape.push_back(batch_size); + capacity[i] = 1; + for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) { + int dim = req->insts(0).tensor_array(i).shape(k); + VLOG(3) << "shape for var[" << i << "]: " << dim; + 
+        capacity[i] *= dim;
+        lod_tensor.shape.push_back(dim);
+      }
+      VLOG(3) << "var[" << i << "] is tensor, capacity: " << capacity[i];
+    }
+    if (i == 0) {
+      lod_tensor.name = "words";
+    } else {
+      lod_tensor.name = "label";
+    }
+    in->push_back(lod_tensor);
+  }
+
+  for (int i = 0; i < var_num; ++i) {
+    if (in->at(i).lod.size() == 1) {
+      for (int j = 0; j < batch_size; ++j) {
+        const Tensor &tensor = req->insts(j).tensor_array(i);
+        int data_len = tensor.data_size();
+        VLOG(3) << "tensor size for var[" << i << "]: " << tensor.data_size();
+        int cur_len = in->at(i).lod[0].back();
+        VLOG(3) << "current len: " << cur_len;
+        in->at(i).lod[0].push_back(cur_len + data_len);
+        VLOG(3) << "new len: " << cur_len + data_len;
+      }
+      in->at(i).data.Resize(in->at(i).lod[0].back() * elem_size[i]);
+      in->at(i).shape = {in->at(i).lod[0].back(), 1};
+      VLOG(3) << "var[" << i
+              << "] is lod_tensor and len=" << in->at(i).lod[0].back();
+    } else {
+      in->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
+      VLOG(3) << "var[" << i
+              << "] is tensor and capacity=" << batch_size * capacity[i];
+    }
+  }
+
+  for (int i = 0; i < var_num; ++i) {
+    if (elem_type[i] == 0) {
+      int64_t *dst_ptr = static_cast<int64_t *>(in->at(i).data.data());
+      int offset = 0;
+      for (int j = 0; j < batch_size; ++j) {
+        for (int k = 0; k < req->insts(j).tensor_array(i).data_size(); ++k) {
+          dst_ptr[offset + k] =
+              *(const int64_t *)req->insts(j).tensor_array(i).data(k).c_str();
+        }
+        if (in->at(i).lod.size() == 1) {
+          offset = in->at(i).lod[0][j + 1];
+        } else {
+          offset += capacity[i];
+        }
+      }
+    } else {
+      float *dst_ptr = static_cast<float *>(in->at(i).data.data());
+      int offset = 0;
+      for (int j = 0; j < batch_size; ++j) {
+        for (int k = 0; k < req->insts(j).tensor_array(i).data_size(); ++k) {
+          dst_ptr[offset + k] =
+              *(const float *)req->insts(j).tensor_array(i).data(k).c_str();
+        }
+        if (in->at(i).lod.size() == 1) {
+          offset = in->at(i).lod[0][j + 1];
+        } else {
+          offset += capacity[i];
+        }
+      }
+    }
+  }
+
+  VLOG(3) << "read data from client success";
+  // print request
+  std::ostringstream oss;
+  int64_t *example = reinterpret_cast<int64_t *>((*in)[0].data.data());
+  for (int i = 0; i < 10; i++) {
+    oss << *(example + i) << " ";
+  }
+  VLOG(3) << "head element of first feed var : " << oss.str();
+  //
+  return 0;
+}
+DEFINE_OP(GeneralReaderOp);
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/examples/demo-serving/op/general_reader_op.h b/examples/demo-serving/op/general_reader_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..ce68dcaee53d68d707defeeeacd5dee2981120d0
--- /dev/null
+++ b/examples/demo-serving/op/general_reader_op.h
@@ -0,0 +1,61 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <vector>
+#ifdef BCLOUD
+#ifdef WITH_GPU
+#include "paddle/paddle_inference_api.h"
+#else
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#endif
+#else
+#include "paddle_inference_api.h"  // NOLINT
+#endif
+#include <string>
+#include "core/predictor/framework/resource.h"
+#include "examples/demo-serving/general_model_service.pb.h"
+#include "examples/demo-serving/load_general_model_service.pb.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+struct GeneralReaderOutput {
+  std::vector<paddle::PaddleTensor> tensor_vector;
+  int reader_status = 0;
+
+  void Clear() {
+    size_t tensor_count = tensor_vector.size();
+    for (size_t ti = 0; ti < tensor_count; ++ti) {
+      tensor_vector[ti].shape.clear();
+    }
+    tensor_vector.clear();
+  }
+  std::string ShortDebugString() const { return "Not implemented!"; }
+};
+
+class GeneralReaderOp : public baidu::paddle_serving::predictor::OpWithChannel<
+                            GeneralReaderOutput> {
+ public:
+  typedef std::vector<paddle::PaddleTensor> TensorVector;
+
+  DECLARE_OP(GeneralReaderOp);
+
+  int inference();
+};
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu