From d8b05642fbd3aec55927a69133a0aa47c54f66e5 Mon Sep 17 00:00:00 2001
From: wangguibao
Date: Mon, 29 Jul 2019 10:16:55 +0800
Subject: [PATCH] CTR prediction serving

---
 demo-serving/conf/model_toolkit.prototxt |  11 ++
 demo-serving/conf/service.prototxt       |   5 +
 demo-serving/conf/workflow.prototxt      |   8 ++
 demo-serving/op/ctr_prediction_op.cpp    | 160 +++++++++++++++++++++++
 demo-serving/op/ctr_prediction_op.h      |  71 ++++++++++
 demo-serving/proto/ctr_prediction.proto  |  40 ++++++
 doc/CTR_PREDICTION.md                    |   3 +
 7 files changed, 298 insertions(+)
 create mode 100644 demo-serving/op/ctr_prediction_op.cpp
 create mode 100644 demo-serving/op/ctr_prediction_op.h
 create mode 100644 demo-serving/proto/ctr_prediction.proto
 create mode 100644 doc/CTR_PREDICTION.md

diff --git a/demo-serving/conf/model_toolkit.prototxt b/demo-serving/conf/model_toolkit.prototxt
index 269e3474..4fcb909e 100644
--- a/demo-serving/conf/model_toolkit.prototxt
+++ b/demo-serving/conf/model_toolkit.prototxt
@@ -18,3 +18,14 @@ engines {
   batch_infer_size: 0
   enable_batch_align: 0
 }
+
+engines {
+  name: "ctr_prediction"
+  type: "FLUID_CPU_ANALYSIS_DIR"
+  reloadable_meta: "./data/model/paddle/fluid_time_file"
+  reloadable_type: "timestamp_ne"
+  model_data_path: "./data/model/paddle/fluid/ctr_prediction"
+  runtime_thread_num: 0
+  batch_infer_size: 0
+  enable_batch_align: 0
+}
diff --git a/demo-serving/conf/service.prototxt b/demo-serving/conf/service.prototxt
index 3f98211a..59c62765 100644
--- a/demo-serving/conf/service.prototxt
+++ b/demo-serving/conf/service.prototxt
@@ -31,3 +31,8 @@ services {
   name: "EchoKVDBService"
   workflows: "workflow7"
 }
+
+services {
+  name: "CTRPredictionService"
+  workflows: "workflow8"
+}
diff --git a/demo-serving/conf/workflow.prototxt b/demo-serving/conf/workflow.prototxt
index afd0db67..6b9c5ba6 100644
--- a/demo-serving/conf/workflow.prototxt
+++ b/demo-serving/conf/workflow.prototxt
@@ -75,3 +75,11 @@ workflows {
   nodes {
     type: "KVDBEchoOp"
   }
 }
+workflows {
+  name: "workflow8"
+  workflow_type: "Sequence"
+  nodes {
+    name: "ctr_prediction_service_op"
+    type: "CTRPredictionOp"
+  }
+}
diff --git a/demo-serving/op/ctr_prediction_op.cpp b/demo-serving/op/ctr_prediction_op.cpp
new file mode 100644
index 00000000..04a6cebf
--- /dev/null
+++ b/demo-serving/op/ctr_prediction_op.cpp
@@ -0,0 +1,160 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
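+//
+// CTRPredictionOp: packs each request into 27 LoD input tensors (26 sparse
+// id slots plus 1 dense slot), runs the fluid CTR model through
+// InferManager, and copies the per-sample (prob0, prob1) outputs into the
+// Response.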
+ +#include "demo-serving/op/ctr_prediction_op.h" +#include +#include "predictor/framework/infer.h" +#include "predictor/framework/memory.h" + +namespace baidu { +namespace paddle_serving { +namespace serving { + +using baidu::paddle_serving::predictor::MempoolWrapper; +using baidu::paddle_serving::predictor::ctr_prediction::CTRResInstance; +using baidu::paddle_serving::predictor::ctr_prediction::Response; +using baidu::paddle_serving::predictor::ctr_prediction::CTRReqInstance; +using baidu::paddle_serving::predictor::ctr_prediction::Request; + +const int CTR_PREDICTION_INPUT_SLOTS = + 27; // Total 26 sparse input + 1 dense input +const int CTR_PREDICTION_SPARSE_SLOTS = 26; // First 26: sparse input +const int CTR_PREDICTION_DENSE_SLOT = 26; // Last 1: dense input + +int CTRPredictionOp::inference() { + const Request *req = dynamic_cast(get_request_message()); + + TensorVector *in = butil::get_object(); + uint32_t sample_size = req->instances_size(); + if (sample_size <= 0) { + LOG(WARNING) << "No instances need to inference!"; + return -1; + } + + paddle::PaddleTensor lod_tensors[CTR_PREDICTION_INPUT_SLOTS]; + for (int i = 0; i < CTR_PREDICTION_INPUT_SLOTS; ++i) { + lod_tensors[i].dtype = paddle::PaddleDType::FLOAT32; + std::vector> &lod = lod_tensors[i].lod; + lod.resize(1); + lod[0].push_back(0); + } + + lot_tensors[CTR_PREDICTION_SPARSE_SLOTS].dtype = paddle::PaddleDType::INT64; + + for (int i = 0; i < CTR_PREDICTION_SPARSE_SLOTS; ++i) { + paddle::PaddleTensor lod_tensor = lod_tensors[i]; + std::vector> &lod = lod_tensor.lod; + + for (uint32_t si = 0; si < sample_size; ++si) { + const CTRReqInstance &req_instance = req->instances(si); + lod[0].push_back(lod[0].back() + 1); + } + + lod_tensor.shape = {lod[0].back(), 1}; + lod_tensor.data.Resize(lod[0].back() * sizeof(int64_t)); + + int offset = 0; + for (uint32_t si = 0; si < sample_size; ++si) { + int64_t *data_ptr = + static_cast(lod_tensor.data.data()) + offset; + const CTRReqInstance &req_instance = req->instances(si); + int id_count = 1; + memcpy(data_ptr, &req_instance.sparse_ids().data()[i], sizeof(int64_t)); + offset += 1; + } + + in->push_back(lod_tensor); + } + + paddle::PaddleTensor lod_tensor = lod_tensors[CTR_PREDICTION_DENSE_SLOT]; + std::vector> &lod = lod_tensor.lod; + + for (uint32_t si = 0; si < sample_size; ++si) { + const CTRReqInstance &req_instance = req->instances(si); + lod[0].push_back(lod[0].back() + req_instance.dense_ids_size()); + } + + lod_tensor.shape = {lod[0].back(), 1}; + lod_tensor.data.Resize(lod[0].back() * sizeof(int64_t)); + + int offset = 0; + for (uint32_t si = 0; si < sample_size; ++si) { + int64_t *data_ptr = static_cast(lod_tensor.data.data()) + offset; + const CTRReqInstance &req_instance = req->instances(si); + int id_count = req_instance.dense_ids_size(); + memcpy(data_ptr, + req_instance.ids().data(), + sizeof(int64_t) * req_instance.dense_ids_size()); + offset += req_instance.dense_ids_size(); + } + + in->push_back(lod_tensor); + + TensorVector *out = butil::get_object(); + if (!out) { + LOG(ERROR) << "Failed get tls output object"; + return -1; + } + + // call paddle fluid model for inferencing + if (predictor::InferManager::instance().infer( + CTR_PREDICTION_MODEL_NAME, in, out, sample_size)) { + LOG(ERROR) << "Failed do infer in fluid model: " + << CTR_PREDICTION_MODEL_NAME; + return -1; + } + + if (out->size() != in->size()) { + LOG(ERROR) << "Output tensor size not equal that of input"; + return -1; + } + + Response *res = mutable_data(); + + for (size_t i = 0; i < out->size(); ++i) 
+    int dim1 = out->at(i).shape[0];
+    int dim2 = out->at(i).shape[1];
+
+    if (out->at(i).dtype != paddle::PaddleDType::FLOAT32) {
+      LOG(ERROR) << "Expected output data type to be float";
+      return -1;
+    }
+
+    float *data = static_cast<float *>(out->at(i).data.data());
+    for (int j = 0; j < dim1; ++j) {
+      CTRResInstance *res_instance = res->add_predictions();
+      res_instance->set_prob0(data[j * dim2]);
+      res_instance->set_prob1(data[j * dim2 + 1]);
+    }
+  }
+
+  for (size_t i = 0; i < in->size(); ++i) {
+    (*in)[i].shape.clear();
+  }
+  in->clear();
+  butil::return_object(in);
+
+  for (size_t i = 0; i < out->size(); ++i) {
+    (*out)[i].shape.clear();
+  }
+  out->clear();
+  butil::return_object(out);
+  return 0;
+}
+
+DEFINE_OP(CTRPredictionOp);
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/demo-serving/op/ctr_prediction_op.h b/demo-serving/op/ctr_prediction_op.h
new file mode 100644
index 00000000..a12cccab
--- /dev/null
+++ b/demo-serving/op/ctr_prediction_op.h
@@ -0,0 +1,71 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <vector>
+#ifdef BCLOUD
+#ifdef WITH_GPU
+#include "paddle/paddle_inference_api.h"
+#else
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#endif
+#else
+#include "paddle/fluid/inference/paddle_inference_api.h"
+#endif
+#include "demo-serving/ctr_prediction.pb.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+static const char* CTR_PREDICTION_MODEL_NAME = "ctr_prediction";
+
+/**
+ * CTRPredictionOp: Serve CTR prediction requests.
+ *
+ * The original model can be found here:
+ * https://github.com/PaddlePaddle/models/tree/develop/PaddleRec/ctr
+ *
+ * NOTE:
+ *
+ * The main purpose of this OP is to demonstrate usage of the large-scale
+ * sparse parameter service (RocksDB for local serving, mCube for
+ * distributed serving). To achieve this, we modified the original model
+ * slightly:
+ * 1) Function ctr_dnn_model() returns feed_vars and fetch_vars
+ * 2) The model is saved with fluid.io.save_inference_model, using the
+ * feed_vars and fetch_vars returned from ctr_dnn_model(), instead of
+ * fluid.io.save_persistables
+ * 3) Further, feed_vars were specified to be the inputs of the concat
+ * layer, so during save_inference_model() the generated inference program
+ * has the inputs of the concat layer as feed targets.
+ * 4) Weight values for the embedding layer are fetched from the sparse
+ * parameter server for each sample
+ *
+ * Please refer to doc/CTR_PREDICTION.md for details on the original model
+ * and the modifications we made
+ *
+ */
+class CTRPredictionOp
+    : public baidu::paddle_serving::predictor::OpWithChannel<
+          baidu::paddle_serving::predictor::ctr_prediction::Response> {
+ public:
+  typedef std::vector<paddle::PaddleTensor> TensorVector;
+
+  DECLARE_OP(CTRPredictionOp);
+
+  int inference();
+};
+
+}  // namespace serving
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/demo-serving/proto/ctr_prediction.proto b/demo-serving/proto/ctr_prediction.proto
new file mode 100644
index 00000000..8efed93c
--- /dev/null
+++ b/demo-serving/proto/ctr_prediction.proto
@@ -0,0 +1,40 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+import "pds_option.proto";
+import "builtin_format.proto";
+package baidu.paddle_serving.predictor.ctr_prediction;
+
+option cc_generic_services = true;
+
+message CTRReqInstance {
+  repeated int64 sparse_ids = 1;
+  repeated int64 dense_ids = 2;
+};
+
+message Request { repeated CTRReqInstance instances = 1; };
+
+message CTRResInstance {
+  required float prob0 = 1;
+  required float prob1 = 2;
+};
+
+message Response { repeated CTRResInstance predictions = 1; };
+
+service CTRPredictionService {
+  rpc inference(Request) returns (Response);
+  rpc debug(Request) returns (Response);
+  option (pds.options).generate_impl = true;
+};
diff --git a/doc/CTR_PREDICTION.md b/doc/CTR_PREDICTION.md
new file mode 100644
index 00000000..1b82e191
--- /dev/null
+++ b/doc/CTR_PREDICTION.md
@@ -0,0 +1,3 @@
+# CTR Prediction Model
+
+Original model: https://github.com/PaddlePaddle/models/tree/develop/PaddleRec/ctr
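+
+Below is a minimal client-side sketch of how a request for
+CTRPredictionService could be assembled, using the C++ classes generated
+from demo-serving/proto/ctr_prediction.proto. The concrete id values and
+the dense id count are illustrative only; the op requires at least 26
+sparse ids per instance (one per sparse slot).
+
+```cpp
+#include <iostream>
+
+#include "demo-serving/ctr_prediction.pb.h"
+
+int main() {
+  using baidu::paddle_serving::predictor::ctr_prediction::CTRReqInstance;
+  using baidu::paddle_serving::predictor::ctr_prediction::Request;
+
+  Request req;
+  CTRReqInstance *ins = req.add_instances();
+
+  // One feature id per sparse slot; CTRPredictionOp reads sparse_ids[0..25].
+  for (int i = 0; i < 26; ++i) {
+    ins->add_sparse_ids(1000 + i);  // placeholder hashed feature ids
+  }
+
+  // Ids for the single dense slot; the count of 13 is an assumption
+  // (Criteo-style dense features), not mandated by the proto.
+  for (int i = 0; i < 13; ++i) {
+    ins->add_dense_ids(i);
+  }
+
+  std::cout << req.DebugString();
+  return 0;
+}
+```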