Commit d8b05642 authored by wangguibao

CTR prediction serving

Parent 7a525933
@@ -18,3 +18,14 @@ engines {
batch_infer_size: 0
enable_batch_align: 0
}
engines {
name: "ctr_prediction"
type: "FLUID_CPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "./data/model/paddle/fluid/ctr_prediction"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
}
@@ -31,3 +31,8 @@ services {
name: "EchoKVDBService"
workflows: "workflow7"
}
services {
name: "CTRPredictionService"
workflows: "workflow8"
}
@@ -75,3 +75,11 @@ workflows {
type: "KVDBEchoOp"
}
}
workflows {
name: "workflow8"
workflow_type: "Sequence"
nodes {
name: "ctr_prediction_service_op"
type: "CTRPredictionOp"
}
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "demo-serving/op/ctr_prediction_op.h"
#include <algorithm>
#include "predictor/framework/infer.h"
#include "predictor/framework/memory.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::ctr_prediction::CTRResInstance;
using baidu::paddle_serving::predictor::ctr_prediction::Response;
using baidu::paddle_serving::predictor::ctr_prediction::CTRReqInstance;
using baidu::paddle_serving::predictor::ctr_prediction::Request;
const int CTR_PREDICTION_INPUT_SLOTS =
27; // Total 26 sparse input + 1 dense input
const int CTR_PREDICTION_SPARSE_SLOTS = 26; // First 26: sparse input
const int CTR_PREDICTION_DENSE_SLOT = 26; // Last 1: dense input
int CTRPredictionOp::inference() {
const Request *req = dynamic_cast<const Request *>(get_request_message());
TensorVector *in = butil::get_object<TensorVector>();
  uint32_t sample_size = req->instances_size();
  if (sample_size == 0) {
    LOG(WARNING) << "No instances to run inference on!";
    return -1;
  }
  paddle::PaddleTensor lod_tensors[CTR_PREDICTION_INPUT_SLOTS];
  for (int i = 0; i < CTR_PREDICTION_INPUT_SLOTS; ++i) {
    // Both sparse IDs and dense IDs arrive as int64 in the request proto,
    // so every input slot is an INT64 LoD tensor.
    lod_tensors[i].dtype = paddle::PaddleDType::INT64;
    std::vector<std::vector<size_t>> &lod = lod_tensors[i].lod;
    lod.resize(1);
    lod[0].push_back(0);
  }
  // Sparse slots: each sample contributes exactly one ID per sparse slot
  for (int i = 0; i < CTR_PREDICTION_SPARSE_SLOTS; ++i) {
    paddle::PaddleTensor lod_tensor = lod_tensors[i];
    std::vector<std::vector<size_t>> &lod = lod_tensor.lod;

    for (uint32_t si = 0; si < sample_size; ++si) {
      lod[0].push_back(lod[0].back() + 1);
    }

    lod_tensor.shape = {static_cast<int>(lod[0].back()), 1};
    lod_tensor.data.Resize(lod[0].back() * sizeof(int64_t));

    int offset = 0;
    for (uint32_t si = 0; si < sample_size; ++si) {
      int64_t *data_ptr =
          static_cast<int64_t *>(lod_tensor.data.data()) + offset;
      const CTRReqInstance &req_instance = req->instances(si);
      // Copy the i-th sparse ID of this sample into slot i
      memcpy(data_ptr, &req_instance.sparse_ids().data()[i], sizeof(int64_t));
      offset += 1;
    }

    in->push_back(lod_tensor);
  }
  // Dense slot: each sample contributes a variable-length list of dense IDs
  paddle::PaddleTensor lod_tensor = lod_tensors[CTR_PREDICTION_DENSE_SLOT];
  std::vector<std::vector<size_t>> &lod = lod_tensor.lod;

  for (uint32_t si = 0; si < sample_size; ++si) {
    const CTRReqInstance &req_instance = req->instances(si);
    lod[0].push_back(lod[0].back() + req_instance.dense_ids_size());
  }

  lod_tensor.shape = {static_cast<int>(lod[0].back()), 1};
  lod_tensor.data.Resize(lod[0].back() * sizeof(int64_t));

  int offset = 0;
  for (uint32_t si = 0; si < sample_size; ++si) {
    int64_t *data_ptr = static_cast<int64_t *>(lod_tensor.data.data()) + offset;
    const CTRReqInstance &req_instance = req->instances(si);
    memcpy(data_ptr,
           req_instance.dense_ids().data(),
           sizeof(int64_t) * req_instance.dense_ids_size());
    offset += req_instance.dense_ids_size();
  }

  in->push_back(lod_tensor);
TensorVector *out = butil::get_object<TensorVector>();
if (!out) {
LOG(ERROR) << "Failed get tls output object";
return -1;
}
  // Run inference with the Paddle Fluid model
  if (predictor::InferManager::instance().infer(
          CTR_PREDICTION_MODEL_NAME, in, out, sample_size)) {
    LOG(ERROR) << "Failed to run inference with fluid model: "
               << CTR_PREDICTION_MODEL_NAME;
    return -1;
  }
if (out->size() != in->size()) {
LOG(ERROR) << "Output tensor size not equal that of input";
return -1;
}
Response *res = mutable_data<Response>();
for (size_t i = 0; i < out->size(); ++i) {
int dim1 = out->at(i).shape[0];
int dim2 = out->at(i).shape[1];
if (out->at(i).dtype != paddle::PaddleDType::FLOAT32) {
LOG(ERROR) << "Expected data type float";
return -1;
}
float *data = static_cast<float *>(out->at(i).data.data());
for (int j = 0; j < dim1; ++j) {
CTRResInstance *res_instance = res->add_predictions();
res_instance->set_prob0(data[j * dim2]);
res_instance->set_prob1(data[j * dim2 + 1]);
}
}
for (size_t i = 0; i < in->size(); ++i) {
(*in)[i].shape.clear();
}
in->clear();
butil::return_object<TensorVector>(in);
for (size_t i = 0; i < out->size(); ++i) {
(*out)[i].shape.clear();
}
out->clear();
butil::return_object<TensorVector>(out);
return 0;
}
DEFINE_OP(CTRPredictionOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
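The subtle part of CTRPredictionOp::inference() above is the LoD bookkeeping: for each slot, lod[0] stores cumulative offsets, so sample si owns the half-open range [lod[0][si], lod[0][si+1]) of the flattened data buffer. Below is a minimal, framework-free sketch of the same batching scheme; the Sample struct and main() driver are hypothetical illustrations in plain standard C++, not the serving code itself:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for one variable-length field of a request instance.
struct Sample {
  std::vector<int64_t> dense_ids;
};

int main() {
  std::vector<Sample> batch = {{{1, 2, 3}}, {{4, 5}}, {{6, 7, 8, 9}}};

  // Cumulative LoD offsets: lod[si + 1] - lod[si] = length of sample si.
  std::vector<size_t> lod = {0};
  for (const Sample &s : batch) {
    lod.push_back(lod.back() + s.dense_ids.size());
  }

  // Flatten all samples into one buffer, as the op's memcpy loop does.
  std::vector<int64_t> flat(lod.back());
  size_t offset = 0;
  for (const Sample &s : batch) {
    std::copy(s.dense_ids.begin(), s.dense_ids.end(), flat.begin() + offset);
    offset += s.dense_ids.size();
  }

  // Sample si owns flat[lod[si] .. lod[si + 1]).
  for (size_t si = 0; si + 1 < lod.size(); ++si) {
    std::printf("sample %zu: [%zu, %zu)\n", si, lod[si], lod[si + 1]);
  }
  return 0;
}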
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#ifdef BCLOUD
#ifdef WITH_GPU
#include "paddle/paddle_inference_api.h"
#else
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#endif
#else
#include "paddle/fluid/inference/paddle_inference_api.h"
#endif
#include "demo-serving/ctr_prediction.pb.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
static const char* CTR_PREDICTION_MODEL_NAME = "ctr_prediction";
/**
* CTRPredictionOp: Serve CTR prediction requests.
*
* Original model can be found here:
* https://github.com/PaddlePaddle/models/tree/develop/PaddleRec/ctr
*
* NOTE:
*
* The main purpose of this OP is to demonstrate usage of large-scale sparse
* parameter service (RocksDB for local, mCube for distributed service). To
 * achieve this, we modified the original model slightly:
* 1) Function ctr_dnn_model() returns feed_vars and fetch_vars
 * 2) Use fluid.io.save_inference_model with the feed_vars and fetch_vars
 * returned from ctr_dnn_model(), instead of fluid.io.save_persistables
 * 3) Further, feed_vars are specified to be the inputs of the concat layer.
 * Then, during save_inference_model(), the generated inference program will
 * have the inputs of the concat layer as feed targets
 * 4) Weight values for the embedding layer are fetched from the sparse param
 * server for each sample
*
 * Please refer to doc/CTR_PREDICTION.md for details on the original model
 * and the modifications we made.
*
*/
class CTRPredictionOp
: public baidu::paddle_serving::predictor::OpWithChannel<
baidu::paddle_serving::predictor::ctr_prediction::Response> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(CTRPredictionOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
import "pds_option.proto";
import "builtin_format.proto";
package baidu.paddle_serving.predictor.ctr_prediction;
option cc_generic_services = true;
message CTRReqInstance {
repeated int64 sparse_ids = 1;
repeated int64 dense_ids = 2;
};
message Request { repeated CTRReqInstance instances = 1; };
message CTRResInstance {
required float prob0 = 1;
required float prob1 = 2;
};
message Response { repeated CTRResInstance predictions = 1; };
service CTRPredictionService {
rpc inference(Request) returns (Response);
rpc debug(Request) returns (Response);
option (pds.options).generate_impl = true;
};
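Given the messages above, a client fills one CTRReqInstance per sample with 26 sparse IDs (one per sparse slot, matching CTR_PREDICTION_SPARSE_SLOTS) plus its dense IDs, then reads prob0/prob1 back from the Response. A minimal sketch using the generated protobuf C++ classes follows; the RPC channel/stub setup is omitted, and all feature values and counts (including the 13 dense IDs) are dummies:

#include <cstdio>
#include "demo-serving/ctr_prediction.pb.h"

using baidu::paddle_serving::predictor::ctr_prediction::CTRReqInstance;
using baidu::paddle_serving::predictor::ctr_prediction::Request;
using baidu::paddle_serving::predictor::ctr_prediction::Response;

// Build a request carrying `n` samples; the ID values here are made up.
Request build_request(int n) {
  Request req;
  for (int si = 0; si < n; ++si) {
    CTRReqInstance *inst = req.add_instances();
    for (int i = 0; i < 26; ++i) {  // exactly one ID per sparse slot
      inst->add_sparse_ids(1000 + i);
    }
    for (int i = 0; i < 13; ++i) {  // dense IDs; length may vary per sample
      inst->add_dense_ids(i);
    }
  }
  return req;
}

// One prediction per input instance is expected back.
void dump_response(const Response &res) {
  for (int i = 0; i < res.predictions_size(); ++i) {
    std::printf("sample %d: prob0=%f prob1=%f\n",
                i,
                res.predictions(i).prob0(),
                res.predictions(i).prob1());
  }
}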
# CTR Prediction Model

Original model:
https://github.com/PaddlePaddle/models/tree/develop/PaddleRec/ctr