fix general_text_response_op && fix timeline

8dbf2c07 · barrierye · ab645d8c · 8dbf2c07 · 8dbf2c07 · 8dbf2c07
7 changed file
--- a/core/general-client/include/general_model.h
+++ b/core/general-client/include/general_model.h
@@ -21,15 +21,13 @@
 #include <fstream>
 #include <map>
 #include <string>
+#include <utility>  // move
 #include <vector>

 #include "core/sdk-cpp/builtin_format.pb.h"
 #include "core/sdk-cpp/general_model_service.pb.h"
 #include "core/sdk-cpp/include/common.h"
 #include "core/sdk-cpp/include/predictor_sdk.h"
-#define BLOG(fmt, ...) \
-  printf(              \
-      "[%s:%s]:%d " fmt "\n", __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__)

 using baidu::paddle_serving::sdk_cpp::Predictor;
 using baidu::paddle_serving::sdk_cpp::PredictorApi;

--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -248,11 +248,10 @@ int PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
                output.insts(0).tensor_array(idx).float_data(i);
          }
        }
-        // TODO
-        postprocess_end = timeline.TimeStampUS();
      }
      predict_res.add_model_res(std::move(model));
    }
+    postprocess_end = timeline.TimeStampUS();
  }

  if (FLAGS_profile_client) {
@@ -263,7 +262,7 @@ int PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
        << "prepro_1:" << preprocess_end << " "
        << "client_infer_0:" << client_infer_start << " "
        << "client_infer_1:" << client_infer_end << " ";
-    // TODO: multi-model
+    // TODO(barriery): multi-model profile time
    if (FLAGS_profile_server) {
      int op_num = res.profile_time_size() / 2;
      for (int i = 0; i < op_num; ++i) {
@@ -431,8 +430,8 @@ int PredictorClient::batch_predict(
        }
      }
      predict_res_batch.add_model_res(std::move(model));
-      postprocess_end = timeline.TimeStampUS();
    }
+    postprocess_end = timeline.TimeStampUS();
  }

  if (FLAGS_profile_client) {
@@ -443,7 +442,7 @@ int PredictorClient::batch_predict(
        << "prepro_1:" << preprocess_end << " "
        << "client_infer_0:" << client_infer_start << " "
        << "client_infer_1:" << client_infer_end << " ";
-    // TODO: multi-models
+    // TODO(barriery): multi-model profile time
    if (FLAGS_profile_server) {
      int op_num = res.profile_time_size() / 2;
      for (int i = 0; i < op_num; ++i) {

--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -176,12 +176,22 @@ int GeneralResponseOp::inference() {

  if (req->profile_server()) {
    int64_t end = timeline.TimeStampUS();
-    for (uint32_t i = 0; i < pre_node_names.size(); ++i) {
-      input_blob = get_depend_argument<GeneralBlob>(pre_node_names[i]);
+    // TODO(barriery): multi-model profile_time.
+    // At present, only the response_op is multi-input, so here we get
+    // the profile_time by hard coding. It needs to be replaced with
+    // a more elegant way.
+    for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
+      input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
      VLOG(2) << "p size for input blob: " << input_blob->p_size;
-      ModelOutput *output = res->mutable_outputs(i);
-      for (int i = 0; i < input_blob->p_size; ++i) {
-        output->add_profile_time(input_blob->time_stamp[i]);
+      ModelOutput *output = res->mutable_outputs(pi);
+      int profile_time_idx = -1;
+      if (pi == 0) {
+        profile_time_idx = 0;
+      } else {
+        profile_time_idx = input_blob->p_size - 2;
+      }
+      for (; profile_time_idx < input_blob->p_size; ++profile_time_idx) {
+        res->add_profile_time(input_blob->time_stamp[profile_time_idx]);
      }
    }
    // TODO(guru4elephant): find more elegant way to do this

--- a/core/general-server/op/general_text_response_op.cpp
+++ b/core/general-server/op/general_text_response_op.cpp
@@ -32,34 +32,18 @@ using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::FetchInst;
+using baidu::paddle_serving::predictor::general_model::ModelOutput;
 using baidu::paddle_serving::predictor::InferManager;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;

 int GeneralTextResponseOp::inference() {
  VLOG(2) << "Going to run inference";
  const std::vector<std::string> pre_node_names = pre_names();
-  if (pre_node_names.size() != 1) {
-    LOG(ERROR) << "This op(" << op_name()
-               << ") can only have one predecessor op, but received "
-               << pre_node_names.size();
-    return -1;
-  }
-  const std::string pre_name = pre_node_names[0];
-
-  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
-
-  if (!input_blob) {
-    LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name;
-    return -1;
-  }
+  VLOG(2) << "pre node names size: " << pre_node_names.size();

-  // TODO: multi-predecessor
-  /*
-  const TensorVector *in = &input_blob->tensor_vector;
-  int batch_size = input_blob->GetBatchSize();
-
-  VLOG(2) << "infer batch size: " << batch_size;
  const Request *req = dynamic_cast<const Request *>(get_request_message());
+  // response inst with only fetch_var_names
+  Response *res = mutable_data<Response>();

  Timer timeline;
  int64_t start = timeline.TimeStampUS();
@@ -79,65 +63,97 @@ int GeneralTextResponseOp::inference() {
        model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
  }

-  // response inst with only fetch_var_names
-  Response *res = mutable_data<Response>();
+  const GeneralBlob *input_blob;
+  for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
+    const std::string &pre_name = pre_node_names[pi];
+    VLOG(2) << "pre names[" << pi << "]: " << pre_name << " ("
+            << pre_node_names.size() << ")";
+    input_blob = get_depend_argument<GeneralBlob>(pre_name);
+    if (!input_blob) {
+      LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name;
+      return -1;
+    }

-  for (int i = 0; i < batch_size; ++i) {
-    FetchInst *fetch_inst = res->add_insts();
-    for (auto &idx : fetch_index) {
-      Tensor *tensor = fetch_inst->add_tensor_array();
-      // currently only response float tensor or lod_tensor
-      tensor->set_elem_type(1);
-      if (model_config->_is_lod_fetch[idx]) {
-        VLOG(2) << "out[" << idx << " is lod_tensor";
-        tensor->add_shape(-1);
-      } else {
-        VLOG(2) << "out[" << idx << "] is tensor";
-        for (int k = 1; k < in->at(idx).shape.size(); ++k) {
-          VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k];
-          tensor->add_shape(in->at(idx).shape[k]);
+    const TensorVector *in = &input_blob->tensor_vector;
+    int batch_size = input_blob->GetBatchSize();
+    VLOG(2) << "input batch size: " << batch_size;
+
+    ModelOutput *output = res->add_outputs();
+    output->set_engine_name(
+        pre_name);  // To get the order of model return values
+    for (int i = 0; i < batch_size; ++i) {
+      FetchInst *fetch_inst = output->add_insts();
+      for (auto &idx : fetch_index) {
+        Tensor *tensor = fetch_inst->add_tensor_array();
+        // currently only response float tensor or lod_tensor
+        tensor->set_elem_type(1);
+        if (model_config->_is_lod_fetch[idx]) {
+          VLOG(2) << "out[" << idx << " is lod_tensor";
+          tensor->add_shape(-1);
+        } else {
+          VLOG(2) << "out[" << idx << "] is tensor";
+          for (int k = 1; k < in->at(idx).shape.size(); ++k) {
+            VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k];
+            tensor->add_shape(in->at(idx).shape[k]);
+          }
        }
      }
    }
-  }

-  int var_idx = 0;
-  for (auto &idx : fetch_index) {
-    float *data_ptr = static_cast<float *>(in->at(idx).data.data());
-    int cap = 1;
-    for (int j = 1; j < in->at(idx).shape.size(); ++j) {
-      cap *= in->at(idx).shape[j];
-    }
-    if (model_config->_is_lod_fetch[idx]) {
-      for (int j = 0; j < batch_size; ++j) {
-        for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
-             k++) {
-          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_float_data(
-              data_ptr[k]);
-        }
+    int var_idx = 0;
+    for (auto &idx : fetch_index) {
+      float *data_ptr = static_cast<float *>(in->at(idx).data.data());
+      int cap = 1;
+      for (int j = 1; j < in->at(idx).shape.size(); ++j) {
+        cap *= in->at(idx).shape[j];
      }
-    } else {
-      for (int j = 0; j < batch_size; ++j) {
-        for (int k = j * cap; k < (j + 1) * cap; ++k) {
-          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_float_data(
-              data_ptr[k]);
+      if (model_config->_is_lod_fetch[idx]) {
+        for (int j = 0; j < batch_size; ++j) {
+          for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
+               k++) {
+            output->mutable_insts(j)
+                ->mutable_tensor_array(var_idx)
+                ->add_float_data(data_ptr[k]);
+          }
+        }
+      } else {
+        for (int j = 0; j < batch_size; ++j) {
+          for (int k = j * cap; k < (j + 1) * cap; ++k) {
+            output->mutable_insts(j)
+                ->mutable_tensor_array(var_idx)
+                ->add_float_data(data_ptr[k]);
+          }
        }
      }
+      var_idx++;
    }
-    var_idx++;
  }

  if (req->profile_server()) {
    int64_t end = timeline.TimeStampUS();
-
-    for (int i = 0; i < input_blob->p_size; ++i) {
-      res->add_profile_time(input_blob->time_stamp[i]);
+    // TODO(barriery): multi-model profile_time.
+    // At present, only the response_op is multi-input, so here we get
+    // the profile_time by hard coding. It needs to be replaced with
+    // a more elegant way.
+    for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
+      input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
+      VLOG(2) << "p size for input blob: " << input_blob->p_size;
+      ModelOutput *output = res->mutable_outputs(pi);
+      int profile_time_idx = -1;
+      if (pi == 0) {
+        profile_time_idx = 0;
+      } else {
+        profile_time_idx = input_blob->p_size - 2;
+      }
+      for (; profile_time_idx < input_blob->p_size; ++profile_time_idx) {
+        res->add_profile_time(input_blob->time_stamp[profile_time_idx]);
+      }
    }
    // TODO(guru4elephant): find more elegant way to do this
    res->add_profile_time(start);
    res->add_profile_time(end);
  }
-  */
+
  return 0;
 }
 DEFINE_OP(GeneralTextResponseOp);

--- a/core/general-server/proto/general_model_service.proto
+++ b/core/general-server/proto/general_model_service.proto
@@ -45,8 +45,7 @@ message Response {

 message ModelOutput {
  repeated FetchInst insts = 1;
-  repeated int64 profile_time = 2;
-  optional string engine_name = 3;
+  optional string engine_name = 2;
 }

 service GeneralModelService {

--- a/core/predictor/framework/dag.cpp
+++ b/core/predictor/framework/dag.cpp
@@ -14,6 +14,7 @@

 #include "core/predictor/framework/dag.h"
 #include <string>
+#include <utility>  // make_pair
 #include <vector>
 #include "core/predictor/common/inner_common.h"
 #include "core/predictor/framework/predictor_metric.h"  // PredictorMetric
@@ -210,11 +211,11 @@ int Dag::topo_sort() {
      uint32_t pnid = Dag::node_by_name(it->first)->id -
                      1;  // 0 is reserved for begginer-op
      in_egde[pnid].push_back(nid);
-      LOG(INFO) << "inegde[" << pnid << "]: " << nid;
    }
  }
  for (int i = 0; i < in_degree.size(); ++i) {
-    LOG(INFO) << "(" << _index_nodes[i]->name << ") in_degree[" << i << "]: " << in_degree[i];
+    LOG(INFO) << "(" << _index_nodes[i]->name << ") in_degree[" << i
+              << "]: " << in_degree[i];
  }
  int sorted_num = 0;
  DagStage* stage = new (std::nothrow) DagStage();
@@ -228,7 +229,6 @@ int Dag::topo_sort() {
  stage->full_name = full_name() + NAME_DELIMITER + stage->name;
  for (uint32_t nid = 0; nid < nodes_size; ++nid) {
    if (in_degree[nid] == 0) {
-      LOG(INFO) << "nid:" << nid;
      ++sorted_num;
      stage->nodes.push_back(_index_nodes[nid]);
      // assign stage number after stage created
@@ -254,13 +254,10 @@ int Dag::topo_sort() {
    stage->full_name = full_name() + NAME_DELIMITER + stage->name;
    for (uint32_t pi = 0; pi < pre_nodes.size(); ++pi) {
      uint32_t pnid = pre_nodes[pi]->id - 1;
-      LOG(INFO) << "pnid: " <<  pnid;
      for (uint32_t ei = 0; ei < in_egde[pnid].size(); ++ei) {
        uint32_t nid = in_egde[pnid][ei];
        --in_degree[nid];
-        LOG(INFO) << "nid: " << nid << ", indeg: " << in_degree[nid];
        if (in_degree[nid] == 0) {
-          LOG(INFO) << "nid: " << nid;
          ++sorted_num;
          stage->nodes.push_back(_index_nodes[nid]);
          // assign stage number after stage created
@@ -277,26 +274,7 @@ int Dag::topo_sort() {
    }
    _stages.push_back(stage);
  }
-  /*std::stringstream ss;*/
-  // for (uint32_t nid = 0; nid < _index_nodes.size(); nid++) {
-  // DagStage* stage = new (std::nothrow) DagStage();
-  // if (stage == NULL) {
-  // LOG(ERROR) << "Invalid stage!";
-  // return ERR_MEM_ALLOC_FAILURE;
-  //}
-  // stage->nodes.push_back(_index_nodes[nid]);
-  // ss.str("");
-  // ss << _stages.size();
-  // stage->name = ss.str();
-  // stage->full_name = full_name() + NAME_DELIMITER + stage->name;
-  //_stages.push_back(stage);

-  //// assign stage number after stage created
-  //_index_nodes[nid]->stage = nid;
-  //// assign dag node full name after stage created
-  //_index_nodes[nid]->full_name =
-  // stage->full_name + NAME_DELIMITER + _index_nodes[nid]->name;
-  /*}*/
  return ERR_OK;
 }


--- a/core/sdk-cpp/proto/general_model_service.proto
+++ b/core/sdk-cpp/proto/general_model_service.proto
@@ -45,8 +45,7 @@ message Response {

 message ModelOutput {
  repeated FetchInst insts = 1;
-  repeated int64 profile_time = 2;
-  optional string engine_name = 3;
+  optional string engine_name = 2;
 }

 service GeneralModelService {