Merge pull request #13058 from panyx0718/infer

use fast RunPreparedContext for inference

Merge pull request #13058 from panyx0718/infer
use fast RunPreparedContext for inference
823c4f87 · Xin Pan · GitHub · 7cb6fe7a · 5adf118a · 823c4f87
6 changed files
--- a/paddle/fluid/framework/executor.h
+++ b/paddle/fluid/framework/executor.h
@@ -60,6 +60,7 @@ class Executor {
  void Run(const ProgramDesc& prog, Scope* scope, int block_id,
           bool create_local_scope = true, bool create_vars = true);
+  // This API is very slow.
  void Run(const ProgramDesc& program, Scope* scope,
           std::map<std::string, const LoDTensor*>* feed_targets,
           std::map<std::string, LoDTensor*>* fetch_targets,
@@ -79,6 +80,7 @@ class Executor {
                          bool create_local_scope = true,
                          bool create_vars = true, bool keep_kids = false);
+  // This API is very slow.
  void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
                          std::map<std::string, const LoDTensor*>* feed_targets,
                          std::map<std::string, LoDTensor*>* fetch_targets,

--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -80,8 +80,7 @@ class AnalysisPredictor : public NativePaddlePredictor {
                               sub_scope_ ? sub_scope_ : scope_.get(), 0);
    // Get the feed_target_names and fetch_target_names
-    feed_target_names_ = inference_program_->GetFeedTargetNames();
+    PrepareFeedFetch();
-    fetch_target_names_ = inference_program_->GetFetchTargetNames();
    return true;
  }

--- a/paddle/fluid/inference/api/api_impl.cc
+++ b/paddle/fluid/inference/api/api_impl.cc
@@ -21,6 +21,7 @@ limitations under the License. */
 #include <utility>
 #include <vector>
+#include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/inference/api/api_impl.h"
 #include "paddle/fluid/platform/profiler.h"
@@ -57,6 +58,25 @@ std::string num2str(T a) {
 }
 }  // namespace
+void NativePaddlePredictor::PrepareFeedFetch() {
+  for (auto *op : inference_program_->Block(0).AllOps()) {
+    if (op->Type() == "feed") {
+      int idx = boost::get<int>(op->GetAttr("col"));
+      if (feeds_.size() <= idx) {
+        feeds_.resize(idx + 1);
+      }
+      feeds_[idx] = op;
+      feed_names_[op->Output("Out")[0]] = idx;
+    } else if (op->Type() == "fetch") {
+      int idx = boost::get<int>(op->GetAttr("col"));
+      if (fetchs_.size() <= idx) {
+        fetchs_.resize(idx + 1);
+      }
+      fetchs_[idx] = op;
+    }
+  }
+}
 bool NativePaddlePredictor::Init(
    std::shared_ptr<framework::Scope> parent_scope) {
  VLOG(3) << "Predictor::init()";
@@ -108,8 +128,7 @@ bool NativePaddlePredictor::Init(
                             sub_scope_ ? sub_scope_ : scope_.get(), 0);
  // Get the feed_target_names and fetch_target_names
-  feed_target_names_ = inference_program_->GetFeedTargetNames();
+  PrepareFeedFetch();
-  fetch_target_names_ = inference_program_->GetFetchTargetNames();
  return true;
 }
@@ -130,36 +149,21 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
  Timer timer;
  timer.tic();
  // set feed variable
-  std::map<std::string, const framework::LoDTensor *> feed_targets;
  std::vector<framework::LoDTensor> feeds;
-  if (!SetFeed(inputs, &feeds)) {
+  framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();
+  if (!SetFeed(inputs, scope)) {
    LOG(ERROR) << "fail to set feed";
    return false;
  }
-  for (size_t i = 0; i < feed_target_names_.size(); ++i) {
-    if (config_.specify_input_name) {
-      feed_targets[inputs[i].name] = &feeds[i];
-    } else {
-      feed_targets[feed_target_names_[i]] = &feeds[i];
-    }
-  }
-  // get fetch variable
-  std::map<std::string, framework::LoDTensor *> fetch_targets;
-  std::vector<framework::LoDTensor> fetchs;
-  fetchs.resize(fetch_target_names_.size());
-  for (size_t i = 0; i < fetch_target_names_.size(); ++i) {
-    fetch_targets[fetch_target_names_[i]] = &fetchs[i];
-  }
  // Run the inference program
  // if share variables, we need not create variables
  VLOG(4) << "Run prepared context";
-  executor_->RunPreparedContext(
+  executor_->RunPreparedContext(ctx_.get(), scope,
-      ctx_.get(), sub_scope_ != nullptr ? sub_scope_ : scope_.get(),
+                                false, /* don't create local scope each time*/
-      &feed_targets, &fetch_targets,
+                                false /* don't create variable eatch time */);
-      false, /* don't create local scope each time*/
-      false /* don't create variable eatch time */);
  VLOG(4) << "Finish prepared context";
-  if (!GetFetch(fetchs, output_data)) {
+  // get fetch variable
+  if (!GetFetch(output_data, scope)) {
    LOG(ERROR) << "fail to get fetches";
    return false;
  }
@@ -180,13 +184,13 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
 }
 bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
-                                    std::vector<framework::LoDTensor> *feeds) {
+                                    framework::Scope *scope) {
  VLOG(3) << "Predictor::set_feed";
-  if (inputs.size() != feed_target_names_.size()) {
+  if (inputs.size() != feeds_.size()) {
    LOG(ERROR) << "wrong feed input size.";
    return false;
  }
-  for (size_t i = 0; i < feed_target_names_.size(); ++i) {
+  for (size_t i = 0; i < inputs.size(); ++i) {
    framework::LoDTensor input;
    framework::DDim ddim = framework::make_ddim(inputs[i].shape);
    void *input_ptr;
@@ -208,29 +212,38 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
      lod.emplace_back(level);
    }
    input.set_lod(lod);
+    int idx = -1;
-    feeds->push_back(input);
+    if (config_.specify_input_name) {
+      idx = feed_names_[inputs[i].name];
+    } else {
+      idx = boost::get<int>(feeds_[i]->GetAttr("col"));
+    }
+    framework::SetFeedVariable(scope, input, "feed", idx);
  }
  return true;
 }
-bool NativePaddlePredictor::GetFetch(
+bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
-    const std::vector<framework::LoDTensor> &fetchs,
+                                     framework::Scope *scope) {
-    std::vector<PaddleTensor> *outputs) {
  VLOG(3) << "Predictor::get_fetch";
-  outputs->resize(fetchs.size());
+  outputs->resize(fetchs_.size());
-  for (size_t i = 0; i < fetchs.size(); ++i) {
+  for (size_t i = 0; i < fetchs_.size(); ++i) {
+    int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
+    PADDLE_ENFORCE(idx == i);
+    framework::LoDTensor &output =
+        framework::GetFetchVariable(*scope, "fetch", idx);
    // TODO(panyx0718): Support fetch of other types.
-    if (fetchs[i].type() != typeid(float)) {
+    if (output.type() != typeid(float)) {
      LOG(ERROR) << "only support fetching float now.";
      return false;
    }
    std::vector<int> shape;
-    auto dims_i = fetchs[i].dims();
+    auto dims_i = output.dims();
-    auto lod = fetchs[i].lod();
+    auto lod = output.lod();
-    const float *output_ptr = fetchs[i].data<float>();
+    const float *output_ptr = output.data<float>();
    // const int64_t* output_ptr = fetchs[i].data<int64_t>();
-    auto num = fetchs[i].numel();
+    auto num = output.numel();
    std::vector<float> data;
    if (0 == lod.size()) {
      std::copy(output_ptr, output_ptr + num, std::back_inserter(data));
@@ -275,7 +288,7 @@ bool NativePaddlePredictor::GetFetch(
    }
    std::memcpy(buffer.data(), data.data(), buffer.length());
    // copy LoD
-    for (const auto &level : fetchs[i].lod()) {
+    for (const auto &level : output.lod()) {
      outputs->at(i).lod.emplace_back(level);
    }
    outputs->at(i).dtype = PaddleDType::FLOAT32;

--- a/paddle/fluid/inference/api/api_impl.h
+++ b/paddle/fluid/inference/api/api_impl.h
@@ -15,6 +15,7 @@
 #pragma once
 #include <glog/logging.h>
+#include <map>
 #include <memory>
 #include <string>
 #include <vector>
@@ -47,9 +48,11 @@ class NativePaddlePredictor : public PaddlePredictor {
 protected:
  bool SetFeed(const std::vector<PaddleTensor> &input_datas,
-               std::vector<framework::LoDTensor> *feeds);
+               framework::Scope *scope);
-  bool GetFetch(const std::vector<framework::LoDTensor> &fetchs,
+  bool GetFetch(std::vector<PaddleTensor> *output_data,
-                std::vector<PaddleTensor> *output_data);
+                framework::Scope *scope);
+  void PrepareFeedFetch();
  NativeConfig config_;
  platform::Place place_;
@@ -57,8 +60,9 @@ class NativePaddlePredictor : public PaddlePredictor {
  std::shared_ptr<framework::Scope> scope_;
  std::unique_ptr<framework::ExecutorPrepareContext> ctx_;
  std::unique_ptr<framework::ProgramDesc> inference_program_;
-  std::vector<std::string> feed_target_names_;
+  std::vector<framework::OpDesc *> feeds_;
-  std::vector<std::string> fetch_target_names_;
+  std::map<std::string, size_t> feed_names_;
+  std::vector<framework::OpDesc *> fetchs_;
  // Do not use unique_ptr, use parent scope to delete
  framework::Scope *sub_scope_{nullptr};
 };

--- a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
+++ b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
@@ -74,10 +74,8 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
    VLOG(5) << "to create variables";
    executor_->CreateVariables(*inference_program_,
                               sub_scope_ ? sub_scope_ : scope_.get(), 0);
    // Get the feed_target_names and fetch_target_names
-    feed_target_names_ = inference_program_->GetFeedTargetNames();
+    PrepareFeedFetch();
-    fetch_target_names_ = inference_program_->GetFetchTargetNames();
    return true;
  }

--- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc
+++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
@@ -21,6 +21,8 @@ limitations under the License. */
 #include "paddle/fluid/inference/tests/test_helper.h"
 #include "paddle/fluid/platform/cpu_helper.h"
+#include "paddle/fluid/framework/feed_fetch_method.h"
 DEFINE_string(model_path, "", "Directory of the inference model.");
 DEFINE_string(data_file, "", "File of input index data.");
 DEFINE_int32(repeat, 100, "Running the inference program repeat times");
@@ -124,14 +126,35 @@ void ThreadRunInfer(
  std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
  PADDLE_ENFORCE_EQ(feed_target_names.size(), 1UL);
+  // map the data of feed_targets to feed_holder
+  for (auto* op : inference_program->Block(0).AllOps()) {
+    if (op->Type() == "feed") {
+      std::string feed_target_name = op->Output("Out")[0];
+      int idx = boost::get<int>(op->GetAttr("col"));
+      paddle::framework::SetFeedVariable(scope, *feed_targets[feed_target_name],
+                                         "feed", idx);
+    }
+  }
  auto& inputs = jobs[tid];
  auto start_ms = GetCurrentMs();
  for (size_t i = 0; i < inputs.size(); ++i) {
    feed_targets[feed_target_names[0]] = inputs[i];
-    executor.RunPreparedContext(ctx.get(), &sub_scope, &feed_targets,
+    executor.RunPreparedContext(ctx.get(), &sub_scope,
-                                &fetch_targets, false /*create_local_scope*/);
+                                false /*create_local_scope*/);
  }
  auto stop_ms = GetCurrentMs();
+  // obtain the data of fetch_targets from fetch_holder
+  for (auto* op : inference_program->Block(0).AllOps()) {
+    if (op->Type() == "fetch") {
+      std::string fetch_target_name = op->Input("X")[0];
+      int idx = boost::get<int>(op->GetAttr("col"));
+      *fetch_targets[fetch_target_name] =
+          paddle::framework::GetFetchVariable(*scope, "fetch", idx);
+    }
+  }
  scope->DeleteScope(&sub_scope);
  LOG(INFO) << "Tid: " << tid << ", process " << inputs.size()
            << " samples, avg time per sample: "