PaddlePaddle / Paddle · commit f1c9c505 (unverified)
Authored Jun 02, 2023 by Yuanle Liu · Committed by GitHub on Jun 02, 2023
update predictor.register_output_hook: support paddle::Tensor (#54254)
Parent aeee5b1e
Showing 10 changed files with 143 additions and 22 deletions (+143 −22)
paddle/fluid/framework/ir/auto_mixed_precision_pass.cc   +3   -2
paddle/fluid/inference/api/analysis_predictor.cc         +30  -2
paddle/fluid/inference/api/analysis_predictor.h          +14  -3
paddle/fluid/inference/api/paddle_api.h                  +14  -3
paddle/fluid/inference/api/paddle_inference_api.h        +11  -1
paddle/fluid/inference/api/paddle_tensor.h               +4   -1
paddle/fluid/inference/tensorrt/convert/op_converter.h   +3   -2
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h     +8   -6
paddle/fluid/pybind/inference_api.cc                     +6   -1
test/cpp/inference/api/analysis_predictor_tester.cc      +50  -1
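For orientation before the per-file diffs, here is a minimal caller-side sketch (mine, not part of the commit) showing the renamed hook and the new paddle::Tensor overload this commit adds. The model path is a placeholder, input feeding is omitted, and per the header docs below, memory reuse should be turned off when hooks are used:

#include <iostream>
#include <string>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("/path/to/model_dir");  // placeholder

  auto predictor = paddle_infer::CreatePredictor(config);

  // Renamed hook (Exp_OutputHookFunc -> OutputTensorHookFunc): the callback
  // receives each op output as a paddle_infer::Tensor.
  predictor->RegisterOutputHook(
      [](const std::string &op_type, const std::string &var_name,
         const paddle_infer::Tensor &tensor) {
        std::cout << op_type << " -> " << var_name << std::endl;
      });

  // New in this commit (OutputTensorHookFunc_V2): the callback receives a
  // paddle::Tensor instead, i.e. the full Paddle tensor type.
  predictor->RegisterOutputHook(
      [](const std::string &op_type, const std::string &var_name,
         const paddle::Tensor &tensor) {
        std::cout << op_type << " -> " << var_name << std::endl;
      });

  predictor->Run();  // hooks fire once per op output during Run()
  return 0;
}

The two overloads are disambiguated by the lambda's third parameter type, so no explicit cast is needed at the call site.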
paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
@@ -137,8 +137,9 @@ void DoInsertCastOp(Graph* graph,
   if (cache->count(var_node) == 0) {
     // insert cast op between var_node and op_node
     std::string cast_input_name = var_node->Var()->Name();
-    std::string cast_output_name =
-        var_node->Var()->Name() + "_cast.tmp_" + std::to_string((*suffix)++);
+    std::string cast_output_name = var_node->Var()->Name() +
+                                   "_cast_auto_mixed.tmp_" +
+                                   std::to_string((*suffix)++);
     framework::OpDesc cast_op_desc(block_desc);
     update_cast_desc(cast_op_desc,
                      cast_input_name,
paddle/fluid/inference/api/analysis_predictor.cc
@@ -2634,7 +2634,8 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) {
   exe.Run(save_program, scope(), 0, true, true);
 }
 
-void AnalysisPredictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
+void AnalysisPredictor::RegisterOutputHook(
+    const OutputTensorHookFunc &hookfunc) {
   static std::once_flag register_hook_flag;
   std::call_once(register_hook_flag, [this] {
     executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
@@ -2655,6 +2656,29 @@ void AnalysisPredictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
   hookfuncs_.push_back(hookfunc);
 }
 
+void AnalysisPredictor::RegisterOutputHook(
+    const OutputTensorHookFunc_V2 &hookfunc) {
+  static std::once_flag register_hook_flag;
+  std::call_once(register_hook_flag, [this] {
+    executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
+      for (auto &output : op->Outputs()) {
+        for (auto &var_name : output.second) {
+          auto *var = this->sub_scope_->FindVar(var_name);
+          if (!var || !var->IsType<phi::DenseTensor>()) continue;
+          auto dense_tensor = var->Get<phi::DenseTensor>();
+          if (!dense_tensor.initialized()) continue;
+          auto tensor = paddle::Tensor(
+              std::make_shared<phi::DenseTensor>(dense_tensor), var_name);
+          for (auto &hookfunc : this->hookfuncs_v2_) {
+            hookfunc(op->Type(), var_name, tensor);
+          }
+        }
+      }
+    });
+  });
+  hookfuncs_v2_.push_back(hookfunc);
+}
+
 template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<AnalysisConfig>(
     const AnalysisConfig &config) {
@@ -2936,7 +2960,11 @@ void Predictor::ClearIntermediateTensor() {
 uint64_t Predictor::TryShrinkMemory() { return predictor_->TryShrinkMemory(); }
 
-void Predictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
+void Predictor::RegisterOutputHook(const OutputTensorHookFunc &hookfunc) {
   predictor_->RegisterOutputHook(hookfunc);
 }
 
+void Predictor::RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) {
+  predictor_->RegisterOutputHook(hookfunc);
+}
+
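A reading note on the V2 implementation above (mine, not from the commit): each initialized phi::DenseTensor is copied into a shared_ptr and wrapped as a paddle::Tensor before the user callbacks run, so a hook observes a snapshot rather than the live scope variable. A hedged sketch of a V2 hook that logs output shapes, assuming paddle::Tensor exposes shape() returning std::vector<int64_t> (true in this Paddle version, but verify):

#include <iostream>
#include <sstream>
#include <string>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

// Hypothetical helper, not Paddle code: builds a V2 hook that prints each
// op output's shape.
paddle_infer::OutputTensorHookFunc_V2 MakeShapeLogger() {
  return [](const std::string &op_type, const std::string &var_name,
            const paddle::Tensor &tensor) {
    std::ostringstream oss;
    oss << op_type << " -> " << var_name << " shape=[";
    for (auto d : tensor.shape()) oss << d << " ";
    oss << "]";
    std::cout << oss.str() << std::endl;
  };
}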
paddle/fluid/inference/api/analysis_predictor.h
@@ -322,11 +322,21 @@ class AnalysisPredictor : public PaddlePredictor {
   /// \brief Register a output hook function to operate the intermediate tensor
   /// of op output. when using this function, memory reuse should be tured off.
   /// The hook function signature is void(const std::string&, const
-  /// std::string&, const Tensor&>). Here, the first parameter is op's
+  /// std::string&, const paddle_infer::Tensor&>). Here, the first parameter is
+  /// op's type, the second param is output var name of the op, and the third
+  /// parameter is output tensor with the var name.
+  ///
+  void RegisterOutputHook(const OutputTensorHookFunc &hookfunc) override;
+
+  ///
+  /// \brief Register a output hook function to operate the intermediate tensor
+  /// of op output. when using this function, memory reuse should be tured off.
+  /// The hook function signature is void(const std::string&, const
+  /// std::string&, const paddle::Tensor&>). Here, the first parameter is op's
   /// type, the second param is output var name of the op, and the third
   /// parameter is output tensor with the var name.
   ///
-  void RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) override;
+  void RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) override;
 
   ///
   /// \brief Initialize mkldnn quantizer and execute mkldnn quantization pass
@@ -597,7 +607,8 @@ class AnalysisPredictor : public PaddlePredictor {
   int root_predictor_id_{-1};
 
  private:
-  std::vector<Exp_OutputHookFunc> hookfuncs_;
+  std::vector<OutputTensorHookFunc> hookfuncs_;
+  std::vector<OutputTensorHookFunc_V2> hookfuncs_v2_;
 
   // Some status here that help to determine the status inside the predictor.
   bool status_is_cloned_{false};
paddle/fluid/inference/api/paddle_api.h
@@ -38,7 +38,8 @@ namespace paddle {
 using PaddleDType = paddle_infer::DataType;
 using PaddlePlace = paddle_infer::PlaceType;
 using PaddleDataLayout = paddle_infer::DataLayout;
-using paddle_infer::Exp_OutputHookFunc;
+using paddle_infer::OutputTensorHookFunc;
+using paddle_infer::OutputTensorHookFunc_V2;
 
 /// \brief Memory manager for PaddleTensor.
 ///
@@ -317,11 +318,21 @@ class PD_INFER_DECL PaddlePredictor {
   /// \brief Register a output hook function to operate the intermediate tensor
   /// of op output. when using this function, memory reuse should be tured off.
   /// The hook function signature is void(const std::string&, const
-  /// std::string&, const Tensor&>). Here, the first parameter is op's
+  /// std::string&, const paddle_infer::Tensor&>). Here, the first parameter is
+  /// op's type, the second param is output var name of the op, and the third
+  /// parameter is output tensor with the var name.
+  ///
+  virtual void RegisterOutputHook(const OutputTensorHookFunc &hookfunc) {}
+
+  ///
+  /// \brief Register a output hook function to operate the intermediate tensor
+  /// of op output. when using this function, memory reuse should be tured off.
+  /// The hook function signature is void(const std::string&, const
+  /// std::string&, const paddle::Tensor&>). Here, the first parameter is op's
   /// type, the second param is output var name of the op, and the third
   /// parameter is output tensor with the var name.
   ///
-  virtual void RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {}
+  virtual void RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) {}
 
   /// \brief Clone an existing predictor
   /// When using clone, the same network will be created,
paddle/fluid/inference/api/paddle_inference_api.h
@@ -197,7 +197,17 @@ class PD_INFER_DECL Predictor {
   /// type, the second param is output var name of the op, and the third
   /// parameter is output tensor with the var name.
   ///
-  void RegisterOutputHook(const Exp_OutputHookFunc &hookfunc);
+  void RegisterOutputHook(const OutputTensorHookFunc &hookfunc);
+
+  ///
+  /// \brief Register a output hook function to operate the intermediate tensor
+  /// of op output. when using this function, memory reuse should be tured off.
+  /// The hook function signature is void(const std::string&, const
+  /// std::string&, const Tensor&>). Here, the first parameter is op's
+  /// type, the second param is output var name of the op, and the third
+  /// parameter is output tensor with the var name.
+  ///
+  void RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc);
 
   ///
   /// \brief Get the execution stream on devices with a concept of stream,
paddle/fluid/inference/api/paddle_tensor.h
@@ -37,9 +37,12 @@ namespace paddle_infer {
 using Strings = std::vector<std::string>;
 
 class Tensor;
-using Exp_OutputHookFunc =
+using OutputTensorHookFunc =
     std::function<void(const std::string&, const std::string&, const Tensor&)>;
+using OutputTensorHookFunc_V2 = std::function<void(
+    const std::string&, const std::string&, const paddle::Tensor&)>;
 
 typedef void (*CallbackFunc)(void*);
 
 #if defined(PADDLE_WITH_TESTING) && defined(PADDLE_WITH_INFERENCE_API_TEST)
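Illustrative note (mine, not from the commit): after this change the two aliases differ only in the tensor type handed to the callback, and that third parameter is what steers overload resolution between the two RegisterOutputHook signatures:

// A lambda whose third parameter is paddle_infer::Tensor binds to the
// original alias...
paddle_infer::OutputTensorHookFunc f1 =
    [](const std::string &, const std::string &,
       const paddle_infer::Tensor &) { /* inference-API tensor */ };

// ...while one taking paddle::Tensor binds to the V2 alias added here.
paddle_infer::OutputTensorHookFunc_V2 f2 =
    [](const std::string &, const std::string &,
       const paddle::Tensor &) { /* full paddle::Tensor */ };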
paddle/fluid/inference/tensorrt/convert/op_converter.h
@@ -291,8 +291,9 @@ class OpConverter {
       if (parameters.count(input)) continue;
       // NOTE(liuyuanle): It is a trick. If you need a name [input], then you
       // need to use [input.substr(0, idx)].
-      // Maybe we insert suffix of "_cast.tmp_" in auto_mixed_precision_pass.
-      auto idx = input.find("_cast.tmp_");
+      // Maybe we insert suffix of "_cast_auto_mixed.tmp_" in
+      // auto_mixed_precision_pass.
+      auto idx = input.find("_cast_auto_mixed.tmp_");
       input = input.substr(0, idx);
       auto *var = block_desc->FindVar(input);
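Why this lookup works without an explicit npos check (illustrative note, not from the commit): std::string::find returns std::string::npos when the suffix is absent, and substr(0, npos) returns the whole string, so names without the "_cast_auto_mixed.tmp_" suffix pass through unchanged. The same pattern recurs three times in tensorrt_engine_op.h below. A standalone demonstration:

#include <cassert>
#include <string>

int main() {
  const std::string suffix = "_cast_auto_mixed.tmp_";

  // Name produced by auto_mixed_precision_pass: the suffix is stripped.
  std::string a = "conv2d.out_cast_auto_mixed.tmp_3";
  a = a.substr(0, a.find(suffix));
  assert(a == "conv2d.out");

  // Ordinary name: find() returns npos, so substr(0, npos) is a no-op copy.
  std::string b = "conv2d.out";
  b = b.substr(0, b.find(suffix));
  assert(b == "conv2d.out");
  return 0;
}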
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -315,9 +315,10 @@ class TensorRTEngineOp : public framework::OperatorBase {
     for (auto name : runtime_input_names_) {
       // NOTE(liuyuanle): It is a trick. If you need a [name], then you need
       // to use [name.substr(0, idx)].
-      // Maybe we insert suffix of "_cast.tmp_" in auto_mixed_precision_pass.
+      // Maybe we insert suffix of "_cast_auto_mixed.tmp_" in
+      // auto_mixed_precision_pass.
       std::string name_real = name;
-      auto idx = name.find("_cast.tmp_");
+      auto idx = name.find("_cast_auto_mixed.tmp_");
       name = name.substr(0, idx);
       auto &t = inference::analysis::GetFromScope<phi::DenseTensor>(
@@ -387,9 +388,9 @@ class TensorRTEngineOp : public framework::OperatorBase {
     for (auto x : runtime_input_names_) {
       // NOTE(liuyuanle): It is a trick. If you need a [x], then you need
       // to use [x.substr(0, idx)].
-      // Maybe we insert suffix of "_cast.tmp_" in
+      // Maybe we insert suffix of "_cast_auto_mixed.tmp_" in
       // auto_mixed_precision_pass.
-      auto idx = x.find("_cast.tmp_");
+      auto idx = x.find("_cast_auto_mixed.tmp_");
       x = x.substr(0, idx);
       PADDLE_ENFORCE_EQ(
@@ -560,9 +561,10 @@ class TensorRTEngineOp : public framework::OperatorBase {
     for (auto x : runtime_input_names_) {
       // NOTE(liuyuanle): It is a trick. If you need a [x], then you need
      // to use [x.substr(0, idx)].
-      // Maybe we insert suffix of "_cast.tmp_" in auto_mixed_precision_pass.
+      // Maybe we insert suffix of "_cast_auto_mixed.tmp_" in
+      // auto_mixed_precision_pass.
      std::string x_real = x;
-      auto idx = x.find("_cast.tmp_");
+      auto idx = x.find("_cast_auto_mixed.tmp_");
       x = x.substr(0, idx);
 #if IS_TRT_VERSION_LT(8000)
paddle/fluid/pybind/inference_api.cc
@@ -36,6 +36,7 @@
 #include "paddle/fluid/inference/api/paddle_infer_contrib.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_pass_builder.h"
+#include "paddle/fluid/inference/api/paddle_tensor.h"
 #include "paddle/fluid/inference/utils/io_utils.h"
 #include "paddle/fluid/pybind/eager.h"
 #include "paddle/fluid/pybind/eager_utils.h"
@@ -1095,7 +1096,11 @@ void BindPaddleInferPredictor(py::module *m) {
       .def("clear_intermediate_tensor",
           &paddle_infer::Predictor::ClearIntermediateTensor)
-      .def("register_output_hook",
-           &paddle_infer::Predictor::RegisterOutputHook);
+      .def("register_output_hook",
+           py::overload_cast<const paddle_infer::OutputTensorHookFunc &>(
+               &paddle_infer::Predictor::RegisterOutputHook))
+      .def("register_output_hook_v2",
+           py::overload_cast<const paddle_infer::OutputTensorHookFunc_V2 &>(
+               &paddle_infer::Predictor::RegisterOutputHook));
 }
 
 void BindZeroCopyTensor(py::module *m) {
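Why the binding changed shape (illustrative note, not from the commit): RegisterOutputHook is now overloaded, so a bare member-function pointer no longer identifies a unique function and pybind11 needs py::overload_cast to select each overload by parameter type. A minimal standalone pybind11 sketch of the same pattern, with hypothetical names:

#include <pybind11/pybind11.h>

#include <string>

namespace py = pybind11;

// Hypothetical type, for illustration only.
struct Widget {
  void Set(int v) { value_int = v; }
  void Set(const std::string &v) { value_str = v; }
  int value_int{0};
  std::string value_str;
};

PYBIND11_MODULE(demo, m) {
  py::class_<Widget>(m, "Widget")
      .def(py::init<>())
      // &Widget::Set alone would not compile: the overload set is ambiguous.
      .def("set_int", py::overload_cast<int>(&Widget::Set))
      .def("set_str", py::overload_cast<const std::string &>(&Widget::Set));
}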
test/cpp/inference/api/analysis_predictor_tester.cc
@@ -667,7 +667,7 @@ TEST(Predictor, Streams) {
 }
 #endif
 
-TEST(AnalysisPredictor, OutputHookFunc) {
+TEST(AnalysisPredictor, OutputTensorHookFunc) {
   auto hookfunc = [](const std::string &type,
                      const std::string &var_name,
                      const Tensor &tensor) { LOG(INFO) << "in hook function"; };
@@ -714,4 +714,53 @@ TEST(AnalysisPredictor, OutputHookFunc) {
   }
 }
 
+TEST(AnalysisPredictor, OutputTensorHookFunc_V2) {
+  auto hookfunc = [](const std::string &type,
+                     const std::string &var_name,
+                     const paddle::Tensor &tensor) {
+    LOG(INFO) << "in hook function";
+  };
+
+  {
+    Config config;
+    config.SetModel(FLAGS_dirname);
+    config.EnableUseGpu(100, 0);
+    auto predictor = CreatePredictor(config);
+    predictor->RegisterOutputHook(hookfunc);
+    auto w0 = predictor->GetInputHandle("firstw");
+    auto w1 = predictor->GetInputHandle("secondw");
+    auto w2 = predictor->GetInputHandle("thirdw");
+    auto w3 = predictor->GetInputHandle("forthw");
+    w0->Reshape({4, 1});
+    w1->Reshape({4, 1});
+    w2->Reshape({4, 1});
+    w3->Reshape({4, 1});
+    auto *w0_data = w0->mutable_data<int64_t>(PlaceType::kCPU);
+    auto *w1_data = w1->mutable_data<int64_t>(PlaceType::kCPU);
+    auto *w2_data = w2->mutable_data<int64_t>(PlaceType::kCPU);
+    auto *w3_data = w3->mutable_data<int64_t>(PlaceType::kCPU);
+    for (int i = 0; i < 4; i++) {
+      w0_data[i] = i;
+      w1_data[i] = i;
+      w2_data[i] = i;
+      w3_data[i] = i;
+    }
+    predictor->Run();
+    predictor->TryShrinkMemory();
+  }
+
+  {
+    Config config;
+    config.SetModel(FLAGS_dirname);
+    config.EnableMemoryOptim();
+    config.EnableUseGpu(100, 0);
+    auto predictor = CreatePredictor(config);
+    predictor->RegisterOutputHook(hookfunc);
+  }
+}
+
 }  // namespace paddle_infer