From 418d2796e95b36f49ad23defda0c5fe8cfe80e57 Mon Sep 17 00:00:00 2001
From: Yuanle Liu
Date: Thu, 8 Jun 2023 11:13:37 +0800
Subject: [PATCH] output tensor hook support while op (#54432)

---
 paddle/fluid/framework/naive_executor.cc      | 10 +++-
 paddle/fluid/framework/naive_executor.h       |  4 +-
 .../framework/new_executor/interpretercore.cc |  4 ++
 .../framework/new_executor/interpretercore.h  |  7 +++
 paddle/fluid/framework/operator.h             |  7 +++
 .../fluid/inference/api/analysis_predictor.cc | 55 +++++--------------
 .../fluid/inference/api/analysis_predictor.h  | 13 +----
 paddle/fluid/inference/api/paddle_api.h       | 13 +----
 .../inference/api/paddle_inference_api.h      | 10 ----
 paddle/fluid/inference/api/paddle_tensor.h    |  6 +-
 .../fluid/operators/controlflow/while_op.cc   |  2 +
 paddle/fluid/pybind/inference_api.cc          |  6 +-
 .../api/analysis_predictor_tester.cc          | 47 ----------------
 13 files changed, 48 insertions(+), 136 deletions(-)

diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc
index c31e0661140..28cabf54ee4 100644
--- a/paddle/fluid/framework/naive_executor.cc
+++ b/paddle/fluid/framework/naive_executor.cc
@@ -66,6 +66,10 @@ void NaiveExecutor::Run() {
                                  platform::NvtxRangeColor::Green);
 #endif
 
+    if (op->Type() == "while") {
+      op->SetOutputHooks(hookfuncs_);
+    }
+
     op->Run(*scope_, place_);
 
     // Update the shared_holder so that only records the max one.
@@ -97,8 +101,8 @@ void NaiveExecutor::Run() {
 #ifdef PADDLE_WITH_INFERENCE_NVTX
     platform::CudaNvtxRangePop();
 #endif
-    for (auto &func : hookfunc_) {
-      func(op.get());
+    for (auto &func : hookfuncs_) {
+      func(op.get(), scope_);
     }
   }
 #ifdef PADDLE_WITH_INFERENCE_NVTX
@@ -178,7 +182,7 @@ phi::DenseTensor *NaiveExecutor::FindTensor(const std::string &name) {
 }
 
 void NaiveExecutor::RegisterOutputHook(const HookFunc &hookfunc) {
-  hookfunc_.push_back(hookfunc);
+  hookfuncs_.push_back(hookfunc);
 }
 
 void NaiveExecutor::MakeReusePlan(
diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h
index f1a4a036cde..8361d79fd18 100644
--- a/paddle/fluid/framework/naive_executor.h
+++ b/paddle/fluid/framework/naive_executor.h
@@ -38,7 +38,7 @@ class Scope;
 
 class NaiveExecutor {
  public:
-  using HookFunc = std::function<void(OperatorBase*)>;
+  using HookFunc = std::function<void(OperatorBase*, Scope*)>;
 
   explicit NaiveExecutor(const platform::Place& place) : place_(place) {}
 
@@ -86,7 +86,7 @@ class NaiveExecutor {
   std::vector<std::unique_ptr<OperatorBase>> ops_;
   Scope* scope_{nullptr};
 
-  std::vector<HookFunc> hookfunc_;
+  std::vector<HookFunc> hookfuncs_;
 
   // Record information that tensor_a should ShareBufferWith tensor_b.
   std::unordered_map<phi::DenseTensor*, std::unordered_set<phi::DenseTensor*>>
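
The naive_executor changes above redefine the executor-level hook contract: a hook now receives the scope the op actually ran in, not just the op, and the executor forwards its hook list into any "while" op before running it. A minimal sketch of a hook written against the new two-argument signature (the function name and logging are illustrative, not part of the patch):

    #include "glog/logging.h"
    #include "paddle/fluid/framework/naive_executor.h"
    #include "paddle/fluid/framework/operator.h"
    #include "paddle/fluid/framework/scope.h"

    // Assumes `executor` is a NaiveExecutor already prepared with a program.
    void AttachLoggingHook(paddle::framework::NaiveExecutor *executor) {
      executor->RegisterOutputHook(
          [](paddle::framework::OperatorBase *op,
             paddle::framework::Scope *scope) {
            // The scope argument is the point of the change: outputs of ops
            // inside a while block live in the loop's local scope, so names
            // must be resolved against the scope the hook is handed.
            for (auto &output : op->Outputs()) {
              for (auto &var_name : output.second) {
                if (scope->FindVar(var_name) != nullptr) {
                  VLOG(3) << op->Type() << " wrote " << var_name;
                }
              }
            }
          });
    }
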
diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc
index 2e47699499f..dc3674e8d80 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore.cc
@@ -949,6 +949,10 @@ void InterpreterCore::RunOperator(const Instruction& instr_node) {
 #endif
   }
 
+  for (auto& hook : hookfuncs_) {
+    hook(op, local_scope);
+  }
+
   // for debug nan/inf
   if (op_with_kernel != nullptr && FLAGS_check_nan_inf) {
     VLOG(4) << "Check nan/inf";
diff --git a/paddle/fluid/framework/new_executor/interpretercore.h b/paddle/fluid/framework/new_executor/interpretercore.h
index cf335f2b0bd..904bfc5ec69 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.h
+++ b/paddle/fluid/framework/new_executor/interpretercore.h
@@ -77,6 +77,11 @@ class InterpreterCore {
 
   const platform::Place& GetPlace() const { return place_; }
 
+  using HookFunc = std::function<void(OperatorBase*, Scope*)>;
+  void SetOutputHooks(const std::vector<HookFunc>& hookfuncs) {
+    hookfuncs_ = hookfuncs;
+  }
+
  private:
  DISABLE_COPY_AND_ASSIGN(InterpreterCore);
  // build graph
@@ -184,6 +189,8 @@ class InterpreterCore {
   std::vector<size_t> trace_execute_order_;
 
   InstructionSchedulingPriorityLess instruction_scheduling_priority_less;
+
+  std::vector<HookFunc> hookfuncs_;
 };
 
 }  // namespace framework
diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index 3de6560653c..e6a2058107b 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -371,6 +371,11 @@ class OperatorBase {
 
   void SetId(uint64_t id) { id_ = id; }
 
+  using HookFunc = std::function<void(OperatorBase*, Scope*)>;
+  void SetOutputHooks(const std::vector<HookFunc>& hookfuncs) {
+    hookfuncs_ = hookfuncs;
+  }
+
  protected:
   std::string type_;
   // NOTE: in case of OpGrad, inputs_ contains:
@@ -399,6 +404,8 @@ class OperatorBase {
   // Whether this operator executes in an Executor.
   bool run_by_executor_{true};
 
+  std::vector<HookFunc> hookfuncs_;
+
  private:
   void GenerateTemporaryNames();
   void CheckAllInputOutputSet() const;
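
With the three changes above, the hook list can travel down the execution stack: InterpreterCore::RunOperator now fires every installed hook right after an op finishes (passing the op and its local scope), and both InterpreterCore and OperatorBase expose SetOutputHooks so an outer executor can hand its hooks to a control-flow op, which hands them to its inner interpreter, which is exactly what the while_op.cc hunk below does. A small sketch of installing a hook directly on an InterpreterCore (assuming `core` is already constructed for a block; names are illustrative):

    #include <vector>
    #include "glog/logging.h"
    #include "paddle/fluid/framework/new_executor/interpretercore.h"
    #include "paddle/fluid/framework/scope.h"

    void InstallTraceHook(paddle::framework::InterpreterCore *core) {
      std::vector<paddle::framework::InterpreterCore::HookFunc> hooks;
      hooks.emplace_back([](paddle::framework::OperatorBase *op,
                            paddle::framework::Scope *scope) {
        // Called once per executed op, including ops run on every
        // iteration of a while loop that owns this interpreter.
        VLOG(3) << "finished " << op->Type() << ", local scope holds "
                << scope->LocalVarNames().size() << " vars";
      });
      core->SetOutputHooks(hooks);
    }
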
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 26c8b57d37b..4fcad3c7c11 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2638,47 +2638,26 @@ void AnalysisPredictor::RegisterOutputHook(
     const OutputTensorHookFunc &hookfunc) {
   static std::once_flag register_hook_flag;
   std::call_once(register_hook_flag, [this] {
-    executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
-      for (auto &output : op->Outputs()) {
-        for (auto &var_name : output.second) {
-          auto *var = this->sub_scope_->FindVar(var_name);
-          if (!var || !var->IsType<phi::DenseTensor>()) continue;
-          auto dense_tensor = var->Get<phi::DenseTensor>();
-          if (!dense_tensor.initialized()) continue;
-          auto tensor = this->GetOutputTensor(var_name);
-          for (auto &hookfunc : this->hookfuncs_) {
-            hookfunc(op->Type(), var_name, *tensor);
+    executor_->RegisterOutputHook(
+        [this](framework::OperatorBase *op, framework::Scope *scope) {
+          for (auto &output : op->Outputs()) {
+            for (auto &var_name : output.second) {
+              auto *var = scope->FindVar(var_name);
+              if (!var || !var->IsType<phi::DenseTensor>()) continue;
+              auto dense_tensor = var->Get<phi::DenseTensor>();
+              if (!dense_tensor.initialized()) continue;
+              auto tensor = paddle::Tensor(
+                  std::make_shared<phi::DenseTensor>(dense_tensor), var_name);
+              for (auto &hookfunc : this->hookfuncs_) {
+                hookfunc(op->Type(), var_name, tensor);
+              }
+            }
           }
-        }
-      }
-    });
+        });
   });
   hookfuncs_.push_back(hookfunc);
 }
 
-void AnalysisPredictor::RegisterOutputHook(
-    const OutputTensorHookFunc_V2 &hookfunc) {
-  static std::once_flag register_hook_flag;
-  std::call_once(register_hook_flag, [this] {
-    executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
-      for (auto &output : op->Outputs()) {
-        for (auto &var_name : output.second) {
-          auto *var = this->sub_scope_->FindVar(var_name);
-          if (!var || !var->IsType<phi::DenseTensor>()) continue;
-          auto dense_tensor = var->Get<phi::DenseTensor>();
-          if (!dense_tensor.initialized()) continue;
-          auto tensor = paddle::Tensor(
-              std::make_shared<phi::DenseTensor>(dense_tensor), var_name);
-          for (auto &hookfunc : this->hookfuncs_v2_) {
-            hookfunc(op->Type(), var_name, tensor);
-          }
-        }
-      }
-    });
-  });
-  hookfuncs_v2_.push_back(hookfunc);
-}
-
 template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<AnalysisConfig>(
     const AnalysisConfig &config) {
@@ -2964,10 +2943,6 @@ void Predictor::RegisterOutputHook(const OutputTensorHookFunc &hookfunc) {
   predictor_->RegisterOutputHook(hookfunc);
 }
 
-void Predictor::RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) {
-  predictor_->RegisterOutputHook(hookfunc);
-}
-
 void *Predictor::GetExecStream() const { return predictor_->GetExecStream(); }
 
 int GetNumBytesOfDataType(DataType dtype) {
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index 144d40e7cd2..f047e68b5cc 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -318,16 +318,6 @@ class AnalysisPredictor : public PaddlePredictor {
   ///
   Argument::fusion_statis_t fusion_statis() { return fusion_statis_; }
 
-  ///
-  /// \brief Register a output hook function to operate the intermediate tensor
-  /// of op output. when using this function, memory reuse should be tured off.
-  /// The hook function signature is void(const std::string&, const
-  /// std::string&, const paddle_infer::Tensor&>). Here, the first parameter is
-  /// op's type, the second param is output var name of the op, and the third
-  /// parameter is output tensor with the var name.
-  ///
-  void RegisterOutputHook(const OutputTensorHookFunc &hookfunc) override;
-
   ///
   /// \brief Register an output hook function to operate on the intermediate
   /// tensor of op output. When using this function, memory reuse should be
@@ -334,9 +324,9 @@ class AnalysisPredictor : public PaddlePredictor {
   /// turned off. The hook function signature is void(const std::string&,
   /// const std::string&, const paddle::Tensor&). Here, the first parameter
   /// is the op's type, the second is the output var name of the op, and the
   /// third is the output tensor with that var name.
   ///
-  void RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) override;
+  void RegisterOutputHook(const OutputTensorHookFunc &hookfunc) override;
 
   ///
   /// \brief Initialize mkldnn quantizer and execute mkldnn quantization pass
@@ -608,7 +598,6 @@ class AnalysisPredictor : public PaddlePredictor {
 
  private:
   std::vector<OutputTensorHookFunc> hookfuncs_;
-  std::vector<OutputTensorHookFunc_V2> hookfuncs_v2_;
 
   // Some status here that help to determine the status inside the predictor.
   bool status_is_cloned_{false};
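
The surviving AnalysisPredictor wrapper above no longer looks variables up in sub_scope_ or goes through GetOutputTensor; it resolves names in whatever scope the executor reports (so variables created inside a while loop's local scopes are found) and wraps the DenseTensor as a paddle::Tensor without copying device memory. That conversion, pulled out as a standalone sketch (the helper name is illustrative, not part of the patch):

    #include <memory>
    #include <string>
    #include "paddle/fluid/framework/scope.h"
    #include "paddle/fluid/framework/variable.h"
    #include "paddle/phi/api/include/tensor.h"
    #include "paddle/phi/core/dense_tensor.h"

    // Wraps the named variable of `scope` as a paddle::Tensor. Assumes the
    // variable exists and holds a DenseTensor. The DenseTensor copy is
    // shallow: metadata is copied, the underlying allocation is shared.
    paddle::Tensor ToPaddleTensor(paddle::framework::Scope *scope,
                                  const std::string &var_name) {
      auto *var = scope->FindVar(var_name);
      auto dense_tensor = var->Get<phi::DenseTensor>();
      return paddle::Tensor(
          std::make_shared<phi::DenseTensor>(dense_tensor), var_name);
    }
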
diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h
index 1dc4215a496..211f6b59539 100644
--- a/paddle/fluid/inference/api/paddle_api.h
+++ b/paddle/fluid/inference/api/paddle_api.h
@@ -39,7 +39,6 @@ using PaddleDType = paddle_infer::DataType;
 using PaddlePlace = paddle_infer::PlaceType;
 using PaddleDataLayout = paddle_infer::DataLayout;
 using paddle_infer::OutputTensorHookFunc;
-using paddle_infer::OutputTensorHookFunc_V2;
 
 /// \brief Memory manager for PaddleTensor.
 ///
@@ -314,16 +313,6 @@ class PD_INFER_DECL PaddlePredictor {
   ///
   virtual uint64_t TryShrinkMemory() { return 0; }
 
-  ///
-  /// \brief Register a output hook function to operate the intermediate tensor
-  /// of op output. when using this function, memory reuse should be tured off.
-  /// The hook function signature is void(const std::string&, const
-  /// std::string&, const paddle_infer::Tensor&>). Here, the first parameter is
-  /// op's type, the second param is output var name of the op, and the third
-  /// parameter is output tensor with the var name.
-  ///
-  virtual void RegisterOutputHook(const OutputTensorHookFunc& hookfunc) {}
-
   ///
   /// \brief Register an output hook function to operate on the intermediate
   /// tensor of op output. When using this function, memory reuse should be
@@ -332,9 +321,9 @@ class PD_INFER_DECL PaddlePredictor {
   /// turned off. The hook function signature is void(const std::string&,
   /// const std::string&, const paddle::Tensor&). Here, the first parameter
   /// is the op's type, the second is the output var name of the op, and the
   /// third is the output tensor with that var name.
   ///
-  virtual void RegisterOutputHook(const OutputTensorHookFunc_V2& hookfunc) {}
+  virtual void RegisterOutputHook(const OutputTensorHookFunc& hookfunc) {}
 
   /// \brief Clone an existing predictor
   /// When using clone, the same network will be created,
diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h
index fdabde89f91..aa77015ba63 100644
--- a/paddle/fluid/inference/api/paddle_inference_api.h
+++ b/paddle/fluid/inference/api/paddle_inference_api.h
@@ -199,16 +199,6 @@ class PD_INFER_DECL Predictor {
   ///
   void RegisterOutputHook(const OutputTensorHookFunc& hookfunc);
 
-  ///
-  /// \brief Register a output hook function to operate the intermediate tensor
-  /// of op output. when using this function, memory reuse should be tured off.
-  /// The hook function signature is void(const std::string&, const
-  /// std::string&, const Tensor&>). Here, the first parameter is op's
-  /// type, the second param is output var name of the op, and the third
-  /// parameter is output tensor with the var name.
-  ///
-  void RegisterOutputHook(const OutputTensorHookFunc_V2& hookfunc);
-
   ///
   /// \brief Get the execution stream on devices with a concept of stream,
   /// otherwise returns nullptr.
diff --git a/paddle/fluid/inference/api/paddle_tensor.h b/paddle/fluid/inference/api/paddle_tensor.h
index 1b8ae09cf3c..9cc228dbb9b 100644
--- a/paddle/fluid/inference/api/paddle_tensor.h
+++ b/paddle/fluid/inference/api/paddle_tensor.h
@@ -36,11 +36,7 @@ namespace paddle_infer {
 /// Strings for text data.
 using Strings = std::vector<std::string>;
 
-class Tensor;
-using OutputTensorHookFunc =
-    std::function<void(const std::string&, const std::string&, const Tensor&)>;
-
-using OutputTensorHookFunc_V2 = std::function<void(
+using OutputTensorHookFunc = std::function<void(
     const std::string&, const std::string&, const paddle::Tensor&)>;
 
 typedef void (*CallbackFunc)(void*);
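
After these header changes there is a single user-facing hook type: OutputTensorHookFunc now delivers a paddle::Tensor, and the old paddle_infer::Tensor overloads are gone. A usage sketch against the public C++ API (the model directory is a placeholder; as the doc comment says, memory reuse should stay off while hooks are registered):

    #include <string>
    #include "glog/logging.h"
    #include "paddle_inference_api.h"

    void RunWithHook() {
      paddle_infer::Config config;
      config.SetModel("./model_dir");  // hypothetical model directory
      // Deliberately not calling config.EnableMemoryOptim(): memory reuse
      // would invalidate the intermediate outputs seen by the hook.
      auto predictor = paddle_infer::CreatePredictor(config);
      predictor->RegisterOutputHook(
          [](const std::string &op_type,
             const std::string &var_name,
             const paddle::Tensor &tensor) {
            LOG(INFO) << op_type << " -> " << var_name
                      << " (numel=" << tensor.numel() << ")";
          });
      predictor->Run();  // the hook fires after every executed op
    }
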
diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc
index cb472fc6948..e30387b751c 100644
--- a/paddle/fluid/operators/controlflow/while_op.cc
+++ b/paddle/fluid/operators/controlflow/while_op.cc
@@ -220,6 +220,8 @@ class WhileOp : public framework::OperatorBase {
           dev_place, *block, &placeholder, execution_config));
     }
 
+    core_->SetOutputHooks(hookfuncs_);
+
     if (!is_test) {
       while (cond_data) {
         auto &current_scope = scope.NewScope();
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 32e6ff6920d..711f99e8748 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -1096,11 +1096,7 @@ void BindPaddleInferPredictor(py::module *m) {
       .def("clear_intermediate_tensor",
           &paddle_infer::Predictor::ClearIntermediateTensor)
      .def("register_output_hook",
-           py::overload_cast<const paddle_infer::OutputTensorHookFunc &>(
-               &paddle_infer::Predictor::RegisterOutputHook))
-      .def("register_output_hook_v2",
-           py::overload_cast<const paddle_infer::OutputTensorHookFunc_V2 &>(
-               &paddle_infer::Predictor::RegisterOutputHook));
+           &paddle_infer::Predictor::RegisterOutputHook);
 }
 
 void BindZeroCopyTensor(py::module *m) {
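
The while_op change above is what closes the loop: the op forwards the hooks it received (via OperatorBase::SetOutputHooks) to its inner InterpreterCore before executing the sub-block, and the pybind change collapses register_output_hook and register_output_hook_v2 into one Python method now that only a single C++ overload remains. A sketch that makes the new behavior observable by counting hook invocations per op type (names are illustrative):

    #include <map>
    #include <string>
    #include "paddle_inference_api.h"

    std::map<std::string, int> g_hook_calls;  // op type -> invocation count

    void CountOps(paddle_infer::Predictor *predictor) {
      predictor->RegisterOutputHook(
          [](const std::string &op_type,
             const std::string & /*var_name*/,
             const paddle::Tensor & /*tensor*/) {
            // For a model containing a while op, loop-body op types are
            // counted once per iteration; before this patch the hook never
            // saw them at all.
            ++g_hook_calls[op_type];
          });
    }
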
diff --git a/test/cpp/inference/api/analysis_predictor_tester.cc b/test/cpp/inference/api/analysis_predictor_tester.cc
index 84070cf39bc..6e3497d14a0 100644
--- a/test/cpp/inference/api/analysis_predictor_tester.cc
+++ b/test/cpp/inference/api/analysis_predictor_tester.cc
@@ -668,53 +668,6 @@ TEST(Predictor, Streams) {
 #endif
 
 TEST(AnalysisPredictor, OutputTensorHookFunc) {
-  auto hookfunc = [](const std::string& type,
-                     const std::string& var_name,
-                     const Tensor& tensor) { LOG(INFO) << "in hook function"; };
-
-  {
-    Config config;
-    config.SetModel(FLAGS_dirname);
-    config.EnableUseGpu(100, 0);
-
-    auto predictor = CreatePredictor(config);
-
-    predictor->RegisterOutputHook(hookfunc);
-    auto w0 = predictor->GetInputHandle("firstw");
-    auto w1 = predictor->GetInputHandle("secondw");
-    auto w2 = predictor->GetInputHandle("thirdw");
-    auto w3 = predictor->GetInputHandle("forthw");
-    w0->Reshape({4, 1});
-    w1->Reshape({4, 1});
-    w2->Reshape({4, 1});
-    w3->Reshape({4, 1});
-    auto* w0_data = w0->mutable_data<int64_t>(PlaceType::kCPU);
-    auto* w1_data = w1->mutable_data<int64_t>(PlaceType::kCPU);
-    auto* w2_data = w2->mutable_data<int64_t>(PlaceType::kCPU);
-    auto* w3_data = w3->mutable_data<int64_t>(PlaceType::kCPU);
-    for (int i = 0; i < 4; i++) {
-      w0_data[i] = i;
-      w1_data[i] = i;
-      w2_data[i] = i;
-      w3_data[i] = i;
-    }
-    predictor->Run();
-    predictor->TryShrinkMemory();
-  }
-
-  {
-    Config config;
-    config.SetModel(FLAGS_dirname);
-    config.EnableMemoryOptim();
-    config.EnableUseGpu(100, 0);
-
-    auto predictor = CreatePredictor(config);
-
-    predictor->RegisterOutputHook(hookfunc);
-  }
-}
-
-TEST(AnalysisPredictor, OutputTensorHookFunc_V2) {
   auto hookfunc = [](const std::string& type,
                      const std::string& var_name,
                      const paddle::Tensor& tensor) {
-- 
GitLab