From db3239273ccba4b974c2e83b28a3fd40c0fa99e6 Mon Sep 17 00:00:00 2001
From: Yuanle Liu
Date: Tue, 1 Nov 2022 20:03:44 +0800
Subject: [PATCH] [Paddle Inference] add RegisterOutputHook interface (#47050)

---
 paddle/fluid/framework/naive_executor.cc       | 13 ++---
 paddle/fluid/framework/naive_executor.h        | 19 ++++----
 .../fluid/inference/api/analysis_predictor.cc  | 47 +++++++++++++++----
 .../fluid/inference/api/analysis_predictor.h   | 12 +++++
 .../api/analysis_predictor_tester.cc           | 47 +++++++++++++++++++
 paddle/fluid/inference/api/paddle_api.h        | 11 +++++
 .../inference/api/paddle_inference_api.h       | 10 ++++
 paddle/fluid/inference/api/paddle_tensor.h     |  7 +++
 paddle/fluid/pybind/inference_api.cc           |  5 +-
 9 files changed, 145 insertions(+), 26 deletions(-)

diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc
index eb4ad8d0daf..52ed842d74e 100644
--- a/paddle/fluid/framework/naive_executor.cc
+++ b/paddle/fluid/framework/naive_executor.cc
@@ -65,6 +65,9 @@ void NaiveExecutor::Run() {
 #ifdef PADDLE_WITH_INFERENCE_NVTX
     platform::CudaNvtxRangePop();
 #endif
+    if (hookfunc_) {
+      hookfunc_(op.get());
+    }
   }
 #ifdef PADDLE_WITH_INFERENCE_NVTX
   platform::CudaNvtxRangePop();
 #endif
@@ -142,14 +145,8 @@ phi::DenseTensor *NaiveExecutor::FindTensor(const std::string &name) {
   return tensor;
 }
 
-void NaiveExecutor::CleanFeedFetchOps() {
-  std::vector<std::unique_ptr<OperatorBase>> ops;
-  for (auto &op : ops_) {
-    if (op->Type() != "feed" && op->Type() != "fetch") {
-      ops.emplace_back(std::move(op));
-    }
-  }
-  ops_.swap(ops);
+void NaiveExecutor::RegisterOutputHook(const HookFunc &hookfunc) {
+  hookfunc_ = hookfunc;
 }
 
 NaiveExecutor::~NaiveExecutor() {
diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h
index 8ca3f5997af..882f50b451a 100644
--- a/paddle/fluid/framework/naive_executor.h
+++ b/paddle/fluid/framework/naive_executor.h
@@ -14,6 +14,7 @@
 
 #pragma once
 
+#include <functional>
 #include <map>
 #include <memory>
 #include <string>
@@ -24,10 +25,6 @@
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/place.h"
 
-namespace phi {
-class DenseTensor;
-}  // namespace phi
-
 namespace paddle {
 namespace framework {
 
@@ -40,6 +37,8 @@ class Scope;
 
 class NaiveExecutor {
  public:
+  using HookFunc = std::function<void(OperatorBase*)>;
+
   explicit NaiveExecutor(const platform::Place& place) : place_(place) {}
 
   ~NaiveExecutor();
@@ -66,13 +65,13 @@ class NaiveExecutor {
   // Get an tensor to operating directly, without the need for feed_ops.
   phi::DenseTensor* FindTensor(const std::string& name);
 
-  Scope* scope() { return scope_; }
-
-  void CleanFeedFetchOps();
+  Scope* GetScope() { return scope_; }
 
   void ResetTrtOps(int num);
 
- protected:
+  void RegisterOutputHook(const HookFunc& hookfunc);
+
+ private:
   void CreateOps(const ProgramDesc& desc,
                  int block_id,
                  bool with_feed_fetch_ops);
@@ -81,7 +80,9 @@ class NaiveExecutor {
   const platform::Place place_;
   // Catch the required resource to avoid recreate.
   std::vector<std::unique_ptr<OperatorBase>> ops_;
-  Scope* scope_;
+  Scope* scope_{nullptr};
+
+  HookFunc hookfunc_{nullptr};
 };
 
 }  // namespace framework
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 9197efc2a5e..280427cb4c8 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -32,6 +32,7 @@
 #include "paddle/fluid/framework/ir/pass.h"
 #include "paddle/fluid/framework/naive_executor.h"
 #include "paddle/fluid/framework/op_proto_maker.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/transfer_scope_cache.h"
 #include "paddle/fluid/framework/var_type_traits.h"
@@ -1557,10 +1558,10 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
   if (config_.dist_config().use_dist_model()) {
     scope = scope_.get();
   } else {
-    scope = executor_->scope();
+    scope = executor_->GetScope();
   }
 #else
-  scope = executor_->scope();
+  scope = executor_->GetScope();
 #endif
   PADDLE_ENFORCE_NOT_NULL(
       scope->FindVar(name),
@@ -1612,10 +1613,10 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
   if (config_.dist_config().use_dist_model()) {
     scope = scope_.get();
   } else {
-    scope = executor_->scope();
+    scope = executor_->GetScope();
   }
 #else
-  scope = executor_->scope();
+  scope = executor_->GetScope();
 #endif
   PADDLE_ENFORCE_NOT_NULL(
       scope->FindVar(name),
@@ -1997,7 +1998,7 @@ void AnalysisPredictor::ClearIntermediateTensor() {
   for (auto *var : global_block->AllVars()) {
     if (!IsPersistable(var)) {
       const std::string name = var->Name();
-      auto *variable = executor_->scope()->FindVar(name);
+      auto *variable = executor_->GetScope()->FindVar(name);
       if (variable != nullptr && variable->IsType<phi::DenseTensor>() &&
           name != "feed" && name != "fetch") {
         VLOG(3) << "Clear Intermediate Tensor: " << name;
@@ -2178,6 +2179,33 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) {
   exe.Run(save_program, scope(), 0, true, true);
 }
 
+void AnalysisPredictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
+  if (config_.enable_memory_optim()) {
+    LOG(WARNING) << "If you want to run output hook function, you should "
+                    "use config.EnableMemoryOptim(false) to turn off memory "
+                    "reuse!";
+    return;
+  }
+  static std::once_flag register_hook_flag;
+  std::call_once(register_hook_flag, [this] {
+    executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
+      for (auto &output : op->Outputs()) {
+        for (auto &var_name : output.second) {
+          auto *var = this->sub_scope_->FindVar(var_name);
+          if (!var || !var->IsType<phi::DenseTensor>()) continue;
+          auto dense_tensor = var->Get<phi::DenseTensor>();
+          if (!dense_tensor.initialized()) continue;
+          auto tensor = this->GetOutputTensor(var_name);
+          for (auto &hookfunc : this->hookfuncs_) {
+            hookfunc(op->Type(), var_name, *tensor);
+          }
+        }
+      }
+    });
+  });
+  hookfuncs_.push_back(hookfunc);
+}
+
 template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<AnalysisConfig>(
     const AnalysisConfig &config) {
@@ -2371,6 +2399,10 @@ void Predictor::ClearIntermediateTensor() {
 
 uint64_t Predictor::TryShrinkMemory() { return predictor_->TryShrinkMemory(); }
 
+void Predictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
+  predictor_->RegisterOutputHook(hookfunc);
+}
+
 void *Predictor::GetExecStream() const { return predictor_->GetExecStream(); }
 
 int GetNumBytesOfDataType(DataType dtype) {
@@ -2452,10 +2484,9 @@ PredictorPool::PredictorPool(const Config &config, size_t size) {
   for (size_t i = 0; i < size - 1; i++) {
     if (config.tensorrt_engine_enabled()) {
       Config config_tmp(copy_config);
-      preds_.push_back(
-          std::move(std::unique_ptr<Predictor>(new Predictor(config_tmp))));
+      preds_.emplace_back(new Predictor(config_tmp));
     } else {
-      preds_.push_back(std::move(main_pred_->Clone()));
+      preds_.emplace_back(main_pred_->Clone());
     }
   }
 }
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index d1dd921db14..37d1511fa27 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -272,6 +272,16 @@ class AnalysisPredictor : public PaddlePredictor {
   ///
   std::string GetSerializedProgram() const override;
 
+  ///
+  /// \brief Register an output hook function to operate on the intermediate
+  /// tensors of op outputs. When using this function, memory reuse should be
+  /// turned off. The hook function signature is void(const std::string&,
+  /// const std::string&, const Tensor&). Here, the first parameter is the
+  /// op's type, the second parameter is the output variable name of the op,
+  /// and the third parameter is the output tensor with that variable name.
+  ///
+  void RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) override;
+
   ///
   /// \brief Initialize mkldnn quantizer and execute mkldnn quantization pass
   ///
@@ -510,6 +520,8 @@ class AnalysisPredictor : public PaddlePredictor {
   int predictor_id_;
 
  private:
+  std::vector<Exp_OutputHookFunc> hookfuncs_;
+
   // Some status here that help to determine the status inside the predictor.
   bool status_is_cloned_{false};
 
diff --git a/paddle/fluid/inference/api/analysis_predictor_tester.cc b/paddle/fluid/inference/api/analysis_predictor_tester.cc
index 8856ceb61a7..5cba8f06ab9 100644
--- a/paddle/fluid/inference/api/analysis_predictor_tester.cc
+++ b/paddle/fluid/inference/api/analysis_predictor_tester.cc
@@ -611,4 +611,51 @@ TEST(Predictor, Streams) {
 }
 #endif
 
+TEST(AnalysisPredictor, OutputHookFunc) {
+  auto hookfunc = [](const std::string& type,
+                     const std::string& var_name,
+                     const Tensor& tensor) { LOG(INFO) << "in hook function"; };
+
+  {
+    Config config;
+    config.SetModel(FLAGS_dirname);
+    config.EnableUseGpu(100, 0);
+
+    auto predictor = CreatePredictor(config);
+
+    predictor->RegisterOutputHook(hookfunc);
+    auto w0 = predictor->GetInputHandle("firstw");
+    auto w1 = predictor->GetInputHandle("secondw");
+    auto w2 = predictor->GetInputHandle("thirdw");
+    auto w3 = predictor->GetInputHandle("forthw");
+    w0->Reshape({4, 1});
+    w1->Reshape({4, 1});
+    w2->Reshape({4, 1});
+    w3->Reshape({4, 1});
+    auto* w0_data = w0->mutable_data<int64_t>(PlaceType::kCPU);
+    auto* w1_data = w1->mutable_data<int64_t>(PlaceType::kCPU);
+    auto* w2_data = w2->mutable_data<int64_t>(PlaceType::kCPU);
+    auto* w3_data = w3->mutable_data<int64_t>(PlaceType::kCPU);
+    for (int i = 0; i < 4; i++) {
+      w0_data[i] = i;
+      w1_data[i] = i;
+      w2_data[i] = i;
+      w3_data[i] = i;
+    }
+    predictor->Run();
+    predictor->TryShrinkMemory();
+  }
+
+  {
+    Config config;
+    config.SetModel(FLAGS_dirname);
+    config.EnableMemoryOptim();
+    config.EnableUseGpu(100, 0);
+
+    auto predictor = CreatePredictor(config);
+
+    predictor->RegisterOutputHook(hookfunc);
+  }
+}
+
 }  // namespace paddle_infer
diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h
index ffb634ce829..ff1ec1eba30 100644
--- a/paddle/fluid/inference/api/paddle_api.h
+++ b/paddle/fluid/inference/api/paddle_api.h
@@ -38,6 +38,7 @@ namespace paddle {
 using PaddleDType = paddle_infer::DataType;
 using PaddlePlace = paddle_infer::PlaceType;
 using PaddleDataLayout = paddle_infer::DataLayout;
+using paddle_infer::Exp_OutputHookFunc;
 
 /// \brief Memory manager for PaddleTensor.
 ///
@@ -289,6 +290,16 @@ class PD_INFER_DECL PaddlePredictor {
   ///
   virtual uint64_t TryShrinkMemory() { return 0; }
 
+  ///
+  /// \brief Register an output hook function to operate on the intermediate
+  /// tensors of op outputs. When using this function, memory reuse should be
+  /// turned off. The hook function signature is void(const std::string&,
+  /// const std::string&, const Tensor&). Here, the first parameter is the
+  /// op's type, the second parameter is the output variable name of the op,
+  /// and the third parameter is the output tensor with that variable name.
+  ///
+  virtual void RegisterOutputHook(const Exp_OutputHookFunc& hookfunc) {}
+
   /// \brief Clone an existing predictor
   /// When using clone, the same network will be created,
   /// and the parameters between them are shared.
diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h
index 055cf3a13fb..1a52c011b2a 100644
--- a/paddle/fluid/inference/api/paddle_inference_api.h
+++ b/paddle/fluid/inference/api/paddle_inference_api.h
@@ -157,6 +157,16 @@ class PD_INFER_DECL Predictor {
   ///
   uint64_t TryShrinkMemory();
 
+  ///
+  /// \brief Register an output hook function to operate on the intermediate
+  /// tensors of op outputs. When using this function, memory reuse should be
+  /// turned off. The hook function signature is void(const std::string&,
+  /// const std::string&, const Tensor&). Here, the first parameter is the
+  /// op's type, the second parameter is the output variable name of the op,
+  /// and the third parameter is the output tensor with that variable name.
+  ///
+  void RegisterOutputHook(const Exp_OutputHookFunc& hookfunc);
+
   ///
   /// \brief Get the execution stream on devices with a concept of stream,
   /// otherwise returns nullptr.
diff --git a/paddle/fluid/inference/api/paddle_tensor.h b/paddle/fluid/inference/api/paddle_tensor.h
index b10f051d6e4..9bc95f251eb 100644
--- a/paddle/fluid/inference/api/paddle_tensor.h
+++ b/paddle/fluid/inference/api/paddle_tensor.h
@@ -14,7 +14,10 @@
 
 #pragma once
 
+#include <functional>
+#include <memory>
 #include <string>
+#include <vector>
 
 #include "paddle_infer_declare.h"  // NOLINT
 
@@ -29,6 +32,10 @@ namespace paddle_infer {
 /// Strings for text data.
 using Strings = std::vector<std::string>;
 
+class Tensor;
+using Exp_OutputHookFunc =
+    std::function<void(const std::string&, const std::string&, const Tensor&)>;
+
 typedef void (*CallbackFunc)(void*);
 
 #if defined(PADDLE_WITH_TESTING) && defined(PADDLE_WITH_INFERENCE_API_TEST)
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 5d2a5799078..9b99cad8693 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -14,6 +14,7 @@
 
 #include "paddle/fluid/pybind/inference_api.h"
 
+#include <pybind11/functional.h>
 #include <pybind11/numpy.h>
 #include <pybind11/stl.h>
 
@@ -946,7 +947,9 @@ void BindPaddleInferPredictor(py::module *m) {
 #endif
       .def("try_shrink_memory", &paddle_infer::Predictor::TryShrinkMemory)
      .def("clear_intermediate_tensor",
-           &paddle_infer::Predictor::ClearIntermediateTensor);
+           &paddle_infer::Predictor::ClearIntermediateTensor)
+      .def("register_output_hook",
+           &paddle_infer::Predictor::RegisterOutputHook);
 }
 
 void BindZeroCopyTensor(py::module *m) {
-- 
GitLab
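
For context beyond the diff itself, the following is a minimal C++ usage sketch of the interface this patch adds (illustrative only, not part of the patch). It assumes a Paddle Inference build that already contains this change; the model directory "./sample_model" is a hypothetical placeholder.

// Minimal usage sketch of RegisterOutputHook (illustrative, not part of the patch).
#include <iostream>
#include <string>

#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("./sample_model");  // hypothetical model directory
  // Memory reuse must stay off: per this patch, RegisterOutputHook warns and
  // ignores the hook when EnableMemoryOptim() has been called.

  auto predictor = paddle_infer::CreatePredictor(config);

  // The hook receives the op type, the output variable name, and the output
  // tensor, and is invoked after each operator finishes running.
  predictor->RegisterOutputHook([](const std::string& op_type,
                                   const std::string& var_name,
                                   const paddle_infer::Tensor& tensor) {
    std::cout << op_type << " -> " << var_name << ", shape: [";
    for (int d : tensor.shape()) std::cout << d << " ";
    std::cout << "]" << std::endl;
  });

  // Feed inputs via predictor->GetInputHandle(...) as usual, then:
  // predictor->Run();
  return 0;
}

From Python, the same callback can be attached through the register_output_hook binding added to the Predictor class in inference_api.cc above.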