未验证 提交 418d2796 编写于 作者: Y Yuanle Liu 提交者: GitHub

output tensor hook support while op (#54432)

上级 2f781a34
...@@ -66,6 +66,10 @@ void NaiveExecutor::Run() { ...@@ -66,6 +66,10 @@ void NaiveExecutor::Run() {
platform::NvtxRangeColor::Green); platform::NvtxRangeColor::Green);
#endif #endif
if (op->Type() == "while") {
op->SetOutputHooks(hookfuncs_);
}
op->Run(*scope_, place_); op->Run(*scope_, place_);
// Update the shared_holder so that it only records the max one. // Update the shared_holder so that it only records the max one.
...@@ -97,8 +101,8 @@ void NaiveExecutor::Run() { ...@@ -97,8 +101,8 @@ void NaiveExecutor::Run() {
#ifdef PADDLE_WITH_INFERENCE_NVTX #ifdef PADDLE_WITH_INFERENCE_NVTX
platform::CudaNvtxRangePop(); platform::CudaNvtxRangePop();
#endif #endif
for (auto &func : hookfunc_) { for (auto &func : hookfuncs_) {
func(op.get()); func(op.get(), scope_);
} }
} }
#ifdef PADDLE_WITH_INFERENCE_NVTX #ifdef PADDLE_WITH_INFERENCE_NVTX
...@@ -178,7 +182,7 @@ phi::DenseTensor *NaiveExecutor::FindTensor(const std::string &name) { ...@@ -178,7 +182,7 @@ phi::DenseTensor *NaiveExecutor::FindTensor(const std::string &name) {
} }
void NaiveExecutor::RegisterOutputHook(const HookFunc &hookfunc) { void NaiveExecutor::RegisterOutputHook(const HookFunc &hookfunc) {
hookfunc_.push_back(hookfunc); hookfuncs_.push_back(hookfunc);
} }
void NaiveExecutor::MakeReusePlan( void NaiveExecutor::MakeReusePlan(
......
...@@ -38,7 +38,7 @@ class Scope; ...@@ -38,7 +38,7 @@ class Scope;
class NaiveExecutor { class NaiveExecutor {
public: public:
using HookFunc = std::function<void(OperatorBase*)>; using HookFunc = std::function<void(OperatorBase*, Scope*)>;
explicit NaiveExecutor(const platform::Place& place) : place_(place) {} explicit NaiveExecutor(const platform::Place& place) : place_(place) {}
...@@ -86,7 +86,7 @@ class NaiveExecutor { ...@@ -86,7 +86,7 @@ class NaiveExecutor {
std::vector<std::unique_ptr<OperatorBase>> ops_; std::vector<std::unique_ptr<OperatorBase>> ops_;
Scope* scope_{nullptr}; Scope* scope_{nullptr};
std::vector<HookFunc> hookfunc_; std::vector<HookFunc> hookfuncs_;
// Record information that tensor_a should ShareBufferWith tensor_b. // Record information that tensor_a should ShareBufferWith tensor_b.
std::unordered_map<OperatorBase*, std::unordered_map<phi::DenseTensor*, int>> std::unordered_map<OperatorBase*, std::unordered_map<phi::DenseTensor*, int>>
......
...@@ -949,6 +949,10 @@ void InterpreterCore::RunOperator(const Instruction& instr_node) { ...@@ -949,6 +949,10 @@ void InterpreterCore::RunOperator(const Instruction& instr_node) {
#endif #endif
} }
for (auto& hook : hookfuncs_) {
hook(op, local_scope);
}
// for debug nan/inf // for debug nan/inf
if (op_with_kernel != nullptr && FLAGS_check_nan_inf) { if (op_with_kernel != nullptr && FLAGS_check_nan_inf) {
VLOG(4) << "Check nan/inf"; VLOG(4) << "Check nan/inf";
......
...@@ -77,6 +77,11 @@ class InterpreterCore { ...@@ -77,6 +77,11 @@ class InterpreterCore {
const platform::Place& GetPlace() const { return place_; } const platform::Place& GetPlace() const { return place_; }
using HookFunc = std::function<void(OperatorBase*, Scope*)>;
void SetOutputHooks(const std::vector<HookFunc>& hookfuncs) {
hookfuncs_ = hookfuncs;
}
private: private:
DISABLE_COPY_AND_ASSIGN(InterpreterCore); DISABLE_COPY_AND_ASSIGN(InterpreterCore);
// build graph // build graph
...@@ -184,6 +189,8 @@ class InterpreterCore { ...@@ -184,6 +189,8 @@ class InterpreterCore {
std::vector<size_t> trace_execute_order_; std::vector<size_t> trace_execute_order_;
InstructionSchedulingPriorityLess instruction_scheduling_priority_less; InstructionSchedulingPriorityLess instruction_scheduling_priority_less;
std::vector<HookFunc> hookfuncs_;
}; };
} // namespace framework } // namespace framework
......
...@@ -371,6 +371,11 @@ class OperatorBase { ...@@ -371,6 +371,11 @@ class OperatorBase {
void SetId(uint64_t id) { id_ = id; } void SetId(uint64_t id) { id_ = id; }
using HookFunc = std::function<void(OperatorBase*, Scope*)>;
void SetOutputHooks(const std::vector<HookFunc>& hookfuncs) {
hookfuncs_ = hookfuncs;
}
protected: protected:
std::string type_; std::string type_;
// NOTE: in case of OpGrad, inputs_ contains: // NOTE: in case of OpGrad, inputs_ contains:
...@@ -399,6 +404,8 @@ class OperatorBase { ...@@ -399,6 +404,8 @@ class OperatorBase {
// Whether this operator executes in an Executor. // Whether this operator executes in an Executor.
bool run_by_executor_{true}; bool run_by_executor_{true};
std::vector<HookFunc> hookfuncs_;
private: private:
void GenerateTemporaryNames(); void GenerateTemporaryNames();
void CheckAllInputOutputSet() const; void CheckAllInputOutputSet() const;
......
...@@ -2638,45 +2638,24 @@ void AnalysisPredictor::RegisterOutputHook( ...@@ -2638,45 +2638,24 @@ void AnalysisPredictor::RegisterOutputHook(
const OutputTensorHookFunc &hookfunc) { const OutputTensorHookFunc &hookfunc) {
static std::once_flag register_hook_flag; static std::once_flag register_hook_flag;
std::call_once(register_hook_flag, [this] { std::call_once(register_hook_flag, [this] {
executor_->RegisterOutputHook([this](framework::OperatorBase *op) { executor_->RegisterOutputHook(
[this](framework::OperatorBase *op, framework::Scope *scope) {
for (auto &output : op->Outputs()) { for (auto &output : op->Outputs()) {
for (auto &var_name : output.second) { for (auto &var_name : output.second) {
auto *var = this->sub_scope_->FindVar(var_name); auto *var = scope->FindVar(var_name);
if (!var || !var->IsType<phi::DenseTensor>()) continue;
auto dense_tensor = var->Get<phi::DenseTensor>();
if (!dense_tensor.initialized()) continue;
auto tensor = this->GetOutputTensor(var_name);
for (auto &hookfunc : this->hookfuncs_) {
hookfunc(op->Type(), var_name, *tensor);
}
}
}
});
});
hookfuncs_.push_back(hookfunc);
}
void AnalysisPredictor::RegisterOutputHook(
const OutputTensorHookFunc_V2 &hookfunc) {
static std::once_flag register_hook_flag;
std::call_once(register_hook_flag, [this] {
executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
for (auto &output : op->Outputs()) {
for (auto &var_name : output.second) {
auto *var = this->sub_scope_->FindVar(var_name);
if (!var || !var->IsType<phi::DenseTensor>()) continue; if (!var || !var->IsType<phi::DenseTensor>()) continue;
auto dense_tensor = var->Get<phi::DenseTensor>(); auto dense_tensor = var->Get<phi::DenseTensor>();
if (!dense_tensor.initialized()) continue; if (!dense_tensor.initialized()) continue;
auto tensor = paddle::Tensor( auto tensor = paddle::Tensor(
std::make_shared<phi::DenseTensor>(dense_tensor), var_name); std::make_shared<phi::DenseTensor>(dense_tensor), var_name);
for (auto &hookfunc : this->hookfuncs_v2_) { for (auto &hookfunc : this->hookfuncs_) {
hookfunc(op->Type(), var_name, tensor); hookfunc(op->Type(), var_name, tensor);
} }
} }
} }
}); });
}); });
hookfuncs_v2_.push_back(hookfunc); hookfuncs_.push_back(hookfunc);
} }
template <> template <>
...@@ -2964,10 +2943,6 @@ void Predictor::RegisterOutputHook(const OutputTensorHookFunc &hookfunc) { ...@@ -2964,10 +2943,6 @@ void Predictor::RegisterOutputHook(const OutputTensorHookFunc &hookfunc) {
predictor_->RegisterOutputHook(hookfunc); predictor_->RegisterOutputHook(hookfunc);
} }
void Predictor::RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) {
predictor_->RegisterOutputHook(hookfunc);
}
void *Predictor::GetExecStream() const { return predictor_->GetExecStream(); } void *Predictor::GetExecStream() const { return predictor_->GetExecStream(); }
int GetNumBytesOfDataType(DataType dtype) { int GetNumBytesOfDataType(DataType dtype) {
......
...@@ -318,16 +318,6 @@ class AnalysisPredictor : public PaddlePredictor { ...@@ -318,16 +318,6 @@ class AnalysisPredictor : public PaddlePredictor {
/// ///
Argument::fusion_statis_t fusion_statis() { return fusion_statis_; } Argument::fusion_statis_t fusion_statis() { return fusion_statis_; }
///
/// \brief Register an output hook function to operate on the intermediate tensor
/// of op output. When using this function, memory reuse should be turned off.
/// The hook function signature is void(const std::string&, const
/// std::string&, const paddle_infer::Tensor&). Here, the first parameter is
/// the op's type, the second parameter is the output var name of the op, and
/// the third parameter is the output tensor with that var name.
///
void RegisterOutputHook(const OutputTensorHookFunc &hookfunc) override;
/// ///
/// \brief Register an output hook function to operate on the intermediate tensor /// \brief Register an output hook function to operate on the intermediate tensor
/// of op output. When using this function, memory reuse should be turned off. /// of op output. When using this function, memory reuse should be turned off.
...@@ -336,7 +326,7 @@ class AnalysisPredictor : public PaddlePredictor { ...@@ -336,7 +326,7 @@ class AnalysisPredictor : public PaddlePredictor {
/// type, the second param is output var name of the op, and the third /// type, the second param is output var name of the op, and the third
/// parameter is output tensor with the var name. /// parameter is output tensor with the var name.
/// ///
void RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) override; void RegisterOutputHook(const OutputTensorHookFunc &hookfunc) override;
/// ///
/// \brief Initialize mkldnn quantizer and execute mkldnn quantization pass /// \brief Initialize mkldnn quantizer and execute mkldnn quantization pass
...@@ -608,7 +598,6 @@ class AnalysisPredictor : public PaddlePredictor { ...@@ -608,7 +598,6 @@ class AnalysisPredictor : public PaddlePredictor {
private: private:
std::vector<OutputTensorHookFunc> hookfuncs_; std::vector<OutputTensorHookFunc> hookfuncs_;
std::vector<OutputTensorHookFunc_V2> hookfuncs_v2_;
// Some status here that help to determine the status inside the predictor. // Some status here that help to determine the status inside the predictor.
bool status_is_cloned_{false}; bool status_is_cloned_{false};
......
...@@ -39,7 +39,6 @@ using PaddleDType = paddle_infer::DataType; ...@@ -39,7 +39,6 @@ using PaddleDType = paddle_infer::DataType;
using PaddlePlace = paddle_infer::PlaceType; using PaddlePlace = paddle_infer::PlaceType;
using PaddleDataLayout = paddle_infer::DataLayout; using PaddleDataLayout = paddle_infer::DataLayout;
using paddle_infer::OutputTensorHookFunc; using paddle_infer::OutputTensorHookFunc;
using paddle_infer::OutputTensorHookFunc_V2;
/// \brief Memory manager for PaddleTensor. /// \brief Memory manager for PaddleTensor.
/// ///
...@@ -314,16 +313,6 @@ class PD_INFER_DECL PaddlePredictor { ...@@ -314,16 +313,6 @@ class PD_INFER_DECL PaddlePredictor {
/// ///
virtual uint64_t TryShrinkMemory() { return 0; } virtual uint64_t TryShrinkMemory() { return 0; }
///
/// \brief Register an output hook function to operate on the intermediate tensor
/// of op output. When using this function, memory reuse should be turned off.
/// The hook function signature is void(const std::string&, const
/// std::string&, const paddle_infer::Tensor&). Here, the first parameter is
/// the op's type, the second parameter is the output var name of the op, and
/// the third parameter is the output tensor with that var name.
///
virtual void RegisterOutputHook(const OutputTensorHookFunc& hookfunc) {}
/// ///
/// \brief Register an output hook function to operate on the intermediate tensor /// \brief Register an output hook function to operate on the intermediate tensor
/// of op output. When using this function, memory reuse should be turned off. /// of op output. When using this function, memory reuse should be turned off.
...@@ -332,7 +321,7 @@ class PD_INFER_DECL PaddlePredictor { ...@@ -332,7 +321,7 @@ class PD_INFER_DECL PaddlePredictor {
/// type, the second param is output var name of the op, and the third /// type, the second param is output var name of the op, and the third
/// parameter is output tensor with the var name. /// parameter is output tensor with the var name.
/// ///
virtual void RegisterOutputHook(const OutputTensorHookFunc_V2& hookfunc) {} virtual void RegisterOutputHook(const OutputTensorHookFunc& hookfunc) {}
/// \brief Clone an existing predictor /// \brief Clone an existing predictor
/// When using clone, the same network will be created, /// When using clone, the same network will be created,
......
...@@ -199,16 +199,6 @@ class PD_INFER_DECL Predictor { ...@@ -199,16 +199,6 @@ class PD_INFER_DECL Predictor {
/// ///
void RegisterOutputHook(const OutputTensorHookFunc& hookfunc); void RegisterOutputHook(const OutputTensorHookFunc& hookfunc);
///
/// \brief Register an output hook function to operate on the intermediate tensor
/// of op output. When using this function, memory reuse should be turned off.
/// The hook function signature is void(const std::string&, const
/// std::string&, const Tensor&). Here, the first parameter is the op's
/// type, the second parameter is the output var name of the op, and the third
/// parameter is the output tensor with that var name.
///
void RegisterOutputHook(const OutputTensorHookFunc_V2& hookfunc);
/// ///
/// \brief Get the execution stream on devices with a concept of stream, /// \brief Get the execution stream on devices with a concept of stream,
/// otherwise returns nullptr. /// otherwise returns nullptr.
......
...@@ -36,11 +36,7 @@ namespace paddle_infer { ...@@ -36,11 +36,7 @@ namespace paddle_infer {
/// Strings for text data. /// Strings for text data.
using Strings = std::vector<std::string>; using Strings = std::vector<std::string>;
class Tensor; using OutputTensorHookFunc = std::function<void(
using OutputTensorHookFunc =
std::function<void(const std::string&, const std::string&, const Tensor&)>;
using OutputTensorHookFunc_V2 = std::function<void(
const std::string&, const std::string&, const paddle::Tensor&)>; const std::string&, const std::string&, const paddle::Tensor&)>;
typedef void (*CallbackFunc)(void*); typedef void (*CallbackFunc)(void*);
......
...@@ -220,6 +220,8 @@ class WhileOp : public framework::OperatorBase { ...@@ -220,6 +220,8 @@ class WhileOp : public framework::OperatorBase {
dev_place, *block, &placeholder, execution_config)); dev_place, *block, &placeholder, execution_config));
} }
core_->SetOutputHooks(hookfuncs_);
if (!is_test) { if (!is_test) {
while (cond_data) { while (cond_data) {
auto &current_scope = scope.NewScope(); auto &current_scope = scope.NewScope();
......
...@@ -1096,11 +1096,7 @@ void BindPaddleInferPredictor(py::module *m) { ...@@ -1096,11 +1096,7 @@ void BindPaddleInferPredictor(py::module *m) {
.def("clear_intermediate_tensor", .def("clear_intermediate_tensor",
&paddle_infer::Predictor::ClearIntermediateTensor) &paddle_infer::Predictor::ClearIntermediateTensor)
.def("register_output_hook", .def("register_output_hook",
py::overload_cast<const paddle_infer::OutputTensorHookFunc &>( &paddle_infer::Predictor::RegisterOutputHook);
&paddle_infer::Predictor::RegisterOutputHook))
.def("register_output_hook_v2",
py::overload_cast<const paddle_infer::OutputTensorHookFunc_V2 &>(
&paddle_infer::Predictor::RegisterOutputHook));
} }
void BindZeroCopyTensor(py::module *m) { void BindZeroCopyTensor(py::module *m) {
......
...@@ -668,53 +668,6 @@ TEST(Predictor, Streams) { ...@@ -668,53 +668,6 @@ TEST(Predictor, Streams) {
#endif #endif
TEST(AnalysisPredictor, OutputTensorHookFunc) { TEST(AnalysisPredictor, OutputTensorHookFunc) {
auto hookfunc = [](const std::string& type,
const std::string& var_name,
const Tensor& tensor) { LOG(INFO) << "in hook function"; };
{
Config config;
config.SetModel(FLAGS_dirname);
config.EnableUseGpu(100, 0);
auto predictor = CreatePredictor(config);
predictor->RegisterOutputHook(hookfunc);
auto w0 = predictor->GetInputHandle("firstw");
auto w1 = predictor->GetInputHandle("secondw");
auto w2 = predictor->GetInputHandle("thirdw");
auto w3 = predictor->GetInputHandle("forthw");
w0->Reshape({4, 1});
w1->Reshape({4, 1});
w2->Reshape({4, 1});
w3->Reshape({4, 1});
auto* w0_data = w0->mutable_data<int64_t>(PlaceType::kCPU);
auto* w1_data = w1->mutable_data<int64_t>(PlaceType::kCPU);
auto* w2_data = w2->mutable_data<int64_t>(PlaceType::kCPU);
auto* w3_data = w3->mutable_data<int64_t>(PlaceType::kCPU);
for (int i = 0; i < 4; i++) {
w0_data[i] = i;
w1_data[i] = i;
w2_data[i] = i;
w3_data[i] = i;
}
predictor->Run();
predictor->TryShrinkMemory();
}
{
Config config;
config.SetModel(FLAGS_dirname);
config.EnableMemoryOptim();
config.EnableUseGpu(100, 0);
auto predictor = CreatePredictor(config);
predictor->RegisterOutputHook(hookfunc);
}
}
TEST(AnalysisPredictor, OutputTensorHookFunc_V2) {
auto hookfunc = [](const std::string& type, auto hookfunc = [](const std::string& type,
const std::string& var_name, const std::string& var_name,
const paddle::Tensor& tensor) { const paddle::Tensor& tensor) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册