Unverified commit db323927, authored by Yuanle Liu, committed by GitHub

[Paddle Inference] add RegisterOutputHook interface (#47050)

Parent a341bb8c
......@@ -65,6 +65,9 @@ void NaiveExecutor::Run() {
#ifdef PADDLE_WITH_INFERENCE_NVTX
platform::CudaNvtxRangePop();
#endif
if (hookfunc_) {
hookfunc_(op.get());
}
}
#ifdef PADDLE_WITH_INFERENCE_NVTX
platform::CudaNvtxRangePop();
......@@ -142,14 +145,8 @@ phi::DenseTensor *NaiveExecutor::FindTensor(const std::string &name) {
return tensor;
}
void NaiveExecutor::CleanFeedFetchOps() {
std::vector<std::unique_ptr<OperatorBase>> ops;
for (auto &op : ops_) {
if (op->Type() != "feed" && op->Type() != "fetch") {
ops.emplace_back(std::move(op));
}
}
ops_.swap(ops);
void NaiveExecutor::RegisterOutputHook(const HookFunc &hookfunc) {
hookfunc_ = hookfunc;
}
NaiveExecutor::~NaiveExecutor() {
......
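For context, the executor-side change above invokes the registered hook once after each op finishes running. Below is a minimal self-contained sketch of the same dispatch pattern; MiniExecutor and Op are hypothetical stand-ins for NaiveExecutor and OperatorBase, not Paddle's real classes.

#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Hypothetical stand-in for paddle::framework::OperatorBase.
struct Op {
  std::string type;
  void Run() { /* run the kernel */ }
};

// Hypothetical stand-in for NaiveExecutor, showing only the hook dispatch.
class MiniExecutor {
 public:
  using HookFunc = std::function<void(Op*)>;
  void RegisterOutputHook(const HookFunc& hookfunc) { hookfunc_ = hookfunc; }
  void Run() {
    for (auto& op : ops_) {
      op->Run();
      // Mirrors the diff: fire the hook after every op, if one is set.
      if (hookfunc_) {
        hookfunc_(op.get());
      }
    }
  }
  std::vector<std::unique_ptr<Op>> ops_;

 private:
  HookFunc hookfunc_{nullptr};
};

int main() {
  MiniExecutor exec;
  exec.ops_.emplace_back(new Op{"matmul"});
  exec.RegisterOutputHook([](Op* op) { std::cout << op->type << " done\n"; });
  exec.Run();
  return 0;
}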
......@@ -14,6 +14,7 @@
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <vector>
......@@ -24,10 +25,6 @@
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
namespace phi {
class DenseTensor;
} // namespace phi
namespace paddle {
namespace framework {
......@@ -40,6 +37,8 @@ class Scope;
class NaiveExecutor {
public:
using HookFunc = std::function<void(OperatorBase*)>;
explicit NaiveExecutor(const platform::Place& place) : place_(place) {}
~NaiveExecutor();
......@@ -66,13 +65,13 @@ class NaiveExecutor {
// Get a tensor to operate on directly, without the need for feed_ops.
phi::DenseTensor* FindTensor(const std::string& name);
Scope* scope() { return scope_; }
void CleanFeedFetchOps();
Scope* GetScope() { return scope_; }
void ResetTrtOps(int num);
protected:
void RegisterOutputHook(const HookFunc& hookfunc);
private:
void CreateOps(const ProgramDesc& desc,
int block_id,
bool with_feed_fetch_ops);
......@@ -81,7 +80,9 @@ class NaiveExecutor {
const platform::Place place_;
// Cache the required resources to avoid recreating them.
std::vector<std::unique_ptr<OperatorBase>> ops_;
Scope* scope_;
Scope* scope_{nullptr};
HookFunc hookfunc_{nullptr};
};
} // namespace framework
......
......@@ -32,6 +32,7 @@
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/transfer_scope_cache.h"
#include "paddle/fluid/framework/var_type_traits.h"
......@@ -1557,10 +1558,10 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
if (config_.dist_config().use_dist_model()) {
scope = scope_.get();
} else {
scope = executor_->scope();
scope = executor_->GetScope();
}
#else
scope = executor_->scope();
scope = executor_->GetScope();
#endif
PADDLE_ENFORCE_NOT_NULL(
scope->FindVar(name),
......@@ -1612,10 +1613,10 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
if (config_.dist_config().use_dist_model()) {
scope = scope_.get();
} else {
scope = executor_->scope();
scope = executor_->GetScope();
}
#else
scope = executor_->scope();
scope = executor_->GetScope();
#endif
PADDLE_ENFORCE_NOT_NULL(
scope->FindVar(name),
......@@ -1997,7 +1998,7 @@ void AnalysisPredictor::ClearIntermediateTensor() {
for (auto *var : global_block->AllVars()) {
if (!IsPersistable(var)) {
const std::string name = var->Name();
auto *variable = executor_->scope()->FindVar(name);
auto *variable = executor_->GetScope()->FindVar(name);
if (variable != nullptr && variable->IsType<phi::DenseTensor>() &&
name != "feed" && name != "fetch") {
VLOG(3) << "Clear Intermediate Tensor: " << name;
......@@ -2178,6 +2179,33 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) {
exe.Run(save_program, scope(), 0, true, true);
}
void AnalysisPredictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
if (config_.enable_memory_optim()) {
LOG(WARNING) << "If you want to run the output hook function, you should "
"call config.EnableMemoryOptim(false) to turn off memory "
"reuse!";
return;
}
static std::once_flag register_hook_flag;
std::call_once(register_hook_flag, [this] {
executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
for (auto &output : op->Outputs()) {
for (auto &var_name : output.second) {
auto *var = this->sub_scope_->FindVar(var_name);
if (!var || !var->IsType<phi::DenseTensor>()) continue;
const auto &dense_tensor = var->Get<phi::DenseTensor>();
if (!dense_tensor.initialized()) continue;
auto tensor = this->GetOutputTensor(var_name);
for (auto &hookfunc : this->hookfuncs_) {
hookfunc(op->Type(), var_name, *tensor);
}
}
}
});
});
hookfuncs_.push_back(hookfunc);
}
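Design note: the std::call_once above installs the executor-level dispatcher at most once per predictor; every later RegisterOutputHook call only appends to hookfuncs_, and the single installed dispatcher walks all registered hooks for each output variable of every op. Since the hook fires once per output variable, user hooks should stay cheap or filter early by op type or variable name.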
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<AnalysisConfig>(
const AnalysisConfig &config) {
......@@ -2371,6 +2399,10 @@ void Predictor::ClearIntermediateTensor() {
uint64_t Predictor::TryShrinkMemory() { return predictor_->TryShrinkMemory(); }
void Predictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
predictor_->RegisterOutputHook(hookfunc);
}
void *Predictor::GetExecStream() const { return predictor_->GetExecStream(); }
int GetNumBytesOfDataType(DataType dtype) {
......@@ -2452,10 +2484,9 @@ PredictorPool::PredictorPool(const Config &config, size_t size) {
for (size_t i = 0; i < size - 1; i++) {
if (config.tensorrt_engine_enabled()) {
Config config_tmp(copy_config);
preds_.push_back(
std::move(std::unique_ptr<Predictor>(new Predictor(config_tmp))));
preds_.emplace_back(new Predictor(config_tmp));
} else {
preds_.push_back(std::move(main_pred_->Clone()));
preds_.emplace_back(main_pred_->Clone());
}
}
}
......
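A minimal usage sketch of the new public API from the C++ side, modeled on the unit test further below; the model path and the name of the umbrella header are assumptions, adjust them to your installation.

#include <iostream>
#include <string>
#include "paddle_inference_api.h"  // umbrella header of the paddle_infer API

int main() {
  paddle_infer::Config config;
  config.SetModel("/path/to/model");  // hypothetical model directory
  auto predictor = paddle_infer::CreatePredictor(config);
  // The hook fires after each op with (op type, output var name, output tensor).
  predictor->RegisterOutputHook([](const std::string& op_type,
                                   const std::string& var_name,
                                   const paddle_infer::Tensor& tensor) {
    std::cout << op_type << " -> " << var_name << std::endl;
  });
  predictor->Run();
  return 0;
}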
......@@ -272,6 +272,16 @@ class AnalysisPredictor : public PaddlePredictor {
///
std::string GetSerializedProgram() const override;
///
/// \brief Register an output hook function to operate on each op's
/// intermediate output tensor. When using this function, memory reuse
/// should be turned off. The hook function signature is
/// void(const std::string&, const std::string&, const Tensor&). Here,
/// the first parameter is the op's type, the second is the op's output
/// variable name, and the third is the output tensor with that name.
///
void RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) override;
///
/// \brief Initialize mkldnn quantizer and execute mkldnn quantization pass
///
......@@ -510,6 +520,8 @@ class AnalysisPredictor : public PaddlePredictor {
int predictor_id_;
private:
std::vector<Exp_OutputHookFunc> hookfuncs_;
// Some status here that help to determine the status inside the predictor.
bool status_is_cloned_{false};
......
......@@ -611,4 +611,51 @@ TEST(Predictor, Streams) {
}
#endif
TEST(AnalysisPredictor, OutputHookFunc) {
auto hookfunc = [](const std::string& type,
const std::string& var_name,
const Tensor& tensor) { LOG(INFO) << "in hook function"; };
{
Config config;
config.SetModel(FLAGS_dirname);
config.EnableUseGpu(100, 0);
auto predictor = CreatePredictor(config);
predictor->RegisterOutputHook(hookfunc);
auto w0 = predictor->GetInputHandle("firstw");
auto w1 = predictor->GetInputHandle("secondw");
auto w2 = predictor->GetInputHandle("thirdw");
auto w3 = predictor->GetInputHandle("forthw");
w0->Reshape({4, 1});
w1->Reshape({4, 1});
w2->Reshape({4, 1});
w3->Reshape({4, 1});
auto* w0_data = w0->mutable_data<int64_t>(PlaceType::kCPU);
auto* w1_data = w1->mutable_data<int64_t>(PlaceType::kCPU);
auto* w2_data = w2->mutable_data<int64_t>(PlaceType::kCPU);
auto* w3_data = w3->mutable_data<int64_t>(PlaceType::kCPU);
for (int i = 0; i < 4; i++) {
w0_data[i] = i;
w1_data[i] = i;
w2_data[i] = i;
w3_data[i] = i;
}
predictor->Run();
predictor->TryShrinkMemory();
}
{
Config config;
config.SetModel(FLAGS_dirname);
config.EnableMemoryOptim();
config.EnableUseGpu(100, 0);
auto predictor = CreatePredictor(config);
predictor->RegisterOutputHook(hookfunc);
}
}
} // namespace paddle_infer
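Note that the second scope in the test enables memory optimization before registering the hook; with EnableMemoryOptim() on, RegisterOutputHook only emits the warning added above and installs nothing, which is exactly the early-return path being exercised.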
......@@ -38,6 +38,7 @@ namespace paddle {
using PaddleDType = paddle_infer::DataType;
using PaddlePlace = paddle_infer::PlaceType;
using PaddleDataLayout = paddle_infer::DataLayout;
using paddle_infer::Exp_OutputHookFunc;
/// \brief Memory manager for PaddleTensor.
///
......@@ -289,6 +290,16 @@ class PD_INFER_DECL PaddlePredictor {
///
virtual uint64_t TryShrinkMemory() { return 0; }
///
/// \brief Register an output hook function to operate on each op's
/// intermediate output tensor. When using this function, memory reuse
/// should be turned off. The hook function signature is
/// void(const std::string&, const std::string&, const Tensor&). Here,
/// the first parameter is the op's type, the second is the op's output
/// variable name, and the third is the output tensor with that name.
///
virtual void RegisterOutputHook(const Exp_OutputHookFunc& hookfunc) {}
/// \brief Clone an existing predictor
/// When using clone, the same network will be created,
/// and the parameters between them are shared.
......
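The base-class implementation above is an empty default, so existing PaddlePredictor subclasses that do not support output hooks compile unchanged; only AnalysisPredictor overrides it.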
......@@ -157,6 +157,16 @@ class PD_INFER_DECL Predictor {
///
uint64_t TryShrinkMemory();
///
/// \brief Register an output hook function to operate on each op's
/// intermediate output tensor. When using this function, memory reuse
/// should be turned off. The hook function signature is
/// void(const std::string&, const std::string&, const Tensor&). Here,
/// the first parameter is the op's type, the second is the op's output
/// variable name, and the third is the output tensor with that name.
///
void RegisterOutputHook(const Exp_OutputHookFunc& hookfunc);
///
/// \brief Get the execution stream on devices with a concept of stream,
/// otherwise returns nullptr.
......
......@@ -14,7 +14,10 @@
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include "paddle_infer_declare.h" // NOLINT
......@@ -29,6 +32,10 @@ namespace paddle_infer {
/// Strings for text data.
using Strings = std::vector<std::string>;
class Tensor;
using Exp_OutputHookFunc =
std::function<void(const std::string&, const std::string&, const Tensor&)>;
typedef void (*CallbackFunc)(void*);
#if defined(PADDLE_WITH_TESTING) && defined(PADDLE_WITH_INFERENCE_API_TEST)
......
......@@ -14,6 +14,7 @@
#include "paddle/fluid/pybind/inference_api.h"
#include <pybind11/functional.h>
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
......@@ -946,7 +947,9 @@ void BindPaddleInferPredictor(py::module *m) {
#endif
.def("try_shrink_memory", &paddle_infer::Predictor::TryShrinkMemory)
.def("clear_intermediate_tensor",
&paddle_infer::Predictor::ClearIntermediateTensor);
&paddle_infer::Predictor::ClearIntermediateTensor)
.def("register_output_hook",
&paddle_infer::Predictor::RegisterOutputHook);
}
void BindZeroCopyTensor(py::module *m) {
......
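With this binding in place, the hook is also reachable from Python as predictor.register_output_hook(fn), where fn receives the op type, the output variable name, and the output tensor, matching the Exp_OutputHookFunc signature; pybind11/functional.h, included above, handles the conversion from a Python callable to std::function.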