Unverified · Commit f1c9c505 · authored by Yuanle Liu · committed by GitHub

update predictor.register_output_hook: support paddle::Tensor (#54254)

Parent: aeee5b1e
@@ -137,8 +137,9 @@ void DoInsertCastOp(Graph* graph,
   if (cache->count(var_node) == 0) {
     // insert cast op between var_node and op_node
     std::string cast_input_name = var_node->Var()->Name();
-    std::string cast_output_name =
-        var_node->Var()->Name() + "_cast.tmp_" + std::to_string((*suffix)++);
+    std::string cast_output_name = var_node->Var()->Name() +
+                                   "_cast_auto_mixed.tmp_" +
+                                   std::to_string((*suffix)++);
     framework::OpDesc cast_op_desc(block_desc);
     update_cast_desc(cast_op_desc,
                      cast_input_name,
......
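The cast outputs generated by the pass are now tagged with the pass-specific suffix `_cast_auto_mixed.tmp_` instead of the generic `_cast.tmp_`, so downstream consumers can recognize exactly the names this pass created. A minimal sketch of just the naming scheme; `CastOutputName` is a hypothetical helper, not a Paddle API:

```cpp
#include <iostream>
#include <string>

// Hypothetical helper mirroring only the naming logic of DoInsertCastOp;
// the counter keeps generated names unique within one pass run.
std::string CastOutputName(const std::string& var_name, int* suffix) {
  return var_name + "_cast_auto_mixed.tmp_" + std::to_string((*suffix)++);
}

int main() {
  int suffix = 0;
  std::cout << CastOutputName("conv2d_0.out", &suffix) << "\n";
  // conv2d_0.out_cast_auto_mixed.tmp_0
  std::cout << CastOutputName("conv2d_0.out", &suffix) << "\n";
  // conv2d_0.out_cast_auto_mixed.tmp_1
}
```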
@@ -2634,7 +2634,8 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) {
   exe.Run(save_program, scope(), 0, true, true);
 }
 
-void AnalysisPredictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
+void AnalysisPredictor::RegisterOutputHook(
+    const OutputTensorHookFunc &hookfunc) {
   static std::once_flag register_hook_flag;
   std::call_once(register_hook_flag, [this] {
     executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
@@ -2655,6 +2656,29 @@ void AnalysisPredictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
   hookfuncs_.push_back(hookfunc);
 }
 
+void AnalysisPredictor::RegisterOutputHook(
+    const OutputTensorHookFunc_V2 &hookfunc) {
+  static std::once_flag register_hook_flag;
+  std::call_once(register_hook_flag, [this] {
+    executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
+      for (auto &output : op->Outputs()) {
+        for (auto &var_name : output.second) {
+          auto *var = this->sub_scope_->FindVar(var_name);
+          if (!var || !var->IsType<phi::DenseTensor>()) continue;
+          auto dense_tensor = var->Get<phi::DenseTensor>();
+          if (!dense_tensor.initialized()) continue;
+          auto tensor = paddle::Tensor(
+              std::make_shared<phi::DenseTensor>(dense_tensor), var_name);
+          for (auto &hookfunc : this->hookfuncs_v2_) {
+            hookfunc(op->Type(), var_name, tensor);
+          }
+        }
+      }
+    });
+  });
+  hookfuncs_v2_.push_back(hookfunc);
+}
+
 template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<AnalysisConfig>(
     const AnalysisConfig &config) {
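For each op output, the V2 path copies the `phi::DenseTensor` object into a fresh `shared_ptr` (sharing the underlying allocation rather than deep-copying the data) and hands it to the hook as a named `paddle::Tensor`. A hedged caller-side sketch of registering such a hook, assuming only the `Predictor::RegisterOutputHook` overload added in this patch; `AttachDebugHook` and the hook body are illustrative, and per the header comments memory reuse should be turned off for the hook to see valid intermediates:

```cpp
// Sketch assuming the paddle_infer::Predictor API from this patch.
#include <iostream>
#include <string>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

void AttachDebugHook(paddle_infer::Predictor *predictor) {
  // The lambda's third parameter (paddle::Tensor) selects the
  // OutputTensorHookFunc_V2 overload; the hook runs after every op.
  predictor->RegisterOutputHook(
      [](const std::string &op_type, const std::string &var_name,
         const paddle::Tensor &tensor) {
        std::cout << op_type << " -> " << var_name
                  << ", numel: " << tensor.numel() << std::endl;
      });
}
```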
@@ -2936,7 +2960,11 @@ void Predictor::ClearIntermediateTensor() {
 
 uint64_t Predictor::TryShrinkMemory() { return predictor_->TryShrinkMemory(); }
 
-void Predictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
+void Predictor::RegisterOutputHook(const OutputTensorHookFunc &hookfunc) {
+  predictor_->RegisterOutputHook(hookfunc);
+}
+
+void Predictor::RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) {
   predictor_->RegisterOutputHook(hookfunc);
 }
......
@@ -322,11 +322,21 @@ class AnalysisPredictor : public PaddlePredictor {
   ///
   /// \brief Register an output hook function to operate on the intermediate
   /// tensor of each op output. When using this function, memory reuse should
   /// be turned off. The hook signature is void(const std::string&, const
-  /// std::string&, const Tensor&). Here, the first parameter is the op's
+  /// std::string&, const paddle_infer::Tensor&). Here, the first parameter is
+  /// the op's type, the second is the output var name of the op, and the third
+  /// is the output tensor with that var name.
+  ///
+  void RegisterOutputHook(const OutputTensorHookFunc &hookfunc) override;
+  ///
+  /// \brief Register an output hook function to operate on the intermediate
+  /// tensor of each op output. When using this function, memory reuse should
+  /// be turned off. The hook signature is void(const std::string&, const
+  /// std::string&, const paddle::Tensor&). Here, the first parameter is the
   /// op's type, the second is the output var name of the op, and the third
   /// is the output tensor with that var name.
   ///
-  void RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) override;
+  void RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) override;
   ///
   /// \brief Initialize mkldnn quantizer and execute mkldnn quantization pass
......
@@ -38,7 +38,8 @@ namespace paddle {
 using PaddleDType = paddle_infer::DataType;
 using PaddlePlace = paddle_infer::PlaceType;
 using PaddleDataLayout = paddle_infer::DataLayout;
-using paddle_infer::Exp_OutputHookFunc;
+using paddle_infer::OutputTensorHookFunc;
+using paddle_infer::OutputTensorHookFunc_V2;
 
 /// \brief Memory manager for PaddleTensor.
 ///
@@ -317,11 +318,21 @@ class PD_INFER_DECL PaddlePredictor {
   ///
   /// \brief Register an output hook function to operate on the intermediate
   /// tensor of each op output. When using this function, memory reuse should
   /// be turned off. The hook signature is void(const std::string&, const
-  /// std::string&, const Tensor&). Here, the first parameter is the op's
+  /// std::string&, const paddle_infer::Tensor&). Here, the first parameter is
+  /// the op's type, the second is the output var name of the op, and the third
+  /// is the output tensor with that var name.
+  ///
+  virtual void RegisterOutputHook(const OutputTensorHookFunc& hookfunc) {}
+  ///
+  /// \brief Register an output hook function to operate on the intermediate
+  /// tensor of each op output. When using this function, memory reuse should
+  /// be turned off. The hook signature is void(const std::string&, const
+  /// std::string&, const paddle::Tensor&). Here, the first parameter is the
   /// op's type, the second is the output var name of the op, and the third
   /// is the output tensor with that var name.
   ///
-  virtual void RegisterOutputHook(const Exp_OutputHookFunc& hookfunc) {}
+  virtual void RegisterOutputHook(const OutputTensorHookFunc_V2& hookfunc) {}
 
   /// \brief Clone an existing predictor
   /// When using clone, the same network will be created,
......
@@ -197,7 +197,17 @@ class PD_INFER_DECL Predictor {
   /// op's type, the second is the output var name of the op, and the third
   /// is the output tensor with that var name.
   ///
-  void RegisterOutputHook(const Exp_OutputHookFunc& hookfunc);
+  void RegisterOutputHook(const OutputTensorHookFunc& hookfunc);
+  ///
+  /// \brief Register an output hook function to operate on the intermediate
+  /// tensor of each op output. When using this function, memory reuse should
+  /// be turned off. The hook signature is void(const std::string&, const
+  /// std::string&, const paddle::Tensor&). Here, the first parameter is the
+  /// op's type, the second is the output var name of the op, and the third
+  /// is the output tensor with that var name.
+  ///
+  void RegisterOutputHook(const OutputTensorHookFunc_V2& hookfunc);
   ///
   /// \brief Get the execution stream on devices with a concept of stream,
......
@@ -37,9 +37,12 @@ namespace paddle_infer {
 using Strings = std::vector<std::string>;
 
 class Tensor;
-using Exp_OutputHookFunc =
-    std::function<void(const std::string&, const std::string&, const Tensor&)>;
+using OutputTensorHookFunc =
+    std::function<void(const std::string&, const std::string&, const Tensor&)>;
+using OutputTensorHookFunc_V2 = std::function<void(
+    const std::string&, const std::string&, const paddle::Tensor&)>;
 
 typedef void (*CallbackFunc)(void*);
 
 #if defined(PADDLE_WITH_TESTING) && defined(PADDLE_WITH_INFERENCE_API_TEST)
......
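The two aliases differ only in the tensor type handed to the callback, and that difference is what lets both `RegisterOutputHook` overloads coexist: `std::function`'s converting constructor drops out of overload resolution when the lambda is not callable with the alias's parameter types. A self-contained sketch with stand-in tensor types (`InferTensor`/`PaddleTensor` are hypothetical, standing in for `paddle_infer::Tensor` and `paddle::Tensor`):

```cpp
#include <functional>
#include <iostream>
#include <string>

// Stand-ins; only the types' identities matter for overload resolution here.
struct InferTensor {};
struct PaddleTensor {};

using OutputTensorHookFunc = std::function<void(
    const std::string&, const std::string&, const InferTensor&)>;
using OutputTensorHookFunc_V2 = std::function<void(
    const std::string&, const std::string&, const PaddleTensor&)>;

void RegisterOutputHook(const OutputTensorHookFunc&) {
  std::cout << "v1 overload chosen\n";
}
void RegisterOutputHook(const OutputTensorHookFunc_V2&) {
  std::cout << "v2 overload chosen\n";
}

int main() {
  // Each call is unambiguous: the lambda's third parameter type makes it
  // callable through exactly one of the two std::function aliases.
  RegisterOutputHook(
      [](const std::string&, const std::string&, const InferTensor&) {});
  RegisterOutputHook(
      [](const std::string&, const std::string&, const PaddleTensor&) {});
}
```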
@@ -291,8 +291,9 @@ class OpConverter {
       if (parameters.count(input)) continue;
       // NOTE(liuyuanle): This is a trick: if you need the name [input], use
       // [input.substr(0, idx)]. The suffix below may have been inserted by
       // auto_mixed_precision_pass.
-      auto idx = input.find("_cast.tmp_");
+      auto idx = input.find("_cast_auto_mixed.tmp_");
       input = input.substr(0, idx);
       auto* var = block_desc->FindVar(input);
......
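The lookup trick stays safe for names without the suffix: `std::string::find` returns `std::string::npos` when the pattern is absent, and `substr(0, npos)` returns the whole string, so un-suffixed names pass through unchanged. A minimal demonstration (`StripCastSuffix` is a hypothetical helper extracted from the pattern above):

```cpp
#include <cassert>
#include <string>

// The suffix-stripping trick used in OpConverter and TensorRTEngineOp.
std::string StripCastSuffix(const std::string& name) {
  auto idx = name.find("_cast_auto_mixed.tmp_");  // npos if absent
  return name.substr(0, idx);                     // substr(0, npos) == name
}

int main() {
  assert(StripCastSuffix("x_cast_auto_mixed.tmp_3") == "x");
  assert(StripCastSuffix("plain_name") == "plain_name");
  return 0;
}
```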
@@ -315,9 +315,10 @@ class TensorRTEngineOp : public framework::OperatorBase {
     for (auto name : runtime_input_names_) {
       // NOTE(liuyuanle): This is a trick: if you need [name], use
       // [name.substr(0, idx)]. The suffix below may have been inserted by
       // auto_mixed_precision_pass.
       std::string name_real = name;
-      auto idx = name.find("_cast.tmp_");
+      auto idx = name.find("_cast_auto_mixed.tmp_");
       name = name.substr(0, idx);
       auto &t = inference::analysis::GetFromScope<phi::DenseTensor>(
@@ -387,9 +388,9 @@ class TensorRTEngineOp : public framework::OperatorBase {
     for (auto x : runtime_input_names_) {
       // NOTE(liuyuanle): This is a trick: if you need [x], use
       // [x.substr(0, idx)]. The suffix below may have been inserted by
       // auto_mixed_precision_pass.
-      auto idx = x.find("_cast.tmp_");
+      auto idx = x.find("_cast_auto_mixed.tmp_");
       x = x.substr(0, idx);
       PADDLE_ENFORCE_EQ(
@@ -560,9 +561,10 @@ class TensorRTEngineOp : public framework::OperatorBase {
     for (auto x : runtime_input_names_) {
       // NOTE(liuyuanle): This is a trick: if you need [x], use
       // [x.substr(0, idx)]. The suffix below may have been inserted by
       // auto_mixed_precision_pass.
       std::string x_real = x;
-      auto idx = x.find("_cast.tmp_");
+      auto idx = x.find("_cast_auto_mixed.tmp_");
       x = x.substr(0, idx);
 #if IS_TRT_VERSION_LT(8000)
......
@@ -36,6 +36,7 @@
 #include "paddle/fluid/inference/api/paddle_infer_contrib.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_pass_builder.h"
+#include "paddle/fluid/inference/api/paddle_tensor.h"
 #include "paddle/fluid/inference/utils/io_utils.h"
 #include "paddle/fluid/pybind/eager.h"
 #include "paddle/fluid/pybind/eager_utils.h"
@@ -1095,7 +1096,11 @@ void BindPaddleInferPredictor(py::module *m) {
       .def("clear_intermediate_tensor",
           &paddle_infer::Predictor::ClearIntermediateTensor)
-      .def("register_output_hook",
-           &paddle_infer::Predictor::RegisterOutputHook);
+      .def("register_output_hook",
+           py::overload_cast<const paddle_infer::OutputTensorHookFunc &>(
+               &paddle_infer::Predictor::RegisterOutputHook))
+      .def("register_output_hook_v2",
+           py::overload_cast<const paddle_infer::OutputTensorHookFunc_V2 &>(
+               &paddle_infer::Predictor::RegisterOutputHook));
 }
 
 void BindZeroCopyTensor(py::module *m) {
......
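Because `RegisterOutputHook` is now overloaded, `&paddle_infer::Predictor::RegisterOutputHook` no longer names a unique function, so the binding disambiguates with `py::overload_cast`, which selects a member overload by its parameter types. A minimal, self-contained pybind11 analogue (the `Demo` class and module name are hypothetical):

```cpp
#include <string>

#include <pybind11/pybind11.h>

namespace py = pybind11;

// An overloaded member function cannot be bound via &Demo::Register alone;
// py::overload_cast<Args...> picks the overload taking exactly Args.
struct Demo {
  void Register(int) {}
  void Register(const std::string&) {}
};

PYBIND11_MODULE(demo, m) {
  py::class_<Demo>(m, "Demo")
      .def(py::init<>())
      .def("register_int", py::overload_cast<int>(&Demo::Register))
      .def("register_str",
           py::overload_cast<const std::string&>(&Demo::Register));
}
```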
@@ -667,7 +667,7 @@ TEST(Predictor, Streams) {
 }
 #endif
 
-TEST(AnalysisPredictor, OutputHookFunc) {
+TEST(AnalysisPredictor, OutputTensorHookFunc) {
   auto hookfunc = [](const std::string& type,
                      const std::string& var_name,
                      const Tensor& tensor) { LOG(INFO) << "in hook function"; };
@@ -714,4 +714,53 @@ TEST(AnalysisPredictor, OutputHookFunc) {
   }
 }
 
+TEST(AnalysisPredictor, OutputTensorHookFunc_V2) {
+  auto hookfunc = [](const std::string& type,
+                     const std::string& var_name,
+                     const paddle::Tensor& tensor) {
+    LOG(INFO) << "in hook function";
+  };
+
+  {
+    Config config;
+    config.SetModel(FLAGS_dirname);
+    config.EnableUseGpu(100, 0);
+    auto predictor = CreatePredictor(config);
+    predictor->RegisterOutputHook(hookfunc);
+    auto w0 = predictor->GetInputHandle("firstw");
+    auto w1 = predictor->GetInputHandle("secondw");
+    auto w2 = predictor->GetInputHandle("thirdw");
+    auto w3 = predictor->GetInputHandle("forthw");
+    w0->Reshape({4, 1});
+    w1->Reshape({4, 1});
+    w2->Reshape({4, 1});
+    w3->Reshape({4, 1});
+    auto* w0_data = w0->mutable_data<int64_t>(PlaceType::kCPU);
+    auto* w1_data = w1->mutable_data<int64_t>(PlaceType::kCPU);
+    auto* w2_data = w2->mutable_data<int64_t>(PlaceType::kCPU);
+    auto* w3_data = w3->mutable_data<int64_t>(PlaceType::kCPU);
+    for (int i = 0; i < 4; i++) {
+      w0_data[i] = i;
+      w1_data[i] = i;
+      w2_data[i] = i;
+      w3_data[i] = i;
+    }
+
+    predictor->Run();
+    predictor->TryShrinkMemory();
+  }
+
+  {
+    Config config;
+    config.SetModel(FLAGS_dirname);
+    config.EnableMemoryOptim();
+    config.EnableUseGpu(100, 0);
+    auto predictor = CreatePredictor(config);
+    predictor->RegisterOutputHook(hookfunc);
+  }
+}
+
 }  // namespace paddle_infer