Unverified commit f1c9c505, authored by Yuanle Liu, committed by GitHub

update predictor.register_output_hook: support paddle::Tensor (#54254)

Parent aeee5b1e
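In short: Predictor::RegisterOutputHook gains a second overload whose callback receives a paddle::Tensor instead of a paddle_infer::Tensor. A minimal usage sketch of both overloads, assuming a hypothetical model directory "./model_dir" and with input feeding elided (see the tests at the end of this diff):

#include <iostream>
#include <string>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("./model_dir");  // hypothetical model path
  auto predictor = paddle_infer::CreatePredictor(config);

  // Existing overload: the hook receives a paddle_infer::Tensor.
  predictor->RegisterOutputHook(
      [](const std::string &op_type, const std::string &var_name,
         const paddle_infer::Tensor &tensor) {
        std::cout << op_type << " -> " << var_name << "\n";
      });

  // Overload added by this commit: the hook receives a paddle::Tensor.
  predictor->RegisterOutputHook(
      [](const std::string &op_type, const std::string &var_name,
         const paddle::Tensor &tensor) {
        std::cout << var_name << " numel: " << tensor.numel() << "\n";
      });

  // Feed inputs as usual, then run; hooks fire for each op output.
  predictor->Run();
  return 0;
}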
......@@ -137,8 +137,9 @@ void DoInsertCastOp(Graph* graph,
if (cache->count(var_node) == 0) {
// insert cast op between var_node and op_node
std::string cast_input_name = var_node->Var()->Name();
std::string cast_output_name =
var_node->Var()->Name() + "_cast.tmp_" + std::to_string((*suffix)++);
std::string cast_output_name = var_node->Var()->Name() +
"_cast_auto_mixed.tmp_" +
std::to_string((*suffix)++);
framework::OpDesc cast_op_desc(block_desc);
update_cast_desc(cast_op_desc,
cast_input_name,
......
......@@ -2634,7 +2634,8 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) {
exe.Run(save_program, scope(), 0, true, true);
}
void AnalysisPredictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
void AnalysisPredictor::RegisterOutputHook(
const OutputTensorHookFunc &hookfunc) {
static std::once_flag register_hook_flag;
std::call_once(register_hook_flag, [this] {
executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
......@@ -2655,6 +2656,29 @@ void AnalysisPredictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
hookfuncs_.push_back(hookfunc);
}
void AnalysisPredictor::RegisterOutputHook(
const OutputTensorHookFunc_V2 &hookfunc) {
static std::once_flag register_hook_flag;
std::call_once(register_hook_flag, [this] {
executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
for (auto &output : op->Outputs()) {
for (auto &var_name : output.second) {
auto *var = this->sub_scope_->FindVar(var_name);
if (!var || !var->IsType<phi::DenseTensor>()) continue;
auto dense_tensor = var->Get<phi::DenseTensor>();
if (!dense_tensor.initialized()) continue;
auto tensor = paddle::Tensor(
std::make_shared<phi::DenseTensor>(dense_tensor), var_name);
for (auto &hookfunc : this->hookfuncs_v2_) {
hookfunc(op->Type(), var_name, tensor);
}
}
}
});
});
hookfuncs_v2_.push_back(hookfunc);
}
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<AnalysisConfig>(
const AnalysisConfig &config) {
......@@ -2936,7 +2960,11 @@ void Predictor::ClearIntermediateTensor() {
uint64_t Predictor::TryShrinkMemory() { return predictor_->TryShrinkMemory(); }
void Predictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
void Predictor::RegisterOutputHook(const OutputTensorHookFunc &hookfunc) {
predictor_->RegisterOutputHook(hookfunc);
}
void Predictor::RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) {
predictor_->RegisterOutputHook(hookfunc);
}
......
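The V2 hook implementation above copies each scope-owned phi::DenseTensor into a shared_ptr and wraps it in a paddle::Tensor tagged with the variable name, so the callback gets a self-contained tensor. A distilled sketch of just that wrapping step (illustration only; WrapForHook is a hypothetical helper mirroring the constructor call in the diff):

#include <memory>
#include <string>

#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/core/dense_tensor.h"

// Copying the DenseTensor copies metadata while sharing the underlying
// allocation; the paddle::Tensor constructor used in the diff takes the
// shared_ptr plus the variable name.
paddle::Tensor WrapForHook(const phi::DenseTensor &dense_tensor,
                           const std::string &var_name) {
  return paddle::Tensor(std::make_shared<phi::DenseTensor>(dense_tensor),
                        var_name);
}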
......@@ -322,11 +322,21 @@ class AnalysisPredictor : public PaddlePredictor {
/// \brief Register an output hook function to operate on the intermediate
/// tensor of an op's output. When using this function, memory reuse should be
/// turned off. The hook function signature is void(const std::string&, const
/// std::string&, const Tensor&). Here, the first parameter is the op's
/// std::string&, const paddle_infer::Tensor&). Here, the first parameter is
/// the op's type, the second is the op's output var name, and the third is
/// the output tensor with that var name.
///
void RegisterOutputHook(const OutputTensorHookFunc &hookfunc) override;
///
/// \brief Register an output hook function to operate on the intermediate
/// tensor of an op's output. When using this function, memory reuse should be
/// turned off. The hook function signature is void(const std::string&, const
/// std::string&, const paddle::Tensor&). Here, the first parameter is the
/// op's type, the second is the op's output var name, and the third is the
/// output tensor with that var name.
///
void RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) override;
void RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) override;
///
/// \brief Initialize mkldnn quantizer and execute mkldnn quantization pass
......@@ -597,7 +607,8 @@ class AnalysisPredictor : public PaddlePredictor {
int root_predictor_id_{-1};
private:
std::vector<Exp_OutputHookFunc> hookfuncs_;
std::vector<OutputTensorHookFunc> hookfuncs_;
std::vector<OutputTensorHookFunc_V2> hookfuncs_v2_;
// Some status here that help to determine the status inside the predictor.
bool status_is_cloned_{false};
......
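Both overloads in the .cc diff above guard the executor-level registration with a function-local static std::once_flag and then append the user callback to the matching vector (hookfuncs_ or hookfuncs_v2_), so the per-op plumbing is installed once while any number of hooks can be added. A distilled sketch of that pattern, with hypothetical names:

#include <functional>
#include <iostream>
#include <mutex>
#include <string>
#include <vector>

class HookRegistry {
 public:
  using Hook = std::function<void(const std::string &)>;

  void Register(Hook hook) {
    // Install the (expensive) backend callback exactly once.
    std::call_once(once_, [] { std::cout << "backend hook installed\n"; });
    hooks_.push_back(std::move(hook));
  }

  // Called by the backend for every produced output; fans out to all hooks.
  void Fire(const std::string &var_name) {
    for (auto &hook : hooks_) hook(var_name);
  }

 private:
  std::once_flag once_;
  std::vector<Hook> hooks_;
};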
......@@ -38,7 +38,8 @@ namespace paddle {
using PaddleDType = paddle_infer::DataType;
using PaddlePlace = paddle_infer::PlaceType;
using PaddleDataLayout = paddle_infer::DataLayout;
using paddle_infer::Exp_OutputHookFunc;
using paddle_infer::OutputTensorHookFunc;
using paddle_infer::OutputTensorHookFunc_V2;
/// \brief Memory manager for PaddleTensor.
///
......@@ -317,11 +318,21 @@ class PD_INFER_DECL PaddlePredictor {
/// \brief Register an output hook function to operate on the intermediate
/// tensor of an op's output. When using this function, memory reuse should be
/// turned off. The hook function signature is void(const std::string&, const
/// std::string&, const Tensor&). Here, the first parameter is the op's
/// std::string&, const paddle_infer::Tensor&). Here, the first parameter is
/// the op's type, the second is the op's output var name, and the third is
/// the output tensor with that var name.
///
virtual void RegisterOutputHook(const OutputTensorHookFunc& hookfunc) {}
///
/// \brief Register an output hook function to operate on the intermediate
/// tensor of an op's output. When using this function, memory reuse should be
/// turned off. The hook function signature is void(const std::string&, const
/// std::string&, const paddle::Tensor&). Here, the first parameter is the
/// op's type, the second is the op's output var name, and the third is the
/// output tensor with that var name.
///
virtual void RegisterOutputHook(const Exp_OutputHookFunc& hookfunc) {}
virtual void RegisterOutputHook(const OutputTensorHookFunc_V2& hookfunc) {}
/// \brief Clone an existing predictor
/// When using clone, the same network will be created,
......
......@@ -197,7 +197,17 @@ class PD_INFER_DECL Predictor {
/// type, the second is the op's output var name, and the third is the output
/// tensor with that var name.
///
void RegisterOutputHook(const Exp_OutputHookFunc& hookfunc);
void RegisterOutputHook(const OutputTensorHookFunc& hookfunc);
///
/// \brief Register an output hook function to operate on the intermediate
/// tensor of an op's output. When using this function, memory reuse should be
/// turned off. The hook function signature is void(const std::string&, const
/// std::string&, const paddle::Tensor&). Here, the first parameter is the
/// op's type, the second is the op's output var name, and the third is the
/// output tensor with that var name.
///
void RegisterOutputHook(const OutputTensorHookFunc_V2& hookfunc);
///
/// \brief Get the execution stream on devices with a concept of stream,
......
......@@ -37,9 +37,12 @@ namespace paddle_infer {
using Strings = std::vector<std::string>;
class Tensor;
using Exp_OutputHookFunc =
using OutputTensorHookFunc =
std::function<void(const std::string&, const std::string&, const Tensor&)>;
using OutputTensorHookFunc_V2 = std::function<void(
const std::string&, const std::string&, const paddle::Tensor&)>;
typedef void (*CallbackFunc)(void*);
#if defined(PADDLE_WITH_TESTING) && defined(PADDLE_WITH_INFERENCE_API_TEST)
......
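Because the two aliases are std::function types differing only in the tensor type of the third parameter, RegisterOutputHook can be overloaded on them, and a lambda selects the right overload implicitly (the pybind11 bindings below disambiguate explicitly with py::overload_cast). Illustrative only:

#include <string>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

// Each lambda converts implicitly to the matching std::function alias,
// since it is only invocable with that alias's tensor type.
paddle_infer::OutputTensorHookFunc v1 =
    [](const std::string &op_type, const std::string &var_name,
       const paddle_infer::Tensor &tensor) { /* inspect wrapper tensor */ };

paddle_infer::OutputTensorHookFunc_V2 v2 =
    [](const std::string &op_type, const std::string &var_name,
       const paddle::Tensor &tensor) { /* inspect eager tensor */ };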
......@@ -291,8 +291,9 @@ class OpConverter {
if (parameters.count(input)) continue;
// NOTE(liuyuanle): This is a trick. If you need the name [input], use
// [input.substr(0, idx)] instead.
// Maybe we insert suffix of "_cast.tmp_" in auto_mixed_precision_pass.
auto idx = input.find("_cast.tmp_");
// Maybe we insert suffix of "_cast_auto_mixed.tmp_" in
// auto_mixed_precision_pass.
auto idx = input.find("_cast_auto_mixed.tmp_");
input = input.substr(0, idx);
auto* var = block_desc->FindVar(input);
......
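The substr trick above needs no branch: std::string::find returns std::string::npos when the suffix is absent, and substr(0, npos) returns the whole string. A self-contained sketch:

#include <cassert>
#include <string>

// Strip the suffix inserted by auto_mixed_precision_pass, if present.
std::string StripCastSuffix(const std::string &name) {
  auto idx = name.find("_cast_auto_mixed.tmp_");
  return name.substr(0, idx);  // idx == npos keeps the full name
}

int main() {
  assert(StripCastSuffix("x_cast_auto_mixed.tmp_0") == "x");
  assert(StripCastSuffix("x") == "x");
  return 0;
}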
......@@ -315,9 +315,10 @@ class TensorRTEngineOp : public framework::OperatorBase {
for (auto name : runtime_input_names_) {
// NOTE(liuyuanle): This is a trick. If you need the name [name], use
// [name.substr(0, idx)] instead.
// Maybe we insert suffix of "_cast.tmp_" in auto_mixed_precision_pass.
// Maybe we insert suffix of "_cast_auto_mixed.tmp_" in
// auto_mixed_precision_pass.
std::string name_real = name;
auto idx = name.find("_cast.tmp_");
auto idx = name.find("_cast_auto_mixed.tmp_");
name = name.substr(0, idx);
auto &t = inference::analysis::GetFromScope<phi::DenseTensor>(
......@@ -387,9 +388,9 @@ class TensorRTEngineOp : public framework::OperatorBase {
for (auto x : runtime_input_names_) {
// NOTE(liuyuanle): This is a trick. If you need the name [x], use
// [x.substr(0, idx)] instead.
// Maybe we insert suffix of "_cast.tmp_" in
// Maybe we insert suffix of "_cast_auto_mixed.tmp_" in
// auto_mixed_precision_pass.
auto idx = x.find("_cast.tmp_");
auto idx = x.find("_cast_auto_mixed.tmp_");
x = x.substr(0, idx);
PADDLE_ENFORCE_EQ(
......@@ -560,9 +561,10 @@ class TensorRTEngineOp : public framework::OperatorBase {
for (auto x : runtime_input_names_) {
// NOTE(liuyuanle): This is a trick. If you need the name [x], use
// [x.substr(0, idx)] instead.
// Maybe we insert suffix of "_cast.tmp_" in auto_mixed_precision_pass.
// Maybe we insert suffix of "_cast_auto_mixed.tmp_" in
// auto_mixed_precision_pass.
std::string x_real = x;
auto idx = x.find("_cast.tmp_");
auto idx = x.find("_cast_auto_mixed.tmp_");
x = x.substr(0, idx);
#if IS_TRT_VERSION_LT(8000)
......
......@@ -36,6 +36,7 @@
#include "paddle/fluid/inference/api/paddle_infer_contrib.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_pass_builder.h"
#include "paddle/fluid/inference/api/paddle_tensor.h"
#include "paddle/fluid/inference/utils/io_utils.h"
#include "paddle/fluid/pybind/eager.h"
#include "paddle/fluid/pybind/eager_utils.h"
......@@ -1095,7 +1096,11 @@ void BindPaddleInferPredictor(py::module *m) {
.def("clear_intermediate_tensor",
&paddle_infer::Predictor::ClearIntermediateTensor)
.def("register_output_hook",
&paddle_infer::Predictor::RegisterOutputHook);
py::overload_cast<const paddle_infer::OutputTensorHookFunc &>(
&paddle_infer::Predictor::RegisterOutputHook))
.def("register_output_hook_v2",
py::overload_cast<const paddle_infer::OutputTensorHookFunc_V2 &>(
&paddle_infer::Predictor::RegisterOutputHook));
}
void BindZeroCopyTensor(py::module *m) {
......
......@@ -667,7 +667,7 @@ TEST(Predictor, Streams) {
}
#endif
TEST(AnalysisPredictor, OutputHookFunc) {
TEST(AnalysisPredictor, OutputTensorHookFunc) {
auto hookfunc = [](const std::string& type,
const std::string& var_name,
const Tensor& tensor) { LOG(INFO) << "in hook function"; };
......@@ -714,4 +714,53 @@ TEST(AnalysisPredictor, OutputHookFunc) {
}
}
TEST(AnalysisPredictor, OutputTensorHookFunc_V2) {
auto hookfunc = [](const std::string& type,
const std::string& var_name,
const paddle::Tensor& tensor) {
LOG(INFO) << "in hook function";
};
{
Config config;
config.SetModel(FLAGS_dirname);
config.EnableUseGpu(100, 0);
auto predictor = CreatePredictor(config);
predictor->RegisterOutputHook(hookfunc);
auto w0 = predictor->GetInputHandle("firstw");
auto w1 = predictor->GetInputHandle("secondw");
auto w2 = predictor->GetInputHandle("thirdw");
auto w3 = predictor->GetInputHandle("forthw");
w0->Reshape({4, 1});
w1->Reshape({4, 1});
w2->Reshape({4, 1});
w3->Reshape({4, 1});
auto* w0_data = w0->mutable_data<int64_t>(PlaceType::kCPU);
auto* w1_data = w1->mutable_data<int64_t>(PlaceType::kCPU);
auto* w2_data = w2->mutable_data<int64_t>(PlaceType::kCPU);
auto* w3_data = w3->mutable_data<int64_t>(PlaceType::kCPU);
for (int i = 0; i < 4; i++) {
w0_data[i] = i;
w1_data[i] = i;
w2_data[i] = i;
w3_data[i] = i;
}
predictor->Run();
predictor->TryShrinkMemory();
}
{
Config config;
config.SetModel(FLAGS_dirname);
config.EnableMemoryOptim();
config.EnableUseGpu(100, 0);
auto predictor = CreatePredictor(config);
predictor->RegisterOutputHook(hookfunc);
}
}
} // namespace paddle_infer