PaddlePaddle / Paddle
Commit 418d2796 (unverified)
Authored by Yuanle Liu on Jun 08, 2023; committed via GitHub on Jun 08, 2023
output tensor hook support while op (#54432)
Parent: 2f781a34
Showing 13 changed files with 48 additions and 136 deletions (+48 -136):

    paddle/fluid/framework/naive_executor.cc                +7   -3
    paddle/fluid/framework/naive_executor.h                 +2   -2
    paddle/fluid/framework/new_executor/interpretercore.cc  +4   -0
    paddle/fluid/framework/new_executor/interpretercore.h   +7   -0
    paddle/fluid/framework/operator.h                       +7   -0
    paddle/fluid/inference/api/analysis_predictor.cc        +15  -40
    paddle/fluid/inference/api/analysis_predictor.h         +1   -12
    paddle/fluid/inference/api/paddle_api.h                 +1   -12
    paddle/fluid/inference/api/paddle_inference_api.h       +0   -10
    paddle/fluid/inference/api/paddle_tensor.h              +1   -5
    paddle/fluid/operators/controlflow/while_op.cc          +2   -0
    paddle/fluid/pybind/inference_api.cc                    +1   -5
    test/cpp/inference/api/analysis_predictor_tester.cc     +0   -47
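Taken together, the diffs below do three things: they collapse the predictor's two output-hook variants into a single `OutputTensorHookFunc` whose callback receives a `paddle::Tensor`; they widen the framework-level hook signature to `void(OperatorBase*, Scope*)` so a callback knows which scope the op actually ran in; and they propagate the hooks into `while` ops so that outputs produced inside control flow also fire the callbacks. As the header comments below note, memory reuse should be turned off while hooks are in use. A minimal sketch of the user-facing API after this change (the model path is a hypothetical placeholder):

```cpp
#include <iostream>
#include <string>

#include "paddle_inference_api.h"  // paddle_infer::Config, Predictor

int main() {
  paddle_infer::Config config;
  config.SetModel("./model_dir");  // hypothetical: any saved inference model

  auto predictor = paddle_infer::CreatePredictor(config);

  // A single overload after this commit; the callback receives a
  // paddle::Tensor and now also fires for op outputs produced inside a
  // while op.
  predictor->RegisterOutputHook([](const std::string &op_type,
                                   const std::string &var_name,
                                   const paddle::Tensor &tensor) {
    std::cout << op_type << " produced " << var_name << std::endl;
  });

  // Feed inputs as usual, then run; each executed op triggers the hook.
  return predictor->Run() ? 0 : 1;
}
```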
paddle/fluid/framework/naive_executor.cc (+7 -3)

@@ -66,6 +66,10 @@ void NaiveExecutor::Run() {
                                   platform::NvtxRangeColor::Green);
 #endif
 
+    if (op->Type() == "while") {
+      op->SetOutputHooks(hookfuncs_);
+    }
+
     op->Run(*scope_, place_);
 
     // Update the shared_holder so that only records the max one.
@@ -97,8 +101,8 @@ void NaiveExecutor::Run() {
 #ifdef PADDLE_WITH_INFERENCE_NVTX
     platform::CudaNvtxRangePop();
 #endif
-    for (auto &func : hookfunc_) {
-      func(op.get());
+    for (auto &func : hookfuncs_) {
+      func(op.get(), scope_);
     }
   }
 #ifdef PADDLE_WITH_INFERENCE_NVTX
@@ -178,7 +182,7 @@ phi::DenseTensor *NaiveExecutor::FindTensor(const std::string &name) {
 }
 
 void NaiveExecutor::RegisterOutputHook(const HookFunc &hookfunc) {
-  hookfunc_.push_back(hookfunc);
+  hookfuncs_.push_back(hookfunc);
 }
 
 void NaiveExecutor::MakeReusePlan(
paddle/fluid/framework/naive_executor.h (+2 -2)

@@ -38,7 +38,7 @@ class Scope;
 
 class NaiveExecutor {
  public:
-  using HookFunc = std::function<void(OperatorBase*)>;
+  using HookFunc = std::function<void(OperatorBase*, Scope*)>;
 
   explicit NaiveExecutor(const platform::Place& place) : place_(place) {}
 
@@ -86,7 +86,7 @@ class NaiveExecutor {
   std::vector<std::unique_ptr<OperatorBase>> ops_;
   Scope* scope_{nullptr};
 
-  std::vector<HookFunc> hookfunc_;
+  std::vector<HookFunc> hookfuncs_;
 
   // Record information that tensor_a should ShareBufferWith tensor_b.
   std::unordered_map<OperatorBase*, std::unordered_map<phi::DenseTensor*, int>>
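The signature change above is the crux of the while-op support: a hook now receives the `Scope*` the op executed in, instead of the callback having to resolve variables in a fixed scope. A framework-side sketch of registering a hook with the new signature (internal API; `AttachLoggingHook` is a hypothetical helper, not part of the commit):

```cpp
#include "glog/logging.h"
#include "paddle/fluid/framework/naive_executor.h"

namespace fw = paddle::framework;

// Hypothetical helper: log every op the executor finishes, using the new
// two-argument HookFunc. Inside a while loop, `scope` is the iteration's
// own scope, which a lookup against the predictor's fixed sub-scope could
// not observe.
void AttachLoggingHook(fw::NaiveExecutor *executor) {
  executor->RegisterOutputHook([](fw::OperatorBase *op, fw::Scope *scope) {
    LOG(INFO) << "finished op: " << op->Type() << " (scope holds "
              << scope->LocalVarNames().size() << " vars)";
  });
}
```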
paddle/fluid/framework/new_executor/interpretercore.cc (+4 -0)

@@ -949,6 +949,10 @@ void InterpreterCore::RunOperator(const Instruction& instr_node) {
 #endif
   }
 
+  for (auto& hook : hookfuncs_) {
+    hook(op, local_scope);
+  }
+
   // for debug nan/inf
   if (op_with_kernel != nullptr && FLAGS_check_nan_inf) {
     VLOG(4) << "Check nan/inf";
paddle/fluid/framework/new_executor/interpretercore.h (+7 -0)

@@ -77,6 +77,11 @@ class InterpreterCore {
 
   const platform::Place& GetPlace() const { return place_; }
 
+  using HookFunc = std::function<void(OperatorBase*, Scope*)>;
+  void SetOutputHooks(const std::vector<HookFunc>& hookfuncs) {
+    hookfuncs_ = hookfuncs;
+  }
+
  private:
   DISABLE_COPY_AND_ASSIGN(InterpreterCore);
 
   // build graph
@@ -184,6 +189,8 @@ class InterpreterCore {
   std::vector<size_t> trace_execute_order_;
 
   InstructionSchedulingPriorityLess instruction_scheduling_priority_less;
+
+  std::vector<HookFunc> hookfuncs_;
 };
 
 }  // namespace framework
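`SetOutputHooks` is the bridge that lets an op hosting its own `InterpreterCore` (the while op, further below) pass along the hooks it was given. A minimal sketch of that hand-off using the types declared above (`ForwardHooks` is a hypothetical helper):

```cpp
#include <vector>

#include "paddle/fluid/framework/new_executor/interpretercore.h"

namespace fw = paddle::framework;

// Forward a set of hooks into a nested interpreter. RunOperator (see the
// interpretercore.cc hunk above) then calls each hook with
// (op, local_scope) after the op finishes.
void ForwardHooks(fw::InterpreterCore *core,
                  const std::vector<fw::InterpreterCore::HookFunc> &hooks) {
  core->SetOutputHooks(hooks);
}
```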
paddle/fluid/framework/operator.h (+7 -0)

@@ -371,6 +371,11 @@ class OperatorBase {
 
   void SetId(uint64_t id) { id_ = id; }
 
+  using HookFunc = std::function<void(OperatorBase*, Scope*)>;
+  void SetOutputHooks(const std::vector<HookFunc>& hookfuncs) {
+    hookfuncs_ = hookfuncs;
+  }
+
  protected:
   std::string type_;
   // NOTE: in case of OpGrad, inputs_ contains:
@@ -399,6 +404,8 @@ class OperatorBase {
   // Whether this operator executes in an Executor.
   bool run_by_executor_{true};
 
+  std::vector<HookFunc> hookfuncs_;
+
  private:
   void GenerateTemporaryNames();
   void CheckAllInputOutputSet() const;
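Note that `OperatorBase::HookFunc` and `InterpreterCore::HookFunc` alias the same `std::function<void(OperatorBase*, Scope*)>` type, so the vector a `NaiveExecutor` plants on a `while` op via `SetOutputHooks` can later be handed to the op's inner `InterpreterCore` without any conversion.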
paddle/fluid/inference/api/analysis_predictor.cc (+15 -40)

@@ -2638,47 +2638,26 @@ void AnalysisPredictor::RegisterOutputHook(
     const OutputTensorHookFunc &hookfunc) {
   static std::once_flag register_hook_flag;
   std::call_once(register_hook_flag, [this] {
-    executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
-      for (auto &output : op->Outputs()) {
-        for (auto &var_name : output.second) {
-          auto *var = this->sub_scope_->FindVar(var_name);
-          if (!var || !var->IsType<phi::DenseTensor>()) continue;
-          auto dense_tensor = var->Get<phi::DenseTensor>();
-          if (!dense_tensor.initialized()) continue;
-          auto tensor = this->GetOutputTensor(var_name);
-          for (auto &hookfunc : this->hookfuncs_) {
-            hookfunc(op->Type(), var_name, *tensor);
+    executor_->RegisterOutputHook(
+        [this](framework::OperatorBase *op, framework::Scope *scope) {
+          for (auto &output : op->Outputs()) {
+            for (auto &var_name : output.second) {
+              auto *var = scope->FindVar(var_name);
+              if (!var || !var->IsType<phi::DenseTensor>()) continue;
+              auto dense_tensor = var->Get<phi::DenseTensor>();
+              if (!dense_tensor.initialized()) continue;
+              auto tensor = paddle::Tensor(
+                  std::make_shared<phi::DenseTensor>(dense_tensor), var_name);
+              for (auto &hookfunc : this->hookfuncs_) {
+                hookfunc(op->Type(), var_name, tensor);
+              }
+            }
           }
-        }
-      }
-    });
+        });
   });
   hookfuncs_.push_back(hookfunc);
 }
 
-void AnalysisPredictor::RegisterOutputHook(
-    const OutputTensorHookFunc_V2 &hookfunc) {
-  static std::once_flag register_hook_flag;
-  std::call_once(register_hook_flag, [this] {
-    executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
-      for (auto &output : op->Outputs()) {
-        for (auto &var_name : output.second) {
-          auto *var = this->sub_scope_->FindVar(var_name);
-          if (!var || !var->IsType<phi::DenseTensor>()) continue;
-          auto dense_tensor = var->Get<phi::DenseTensor>();
-          if (!dense_tensor.initialized()) continue;
-          auto tensor = paddle::Tensor(
-              std::make_shared<phi::DenseTensor>(dense_tensor), var_name);
-          for (auto &hookfunc : this->hookfuncs_v2_) {
-            hookfunc(op->Type(), var_name, tensor);
-          }
-        }
-      }
-    });
-  });
-  hookfuncs_v2_.push_back(hookfunc);
-}
-
 template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<AnalysisConfig>(
     const AnalysisConfig &config) {
@@ -2964,10 +2943,6 @@ void Predictor::RegisterOutputHook(const OutputTensorHookFunc &hookfunc) {
   predictor_->RegisterOutputHook(hookfunc);
 }
 
-void Predictor::RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) {
-  predictor_->RegisterOutputHook(hookfunc);
-}
-
 void *Predictor::GetExecStream() const { return predictor_->GetExecStream(); }
 
 int GetNumBytesOfDataType(DataType dtype) {
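The surviving implementation is essentially the old V2 body: it wraps each initialized output `DenseTensor` in a `paddle::Tensor` and, crucially, resolves `var_name` in the `scope` argument supplied by the executor rather than in `sub_scope_`. Since every while-loop iteration runs in its own scope, the old `sub_scope_` lookup (and the `GetOutputTensor` path built on it) could not see loop-internal outputs; the scope-aware lambda can.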
paddle/fluid/inference/api/analysis_predictor.h (+1 -12)

@@ -318,16 +318,6 @@ class AnalysisPredictor : public PaddlePredictor {
   ///
   Argument::fusion_statis_t fusion_statis() { return fusion_statis_; }
 
-  ///
-  /// \brief Register a output hook function to operate the intermediate tensor
-  /// of op output. when using this function, memory reuse should be tured off.
-  /// The hook function signature is void(const std::string&, const
-  /// std::string&, const paddle_infer::Tensor&>). Here, the first parameter is
-  /// op's type, the second param is output var name of the op, and the third
-  /// parameter is output tensor with the var name.
-  ///
-  void RegisterOutputHook(const OutputTensorHookFunc &hookfunc) override;
-
   ///
   /// \brief Register a output hook function to operate the intermediate tensor
   /// of op output. when using this function, memory reuse should be tured off.
@@ -336,7 +326,7 @@ class AnalysisPredictor : public PaddlePredictor {
   /// type, the second param is output var name of the op, and the third
   /// parameter is output tensor with the var name.
   ///
-  void RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) override;
+  void RegisterOutputHook(const OutputTensorHookFunc &hookfunc) override;
 
   ///
   /// \brief Initialize mkldnn quantizer and execute mkldnn quantization pass
@@ -608,7 +598,6 @@ class AnalysisPredictor : public PaddlePredictor {
  private:
   std::vector<OutputTensorHookFunc> hookfuncs_;
-  std::vector<OutputTensorHookFunc_V2> hookfuncs_v2_;
 
   // Some status here that help to determine the status inside the predictor.
   bool status_is_cloned_{false};
paddle/fluid/inference/api/paddle_api.h (+1 -12)

@@ -39,7 +39,6 @@ using PaddleDType = paddle_infer::DataType;
 using PaddlePlace = paddle_infer::PlaceType;
 using PaddleDataLayout = paddle_infer::DataLayout;
 using paddle_infer::OutputTensorHookFunc;
-using paddle_infer::OutputTensorHookFunc_V2;
 
 /// \brief Memory manager for PaddleTensor.
 ///
@@ -314,16 +313,6 @@ class PD_INFER_DECL PaddlePredictor {
   ///
   virtual uint64_t TryShrinkMemory() { return 0; }
 
-  ///
-  /// \brief Register a output hook function to operate the intermediate tensor
-  /// of op output. when using this function, memory reuse should be tured off.
-  /// The hook function signature is void(const std::string&, const
-  /// std::string&, const paddle_infer::Tensor&>). Here, the first parameter is
-  /// op's type, the second param is output var name of the op, and the third
-  /// parameter is output tensor with the var name.
-  ///
-  virtual void RegisterOutputHook(const OutputTensorHookFunc &hookfunc) {}
-
   ///
   /// \brief Register a output hook function to operate the intermediate tensor
   /// of op output. when using this function, memory reuse should be tured off.
@@ -332,7 +321,7 @@ class PD_INFER_DECL PaddlePredictor {
   /// type, the second param is output var name of the op, and the third
   /// parameter is output tensor with the var name.
   ///
-  virtual void RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) {}
+  virtual void RegisterOutputHook(const OutputTensorHookFunc &hookfunc) {}
 
   /// \brief Clone an existing predictor
   /// When using clone, the same network will be created,
paddle/fluid/inference/api/paddle_inference_api.h (+0 -10)

@@ -199,16 +199,6 @@ class PD_INFER_DECL Predictor {
   ///
   void RegisterOutputHook(const OutputTensorHookFunc &hookfunc);
 
-  ///
-  /// \brief Register a output hook function to operate the intermediate tensor
-  /// of op output. when using this function, memory reuse should be tured off.
-  /// The hook function signature is void(const std::string&, const
-  /// std::string&, const Tensor&>). Here, the first parameter is op's
-  /// type, the second param is output var name of the op, and the third
-  /// parameter is output tensor with the var name.
-  ///
-  void RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc);
-
   ///
   /// \brief Get the execution stream on devices with a concept of stream,
   /// otherwise returns nullptr.
paddle/fluid/inference/api/paddle_tensor.h (+1 -5)

@@ -36,11 +36,7 @@ namespace paddle_infer {
 /// Strings for text data.
 using Strings = std::vector<std::string>;
 
-class Tensor;
-using OutputTensorHookFunc = std::function<void(
-    const std::string&, const std::string&, const Tensor&)>;
-
-using OutputTensorHookFunc_V2 = std::function<void(
+using OutputTensorHookFunc = std::function<void(
     const std::string&, const std::string&, const paddle::Tensor&)>;
 
 typedef void (*CallbackFunc)(void*);
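Net effect in this header: the `paddle_infer::Tensor`-based alias disappears and `OutputTensorHookFunc` keeps its name but adopts the former V2 signature built on `paddle::Tensor`. This is a breaking change for callers compiled against the old `const Tensor&` callback type; their hook signatures must be updated.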
paddle/fluid/operators/controlflow/while_op.cc (+2 -0)

@@ -220,6 +220,8 @@ class WhileOp : public framework::OperatorBase {
           dev_place, *block, &placeholder, execution_config));
     }
 
+    core_->SetOutputHooks(hookfuncs_);
+
     if (!is_test) {
       while (cond_data) {
         auto &current_scope = scope.NewScope();
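End to end, the pieces connect as follows: `Predictor::RegisterOutputHook` stores the user callback and registers a scope-aware adapter with `NaiveExecutor` (analysis_predictor.cc); before running a `while` op the executor copies its hooks onto the op (naive_executor.cc); `WhileOp` forwards them into its inner `InterpreterCore` here via `SetOutputHooks`; and the interpreter invokes each hook with `(op, local_scope)` after every op it executes (interpretercore.cc).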
paddle/fluid/pybind/inference_api.cc (+1 -5)

@@ -1096,11 +1096,7 @@ void BindPaddleInferPredictor(py::module *m) {
       .def("clear_intermediate_tensor",
            &paddle_infer::Predictor::ClearIntermediateTensor)
       .def("register_output_hook",
-           py::overload_cast<const paddle_infer::OutputTensorHookFunc &>(
-               &paddle_infer::Predictor::RegisterOutputHook))
-      .def("register_output_hook_v2",
-           py::overload_cast<const paddle_infer::OutputTensorHookFunc_V2 &>(
-               &paddle_infer::Predictor::RegisterOutputHook));
+           &paddle_infer::Predictor::RegisterOutputHook);
 }
 
 void BindZeroCopyTensor(py::module *m) {
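With only one C++ overload left, the `register_output_hook_v2` binding is dropped and `py::overload_cast` is no longer needed; Python callers now use a single `register_output_hook(fn)` whose callback receives the op type, the output variable name, and the output tensor.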
test/cpp/inference/api/analysis_predictor_tester.cc (+0 -47)

@@ -668,53 +668,6 @@ TEST(Predictor, Streams) {
 #endif
 
-TEST(AnalysisPredictor, OutputTensorHookFunc) {
-  auto hookfunc = [](const std::string &type,
-                     const std::string &var_name,
-                     const Tensor &tensor) { LOG(INFO) << "in hook function"; };
-
-  {
-    Config config;
-    config.SetModel(FLAGS_dirname);
-    config.EnableUseGpu(100, 0);
-    auto predictor = CreatePredictor(config);
-    predictor->RegisterOutputHook(hookfunc);
-    auto w0 = predictor->GetInputHandle("firstw");
-    auto w1 = predictor->GetInputHandle("secondw");
-    auto w2 = predictor->GetInputHandle("thirdw");
-    auto w3 = predictor->GetInputHandle("forthw");
-    w0->Reshape({4, 1});
-    w1->Reshape({4, 1});
-    w2->Reshape({4, 1});
-    w3->Reshape({4, 1});
-    auto *w0_data = w0->mutable_data<int64_t>(PlaceType::kCPU);
-    auto *w1_data = w1->mutable_data<int64_t>(PlaceType::kCPU);
-    auto *w2_data = w2->mutable_data<int64_t>(PlaceType::kCPU);
-    auto *w3_data = w3->mutable_data<int64_t>(PlaceType::kCPU);
-    for (int i = 0; i < 4; i++) {
-      w0_data[i] = i;
-      w1_data[i] = i;
-      w2_data[i] = i;
-      w3_data[i] = i;
-    }
-    predictor->Run();
-    predictor->TryShrinkMemory();
-  }
-
-  {
-    Config config;
-    config.SetModel(FLAGS_dirname);
-    config.EnableMemoryOptim();
-    config.EnableUseGpu(100, 0);
-    auto predictor = CreatePredictor(config);
-    predictor->RegisterOutputHook(hookfunc);
-  }
-}
-
 TEST(AnalysisPredictor, OutputTensorHookFunc_V2) {
   auto hookfunc = [](const std::string &type,
                      const std::string &var_name,
                      const paddle::Tensor &tensor) {
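Only the test for the removed `paddle_infer::Tensor`-based hook signature is deleted outright; the following `OutputTensorHookFunc_V2` test, whose callback already takes a `paddle::Tensor`, remains as coverage for the surviving code path.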