Unverified commit db323927, authored on Nov 01, 2022 by Yuanle Liu, committed via GitHub on Nov 01, 2022.
[Paddle Inference] add RegisterOutputHook interface (#47050)
Parent: a341bb8c
Showing 9 changed files with 145 additions and 26 deletions (+145 -26).
paddle/fluid/framework/naive_executor.cc                 +5  -8
paddle/fluid/framework/naive_executor.h                  +10 -9
paddle/fluid/inference/api/analysis_predictor.cc         +39 -8
paddle/fluid/inference/api/analysis_predictor.h          +12 -0
paddle/fluid/inference/api/analysis_predictor_tester.cc  +47 -0
paddle/fluid/inference/api/paddle_api.h                  +11 -0
paddle/fluid/inference/api/paddle_inference_api.h        +10 -0
paddle/fluid/inference/api/paddle_tensor.h               +7  -0
paddle/fluid/pybind/inference_api.cc                     +4  -1
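Taken together, the new public surface is Predictor::RegisterOutputHook plus the Exp_OutputHookFunc alias. A minimal usage sketch, assuming a model directory at ./model_dir (the path is hypothetical; the API names and hook signature come from this diff):

    #include <iostream>
    #include <string>

    #include "paddle_inference_api.h"

    int main() {
      paddle_infer::Config config;
      config.SetModel("./model_dir");   // hypothetical model path
      config.EnableMemoryOptim(false);  // hooks require memory reuse to be off

      auto predictor = paddle_infer::CreatePredictor(config);

      // Called once per op output var after each op finishes running.
      predictor->RegisterOutputHook([](const std::string &op_type,
                                       const std::string &var_name,
                                       const paddle_infer::Tensor &tensor) {
        std::cout << op_type << " -> " << var_name << std::endl;
      });

      // ... feed inputs through GetInputHandle(...) as usual, then:
      predictor->Run();
      return 0;
    }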
paddle/fluid/framework/naive_executor.cc
@@ -65,6 +65,9 @@ void NaiveExecutor::Run() {
 #ifdef PADDLE_WITH_INFERENCE_NVTX
     platform::CudaNvtxRangePop();
 #endif
+    if (hookfunc_) {
+      hookfunc_(op.get());
+    }
   }
 #ifdef PADDLE_WITH_INFERENCE_NVTX
   platform::CudaNvtxRangePop();
@@ -142,14 +145,8 @@ phi::DenseTensor *NaiveExecutor::FindTensor(const std::string &name) {
   return tensor;
 }

-void NaiveExecutor::CleanFeedFetchOps() {
-  std::vector<std::unique_ptr<OperatorBase>> ops;
-  for (auto &op : ops_) {
-    if (op->Type() != "feed" && op->Type() != "fetch") {
-      ops.emplace_back(std::move(op));
-    }
-  }
-  ops_.swap(ops);
+void NaiveExecutor::RegisterOutputHook(const HookFunc &hookfunc) {
+  hookfunc_ = hookfunc;
 }

 NaiveExecutor::~NaiveExecutor() {
paddle/fluid/framework/naive_executor.h
@@ -14,6 +14,7 @@
 #pragma once

+#include <functional>
 #include <memory>
 #include <string>
 #include <vector>
@@ -24,10 +25,6 @@
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/place.h"

-namespace phi {
-class DenseTensor;
-}  // namespace phi
-
 namespace paddle {
 namespace framework {
@@ -40,6 +37,8 @@ class Scope;
 class NaiveExecutor {
  public:
+  using HookFunc = std::function<void(OperatorBase *)>;
+
   explicit NaiveExecutor(const platform::Place &place) : place_(place) {}

   ~NaiveExecutor();
@@ -66,13 +65,13 @@ class NaiveExecutor {
   // Get an tensor to operating directly, without the need for feed_ops.
   phi::DenseTensor *FindTensor(const std::string &name);

-  Scope *scope() { return scope_; }
-
-  void CleanFeedFetchOps();
+  Scope *GetScope() { return scope_; }

   void ResetTrtOps(int num);

- protected:
+  void RegisterOutputHook(const HookFunc &hookfunc);
+
+ private:
   void CreateOps(const ProgramDesc &desc, int block_id, bool with_feed_fetch_ops);
@@ -81,7 +80,9 @@ class NaiveExecutor {
   const platform::Place place_;
   // Catch the required resource to avoid recreate.
   std::vector<std::unique_ptr<OperatorBase>> ops_;
-  Scope *scope_;
+  Scope *scope_{nullptr};
+
+  HookFunc hookfunc_{nullptr};
 };

 }  // namespace framework
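The executor side is a plain post-op observer: a single std::function stored on the executor and fired after every op in Run(). A standalone sketch of the same pattern, using illustrative stand-in types rather than Paddle's real ones:

    #include <functional>
    #include <memory>
    #include <string>
    #include <vector>

    // Stand-in for framework::OperatorBase.
    struct Op {
      std::string type;
      void Run() { /* execute the kernel */ }
    };

    class MiniExecutor {
     public:
      using HookFunc = std::function<void(Op *)>;

      void RegisterOutputHook(const HookFunc &hookfunc) { hookfunc_ = hookfunc; }

      void Run() {
        for (auto &op : ops_) {
          op->Run();
          if (hookfunc_) {
            hookfunc_(op.get());  // fire after each op, as in the diff above
          }
        }
      }

     private:
      std::vector<std::unique_ptr<Op>> ops_;
      HookFunc hookfunc_{nullptr};
    };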
paddle/fluid/inference/api/analysis_predictor.cc
@@ -32,6 +32,7 @@
 #include "paddle/fluid/framework/ir/pass.h"
 #include "paddle/fluid/framework/naive_executor.h"
 #include "paddle/fluid/framework/op_proto_maker.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/transfer_scope_cache.h"
 #include "paddle/fluid/framework/var_type_traits.h"
@@ -1557,10 +1558,10 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
   if (config_.dist_config().use_dist_model()) {
     scope = scope_.get();
   } else {
-    scope = executor_->scope();
+    scope = executor_->GetScope();
   }
 #else
-  scope = executor_->scope();
+  scope = executor_->GetScope();
 #endif
   PADDLE_ENFORCE_NOT_NULL(scope->FindVar(name),
@@ -1612,10 +1613,10 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
   if (config_.dist_config().use_dist_model()) {
     scope = scope_.get();
   } else {
-    scope = executor_->scope();
+    scope = executor_->GetScope();
   }
 #else
-  scope = executor_->scope();
+  scope = executor_->GetScope();
 #endif
   PADDLE_ENFORCE_NOT_NULL(scope->FindVar(name),
@@ -1997,7 +1998,7 @@ void AnalysisPredictor::ClearIntermediateTensor() {
   for (auto *var : global_block->AllVars()) {
     if (!IsPersistable(var)) {
       const std::string name = var->Name();
-      auto *variable = executor_->scope()->FindVar(name);
+      auto *variable = executor_->GetScope()->FindVar(name);
       if (variable != nullptr && variable->IsType<phi::DenseTensor>() &&
           name != "feed" && name != "fetch") {
         VLOG(3) << "Clear Intermediate Tensor: " << name;
@@ -2178,6 +2179,33 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) {
   exe.Run(save_program, scope(), 0, true, true);
 }

+void AnalysisPredictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
+  if (config_.enable_memory_optim()) {
+    LOG(WARNING) << "If you want to run output hook function, you should "
+                    "use config.EnableMemoryOptim(false) to turn off memory "
+                    "reuse!";
+    return;
+  }
+  static std::once_flag register_hook_flag;
+  std::call_once(register_hook_flag, [this] {
+    executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
+      for (auto &output : op->Outputs()) {
+        for (auto &var_name : output.second) {
+          auto *var = this->sub_scope_->FindVar(var_name);
+          if (!var || !var->IsType<phi::DenseTensor>()) continue;
+          auto dense_tensor = var->Get<phi::DenseTensor>();
+          if (!dense_tensor.initialized()) continue;
+          auto tensor = this->GetOutputTensor(var_name);
+          for (auto &hookfunc : this->hookfuncs_) {
+            hookfunc(op->Type(), var_name, *tensor);
+          }
+        }
+      }
+    });
+  });
+  hookfuncs_.push_back(hookfunc);
+}
+
 template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<AnalysisConfig>(
     const AnalysisConfig &config) {
@@ -2371,6 +2399,10 @@ void Predictor::ClearIntermediateTensor() {
 uint64_t Predictor::TryShrinkMemory() { return predictor_->TryShrinkMemory(); }

+void Predictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
+  predictor_->RegisterOutputHook(hookfunc);
+}
+
 void *Predictor::GetExecStream() const { return predictor_->GetExecStream(); }

 int GetNumBytesOfDataType(DataType dtype) {
@@ -2452,10 +2484,9 @@ PredictorPool::PredictorPool(const Config &config, size_t size) {
   for (size_t i = 0; i < size - 1; i++) {
     if (config.tensorrt_engine_enabled()) {
       Config config_tmp(copy_config);
-      preds_.push_back(
-          std::move(std::unique_ptr<Predictor>(new Predictor(config_tmp))));
+      preds_.emplace_back(new Predictor(config_tmp));
     } else {
-      preds_.push_back(std::move(main_pred_->Clone()));
+      preds_.emplace_back(main_pred_->Clone());
     }
   }
 }
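Two details worth noting in AnalysisPredictor::RegisterOutputHook: the std::once_flag installs the executor-level callback only on the first registration, while every later call merely appends to hookfuncs_, which that single callback fans out to; and the guard on enable_memory_optim() exists since memory reuse may recycle intermediate output buffers before a hook can observe them.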
paddle/fluid/inference/api/analysis_predictor.h
@@ -272,6 +272,16 @@ class AnalysisPredictor : public PaddlePredictor {
   ///
   std::string GetSerializedProgram() const override;

+  ///
+  /// \brief Register an output hook function to operate on the intermediate
+  /// tensors of op outputs. When using this function, memory reuse should be
+  /// turned off. The hook function signature is
+  /// void(const std::string&, const std::string&, const Tensor&). Here, the
+  /// first parameter is the op's type, the second is the op's output var
+  /// name, and the third is the output tensor with that var name.
+  ///
+  void RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) override;
+
   ///
   /// \brief Initialize mkldnn quantizer and execute mkldnn quantization pass
   ///
@@ -510,6 +520,8 @@ class AnalysisPredictor : public PaddlePredictor {
   int predictor_id_;

  private:
+  std::vector<Exp_OutputHookFunc> hookfuncs_;
+
   // Some status here that help to determine the status inside the predictor.
   bool status_is_cloned_{false};
paddle/fluid/inference/api/analysis_predictor_tester.cc
@@ -611,4 +611,51 @@ TEST(Predictor, Streams) {
 }
 #endif

+TEST(AnalysisPredictor, OutputHookFunc) {
+  auto hookfunc = [](const std::string &type,
+                     const std::string &var_name,
+                     const Tensor &tensor) { LOG(INFO) << "in hook function"; };
+
+  {
+    Config config;
+    config.SetModel(FLAGS_dirname);
+    config.EnableUseGpu(100, 0);
+    auto predictor = CreatePredictor(config);
+    predictor->RegisterOutputHook(hookfunc);
+    auto w0 = predictor->GetInputHandle("firstw");
+    auto w1 = predictor->GetInputHandle("secondw");
+    auto w2 = predictor->GetInputHandle("thirdw");
+    auto w3 = predictor->GetInputHandle("forthw");
+    w0->Reshape({4, 1});
+    w1->Reshape({4, 1});
+    w2->Reshape({4, 1});
+    w3->Reshape({4, 1});
+    auto *w0_data = w0->mutable_data<int64_t>(PlaceType::kCPU);
+    auto *w1_data = w1->mutable_data<int64_t>(PlaceType::kCPU);
+    auto *w2_data = w2->mutable_data<int64_t>(PlaceType::kCPU);
+    auto *w3_data = w3->mutable_data<int64_t>(PlaceType::kCPU);
+    for (int i = 0; i < 4; i++) {
+      w0_data[i] = i;
+      w1_data[i] = i;
+      w2_data[i] = i;
+      w3_data[i] = i;
+    }
+    predictor->Run();
+    predictor->TryShrinkMemory();
+  }
+
+  {
+    Config config;
+    config.SetModel(FLAGS_dirname);
+    config.EnableMemoryOptim();
+    config.EnableUseGpu(100, 0);
+    auto predictor = CreatePredictor(config);
+    predictor->RegisterOutputHook(hookfunc);
+  }
+}
+
 }  // namespace paddle_infer
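The first block of the test exercises the hook on a real run; the second registers the hook with EnableMemoryOptim() active, exercising the warning path in AnalysisPredictor::RegisterOutputHook shown above.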
paddle/fluid/inference/api/paddle_api.h
@@ -38,6 +38,7 @@ namespace paddle {
 using PaddleDType = paddle_infer::DataType;
 using PaddlePlace = paddle_infer::PlaceType;
 using PaddleDataLayout = paddle_infer::DataLayout;
+using paddle_infer::Exp_OutputHookFunc;

 /// \brief Memory manager for PaddleTensor.
 ///
@@ -289,6 +290,16 @@ class PD_INFER_DECL PaddlePredictor {
   ///
   virtual uint64_t TryShrinkMemory() { return 0; }

+  ///
+  /// \brief Register an output hook function to operate on the intermediate
+  /// tensors of op outputs. When using this function, memory reuse should be
+  /// turned off. The hook function signature is
+  /// void(const std::string&, const std::string&, const Tensor&). Here, the
+  /// first parameter is the op's type, the second is the op's output var
+  /// name, and the third is the output tensor with that var name.
+  ///
+  virtual void RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {}
+
   /// \brief Clone an existing predictor
   /// When using clone, the same network will be created,
   /// and the parameters between them are shared.
paddle/fluid/inference/api/paddle_inference_api.h
@@ -157,6 +157,16 @@ class PD_INFER_DECL Predictor {
   ///
   uint64_t TryShrinkMemory();

+  ///
+  /// \brief Register an output hook function to operate on the intermediate
+  /// tensors of op outputs. When using this function, memory reuse should be
+  /// turned off. The hook function signature is
+  /// void(const std::string&, const std::string&, const Tensor&). Here, the
+  /// first parameter is the op's type, the second is the op's output var
+  /// name, and the third is the output tensor with that var name.
+  ///
+  void RegisterOutputHook(const Exp_OutputHookFunc &hookfunc);
+
   ///
   /// \brief Get the execution stream on devices with a concept of stream,
   /// otherwise returns nullptr.
paddle/fluid/inference/api/paddle_tensor.h
@@ -14,7 +14,10 @@
 #pragma once

+#include <functional>
+#include <memory>
 #include <string>
 #include <vector>

 #include "paddle_infer_declare.h"  // NOLINT
@@ -29,6 +32,10 @@ namespace paddle_infer {
 /// Strings for text data.
 using Strings = std::vector<std::string>;

+class Tensor;
+using Exp_OutputHookFunc = std::function<void(
+    const std::string&, const std::string&, const Tensor&)>;
+
 typedef void (*CallbackFunc)(void *);

 #if defined(PADDLE_WITH_TESTING) && defined(PADDLE_WITH_INFERENCE_API_TEST)
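The Exp_ prefix marks the alias as experimental, matching Paddle's naming for interfaces that may still change in later releases.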
paddle/fluid/pybind/inference_api.cc
@@ -14,6 +14,7 @@
 #include "paddle/fluid/pybind/inference_api.h"

+#include <pybind11/functional.h>
 #include <pybind11/numpy.h>
 #include <pybind11/stl.h>
@@ -946,7 +947,9 @@ void BindPaddleInferPredictor(py::module *m) {
 #endif
       .def("try_shrink_memory", &paddle_infer::Predictor::TryShrinkMemory)
-      .def("clear_intermediate_tensor",
-           &paddle_infer::Predictor::ClearIntermediateTensor);
+      .def("clear_intermediate_tensor",
+           &paddle_infer::Predictor::ClearIntermediateTensor)
+      .def("register_output_hook",
+           &paddle_infer::Predictor::RegisterOutputHook);
 }

 void BindZeroCopyTensor(py::module *m) {
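The new <pybind11/functional.h> include is what lets register_output_hook accept a plain Python callable: that header provides the type caster converting Python callables to std::function. A minimal sketch of the mechanism, with an illustrative module and function rather than Paddle's real binding:

    #include <pybind11/functional.h>  // type caster: Python callable <-> std::function
    #include <pybind11/pybind11.h>

    #include <functional>
    #include <string>

    namespace py = pybind11;

    // Illustrative stand-in for an API like Predictor::RegisterOutputHook.
    void register_hook(const std::function<void(const std::string &)> &hook) {
      hook("relu_0.tmp_0");  // invoke once so the example is observable
    }

    PYBIND11_MODULE(hook_demo, m) {
      // Without <pybind11/functional.h>, this signature could not accept a
      // Python function as its argument.
      m.def("register_hook", &register_hook);
    }

From Python, predictor.register_output_hook(fn) then works with any callable taking (op_type, var_name, tensor).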