Unverified commit f88713e1
Authored by Wilber on Dec 08, 2022; committed via GitHub on Dec 08, 2022.
[Inference] Enable infer shape cache. (#48312)
Parent: fe86771a
Showing 5 changed files with 104 additions and 23 deletions (+104 −23).
paddle/fluid/framework/ir/runtime_context_cache_pass.cc  (+20 −1)
paddle/fluid/framework/operator.cc  (+74 −6)
paddle/fluid/framework/operator.h  (+5 −3)
paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc  (+3 −3)
paddle/fluid/inference/api/paddle_pass_builder.cc  (+2 −10)
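At the core of the change is a per-operator shape cache: once the runtime context is cached, the operator records the dims of its dense-tensor inputs and re-runs InferShape only when any of those dims differ from the previous run (see CacheImpl::NeedInferShape in operator.cc below). A minimal, self-contained sketch of that comparison; ShapeCache and Dims are illustrative stand-ins, not Paddle types:

#include <cstdint>
#include <iostream>
#include <vector>

// Toy stand-in for the dims-comparison cache: the first call always reports
// "infer needed"; later calls report it only when some input's dims changed.
using Dims = std::vector<int64_t>;

class ShapeCache {
 public:
  bool NeedInferShape(const std::vector<Dims>& input_dims) {
    bool need = last_dims_.empty() || input_dims.empty() ||
                input_dims != last_dims_;
    if (need) last_dims_ = input_dims;  // remember dims for the next run
    return need;
  }

 private:
  std::vector<Dims> last_dims_;
};

int main() {
  ShapeCache cache;
  std::vector<Dims> batch1 = {{1, 3, 224, 224}};
  std::vector<Dims> batch2 = {{4, 3, 224, 224}};
  std::cout << cache.NeedInferShape(batch1) << "\n";  // 1: first run
  std::cout << cache.NeedInferShape(batch1) << "\n";  // 0: same dims, skip
  std::cout << cache.NeedInferShape(batch2) << "\n";  // 1: dims changed
}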
paddle/fluid/framework/ir/runtime_context_cache_pass.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/runtime_context_cache_pass.h"
+#include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/operator.h"

 namespace paddle {
@@ -21,10 +22,28 @@ namespace framework {
 namespace ir {

 void RuntimeContextCachePass::ApplyImpl(ir::Graph* graph) const {
+  static constexpr char kNotAllowInferShapeCahce[] =
+      "@NOT_ALLOW_INFERSHAPE_CACHE@";
   VLOG(3) << "Applies Runtime Context Cache strategy.";
   for (const Node* n : graph->Nodes()) {
     if (n->IsOp() && n->Op()) {
-      n->Op()->SetAttr(kEnableCacheRuntimeContext, true);
+      n->Op()->SetAttr(framework::kEnableCacheRuntimeContext, true);
     }
   }
+
+  // if op1 -> var0 and op2 -> var0, then op1 and op2 not support
+  // InferShapeCache.
+  std::unordered_map<std::string, std::vector<Node*>> var2ops;
+  for (auto* op_node : TopologySortOperations(*graph)) {
+    for (auto* var_node : op_node->outputs) {
+      var2ops[var_node->Name()].push_back(op_node);
+    }
+  }
+  for (auto& it : var2ops) {
+    if (it.second.size() > 1) {
+      for (auto op_node : it.second) {
+        op_node->Op()->SetAttr(kNotAllowInferShapeCahce, true);
+      }
+    }
+  }
 }
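The second half of the pass applies a conservative rule: when more than one operator writes the same variable, cached shapes could go stale, so every writer of such a variable is tagged with the @NOT_ALLOW_INFERSHAPE_CACHE@ attribute and keeps running InferShape. A toy version of the same grouping idea, using illustrative stand-in types rather than Paddle's IR nodes:

#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Toy op: a name plus the variables it writes.
struct ToyOp {
  std::string name;
  std::vector<std::string> outputs;
  bool allow_shape_cache = true;
};

int main() {
  std::vector<ToyOp> ops = {{"op1", {"var0"}}, {"op2", {"var0"}}, {"op3", {"var1"}}};

  // Group ops by the variables they produce, mirroring var2ops in the pass.
  std::unordered_map<std::string, std::vector<ToyOp*>> var2ops;
  for (auto& op : ops)
    for (auto& v : op.outputs) var2ops[v].push_back(&op);

  // Any variable with more than one writer disables the shape cache
  // for all of its writers.
  for (auto& kv : var2ops)
    if (kv.second.size() > 1)
      for (auto* op : kv.second) op->allow_shape_cache = false;

  for (auto& op : ops)
    std::cout << op.name << " allow_shape_cache=" << op.allow_shape_cache << "\n";
}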
paddle/fluid/framework/operator.cc
@@ -15,6 +15,7 @@ limitations under the License. */
 #include <sstream>
 #include <string>
 #include <unordered_set>
+#include "gflags/gflags.h"
 #include "paddle/fluid/framework/convert_utils.h"
@@ -36,6 +37,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/profiler/supplement_tracing.h"
 #include "paddle/phi/common/int_array.h"
 #include "paddle/phi/common/scalar.h"
+#include "paddle/phi/core/ddim.h"
 #include "paddle/phi/core/kernel_context.h"
 #include "paddle/phi/core/kernel_factory.h"
 #include "paddle/phi/ops/compat/signatures.h"
@@ -562,6 +564,14 @@ phi::DenseTensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var) {
   }
 }

+OperatorWithKernel::OperatorWithKernel(const std::string& type,
+                                       const VariableNameMap& inputs,
+                                       const VariableNameMap& outputs,
+                                       const AttributeMap& attrs)
+    : OperatorBase(type, inputs, outputs, attrs) {}
+
+OperatorWithKernel::~OperatorWithKernel() = default;
+
 bool ExecutionContext::HasInput(const std::string& name) const {
   auto* var = InputVar(name);
   return var != nullptr;
@@ -1204,19 +1214,54 @@ class RuntimeInferShapeContext : public InferShapeContext {
 };

 struct OperatorWithKernel::CacheImpl {
+  static const char kNotAllowInferShapeCahce[];
+
   explicit CacheImpl(phi::KernelContext* kernel_ctx,
-                     RuntimeInferShapeContext* infer_shape_ctx)
-      : kernel_ctx_(kernel_ctx), infer_shape_ctx_(infer_shape_ctx) {}
+                     RuntimeInferShapeContext* infer_shape_ctx,
+                     const std::vector<phi::DenseTensor*>& tensors,
+                     bool not_allow_infer_shape_cache)
+      : kernel_ctx_(kernel_ctx),
+        infer_shape_ctx_(infer_shape_ctx),
+        tensors_(tensors),
+        not_allow_infer_shape_cache_(not_allow_infer_shape_cache) {}

   phi::KernelContext* getKernelContext() { return kernel_ctx_.get(); }
   RuntimeInferShapeContext* getRuntimeInferShapeContext() {
     return infer_shape_ctx_.get();
   }

+  bool NeedInferShape() {
+    if (not_allow_infer_shape_cache_) return true;
+
+    bool ret{false};
+    if (last_ddims_.empty() || tensors_.empty()) ret = true;
+    if (!ret) {
+      CHECK_EQ(last_ddims_.size(), tensors_.size());
+      for (size_t i = 0; i < last_ddims_.size(); ++i) {
+        if (tensors_[i]->dims() != last_ddims_[i]) {
+          ret = true;
+          break;
+        }
+      }
+    }
+    if (ret) {
+      last_ddims_.resize(tensors_.size());
+      for (size_t i = 0; i < last_ddims_.size(); ++i) {
+        last_ddims_[i] = tensors_[i]->dims();
+      }
+    }
+    VLOG(3) << "need infer shape is " << ret;
+    return ret;
+  }
+
  private:
   std::unique_ptr<phi::KernelContext> kernel_ctx_;
   std::unique_ptr<RuntimeInferShapeContext> infer_shape_ctx_;
+  std::vector<phi::DenseTensor*> tensors_;
+  bool not_allow_infer_shape_cache_;
+  std::vector<phi::DDim> last_ddims_;
 };
+const char OperatorWithKernel::CacheImpl::kNotAllowInferShapeCahce[] =
+    "@NOT_ALLOW_INFERSHAPE_CACHE@";

 static void CheckTensorNANOrInf(const std::string& op_type,
                                 const std::string& name,
@@ -1524,8 +1569,9 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
       pre_scope_ = cur_scope;
   } else if (run_phi_kernel_ && impl_ != nullptr && !need_prepare_data_ &&
              !need_prepare_phi_data_) {
-    if (!all_kernels_must_compute_runtime_shape_)
+    if (!all_kernels_must_compute_runtime_shape_ && impl_->NeedInferShape()) {
       this->Info().infer_shape_(impl_->getRuntimeInferShapeContext());
+    }
     (*phi_kernel_)(impl_->getKernelContext());
   } else {
     if (runtime_ctx_.get() == nullptr || pre_scope_ != cur_scope) {
@@ -1828,9 +1874,31 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
     phi::KernelContext phi_kernel_context;
     if (enable_cache_runtime_context_ && !need_prepare_phi_data_ &&
         !need_prepare_data_) {
-      impl_ =
+      // TODO(inference): Now we only suppor dense_tensor cache, we may be
+      // support ScalarTensor, SparseTensor in future.
+      bool all_dense_tensor_input_{true};
+      for (auto& iter : Inputs()) {
+        for (auto& name : iter.second) {
+          all_dense_tensor_input_ &=
+              scope.FindVar(name)->IsType<phi::DenseTensor>();
+        }
+      }
+
+      std::vector<phi::DenseTensor*> tensors;
+      if (all_dense_tensor_input_) {
+        for (auto& iter : Inputs()) {
+          for (auto& name : iter.second) {
+            auto* t = scope.FindVar(name)->GetMutable<phi::DenseTensor>();
+            tensors.push_back(t);
+          }
+        }
+      }
+
+      impl_.reset(
           new CacheImpl(new phi::KernelContext(),
-                        new RuntimeInferShapeContext(*this, *runtime_ctx));
+                        new RuntimeInferShapeContext(*this, *runtime_ctx),
+                        tensors,
+                        HasAttr(CacheImpl::kNotAllowInferShapeCahce)));
       BuildPhiKernelContext(*runtime_ctx, dev_ctx, impl_->getKernelContext());
       (*phi_kernel_)(impl_->getKernelContext());
     } else {
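The cache is only constructed when every input variable holds a phi::DenseTensor; otherwise the operator stays on the uncached path. A small sketch of that guard, using std::variant as an illustrative stand-in for Paddle's Variable type:

#include <cstdint>
#include <iostream>
#include <variant>
#include <vector>

struct DenseLike { std::vector<int64_t> dims; };
struct SparseLike {};
using Var = std::variant<DenseLike, SparseLike>;

int main() {
  std::vector<Var> inputs = {DenseLike{{8, 16}}, DenseLike{{16, 32}}};

  // Fold a flag across all inputs, as RunImpl does with IsType<phi::DenseTensor>().
  bool all_dense = true;
  for (const auto& v : inputs) all_dense &= std::holds_alternative<DenseLike>(v);

  // Only when every input is dense do we collect the pointers the shape
  // cache will compare on later runs.
  std::vector<const DenseLike*> tensors;
  if (all_dense)
    for (const auto& v : inputs) tensors.push_back(&std::get<DenseLike>(v));

  std::cout << "all_dense=" << all_dense
            << ", cached tensors=" << tensors.size() << "\n";
}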
@@ -3246,6 +3314,7 @@ void OperatorWithKernel::BuildPhiKernelContext(
   if (phi::OneDNNContext::classof(dev_ctx)) {
     phi::OneDNNContext* one_dnn_ctx = static_cast<phi::OneDNNContext*>(dev_ctx);
     one_dnn_ctx->ClearDnnAttr();
+    if (!RuntimeAttrs().empty()) need_prepare_phi_data_ = true;
   }
 #endif
@@ -3267,7 +3336,6 @@ void OperatorWithKernel::BuildPhiKernelContext(
 #if defined(PADDLE_WITH_MKLDNN) || defined(PADDLE_WITH_CUDA)
   auto& runtime_attrs = RuntimeAttrs();
   for (const auto& attr_iter : runtime_attrs) {
-    need_prepare_phi_data_ = true;
     auto& attr_name = attr_iter.first;
     auto& attr = attr_iter.second;
     auto attr_propertys = paddle::operators::GetExtraAttrProperties(attr_name);
paddle/fluid/framework/operator.h
@@ -612,8 +612,9 @@ class OperatorWithKernel : public OperatorBase {
   OperatorWithKernel(const std::string& type,
                      const VariableNameMap& inputs,
                      const VariableNameMap& outputs,
-                     const AttributeMap& attrs)
-      : OperatorBase(type, inputs, outputs, attrs) {}
+                     const AttributeMap& attrs);
+
+  virtual ~OperatorWithKernel();

   static paddle::flat_hash_map<std::string /* op_type */, OpKernelMap>&
   AllOpKernels() {
@@ -785,8 +786,9 @@ class OperatorWithKernel : public OperatorBase {
   mutable std::unique_ptr<phi::Kernel> phi_kernel_;
   mutable std::unique_ptr<phi::ArgumentMappingFn> arg_map_fn_;

  private:
   struct CacheImpl;
-  mutable CacheImpl* impl_{nullptr};
+  mutable std::unique_ptr<CacheImpl> impl_;
 };

 extern bool OpSupportGPU(const std::string& op_type);
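Switching impl_ from a raw pointer to std::unique_ptr<CacheImpl> is also why the constructor and destructor move out of the header: with a unique_ptr to an incomplete type, they must be defined where CacheImpl is complete (operator.cc). A generic sketch of this pImpl pattern, deliberately using hypothetical names rather than Paddle's:

#include <memory>

class Widget {
 public:
  Widget();
  ~Widget();  // declared here, defined where Impl is a complete type
 private:
  struct Impl;                  // forward declaration only
  std::unique_ptr<Impl> impl_;  // owning pointer to the hidden implementation
};

// --- normally in the .cc file ---
struct Widget::Impl { int value = 0; };
Widget::Widget() : impl_(new Impl) {}
Widget::~Widget() = default;    // Impl is complete here, so unique_ptr can delete it

int main() { Widget w; }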
paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc
@@ -23,6 +23,8 @@ namespace inference {
 namespace analysis {

 void IrGraphToProgramPass::RunImpl(Argument *argument) {
+  auto cache_pass =
+      framework::ir::PassRegistry::Instance().Get("runtime_context_cache_pass");
   auto pass =
       framework::ir::PassRegistry::Instance().Get("graph_to_program_pass");
@@ -31,14 +33,12 @@ void IrGraphToProgramPass::RunImpl(Argument *argument) {
         new int(argument->memory_optim_sort_kind()));
   }

-  std::unique_ptr<framework::ir::Graph> graph(argument->main_graph_ptr());
-
   // Direct using ProgramDesc desc(argument->main_program()) may cause
   // incomplete copies of information.
   framework::ProgramDesc desc;
   desc.CopyFrom(*argument->main_program().Proto());
   pass->SetNotOwned("program", &desc);
-  pass->Apply(graph.release());  // the argument still own the graph.
+  pass->Apply(cache_pass->Apply(argument->main_graph_ptr()));

   argument->SetIrAnalyzedProgram(
       new framework::proto::ProgramDesc(*desc.Proto()));
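With this change, runtime_context_cache_pass runs unconditionally while the analysis graph is converted back to a program, by chaining it in front of graph_to_program_pass; it therefore no longer needs to appear in every pass-strategy list (see the paddle_pass_builder.cc hunks below). A minimal sketch of the same chaining pattern with a toy Pass interface; the types here are illustrative, not Paddle's:

#include <iostream>
#include <string>

// Toy stand-ins: each pass transforms a "graph" and returns it, so passes
// can be nested the way RunImpl chains cache_pass into graph_to_program.
struct Graph { std::string trace; };

struct Pass {
  std::string name;
  Graph* Apply(Graph* g) const {
    g->trace += name + " -> ";
    return g;  // return the same graph so calls can be composed
  }
};

int main() {
  Graph g;
  Pass cache_pass{"runtime_context_cache_pass"};
  Pass to_program{"graph_to_program_pass"};

  // Same shape as: pass->Apply(cache_pass->Apply(argument->main_graph_ptr()));
  to_program.Apply(cache_pass.Apply(&g));
  std::cout << g.trace << "done\n";
}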
paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -188,7 +188,6 @@ const std::vector<std::string> kGpuLowerPrecisionPasses{
     "fc_fuse_pass",
     "fc_elementwise_layernorm_fuse_pass",
     "embedding_eltwise_layernorm_fuse_pass",
-    "runtime_context_cache_pass",
 };

 const std::vector<std::string> kTrtLowerPrecisionPasses{
@@ -254,10 +253,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
 #endif  //
         "transpose_flatten_concat_fuse_pass",  //
         "constant_folding_pass",               //
-        // following pass should be located in the last, since it will
-        // work on all fused ops.
-        "float_to_half_pass",                  //
-        "runtime_context_cache_pass"});
+        "float_to_half_pass"});

   use_gpu_ = true;
@@ -322,10 +318,7 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
         "conv_transpose_bn_fuse_pass",             //
         "conv_transpose_eltwiseadd_bn_fuse_pass",  //
         "is_test_pass",                            //
-        "constant_folding_pass",
-        // following pass should be located in the last, since
-        // it will work on all fused ops.
-        "runtime_context_cache_pass"});
+        "constant_folding_pass"});

   use_gpu_ = false;
 }
@@ -475,7 +468,6 @@ void CpuPassStrategy::EnableMkldnnInt8() {
     passes_.push_back("int8_scale_calculation_mkldnn_pass");
     passes_.push_back("params_quantization_mkldnn_pass");
     passes_.push_back("mkldnn_inplace_pass");
-    passes_.push_back("runtime_context_cache_pass");
   }
   use_mkldnn_int8_ = true;
 #else
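Because the cache pass is now chained into ir_graph_to_program_pass, it is dropped from the individual pass lists here, and users customizing the pipeline through the C++ inference API no longer need to list it themselves. For reference, a hedged sketch of how a pass pipeline is typically customized via the pass builder; the method names follow the public paddle_inference_api / paddle_pass_builder interface, but the header path and exact signatures should be verified against the Paddle version you build with:

// A sketch only, assuming the standard inference C++ API is available.
#include "paddle_inference_api.h"  // install-dependent include path

int main() {
  paddle_infer::Config config("./model_dir");       // model directory is a placeholder
  config.EnableUseGpu(/*memory_mb=*/100, /*device_id=*/0);

  // The runtime-context/infer-shape cache pass is applied automatically
  // during analysis after this commit; passes can still be edited by hand.
  auto* builder = config.pass_builder();
  builder->DeletePass("constant_folding_pass");      // example removal
  builder->AppendPass("runtime_context_cache_pass"); // example append (redundant now)

  auto predictor = paddle_infer::CreatePredictor(config);
  return 0;
}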