Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
0abf7560
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
0abf7560
编写于
10月 25, 2022
作者:
J
jakpiase
提交者:
GitHub
10月 25, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Added workaround for elementwise oneDNN kernel (#47080)
* return proper state * fix for dims * fix
上级
06ef3f04
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
59 addition
and
8 deletion
+59
-8
paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h
...luid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h
+16
-4
paddle/phi/backends/onednn/onednn_reuse.h
paddle/phi/backends/onednn/onednn_reuse.h
+43
-4
未找到文件。
paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h
浏览文件 @
0abf7560
...
...
@@ -78,6 +78,7 @@ class EltwiseMKLDNNKernel : public framework::OpKernel<T> {
scale_x
,
scale_y
,
scale_o
,
true
,
get_post_ops
(
ctx
));
// oneDNN's binary is optimized for broadcasting y into x, so in other case
...
...
@@ -126,7 +127,14 @@ class EltwiseMKLDNNKernel : public framework::OpKernel<T> {
binary_prim
->
execute
(
astream
,
args
);
astream
.
wait
();
z
->
set_mem_desc
(
dst_memory
->
get_desc
());
if
(
handler
.
use_broadcasting_hack
==
false
)
{
z
->
set_mem_desc
(
dst_memory
->
get_desc
());
}
else
{
auto
dims
=
dst_memory
->
get_desc
().
dims
();
dims
.
insert
(
dims
.
begin
(),
x
->
dims
()[
0
]);
dims
[
1
]
/=
dims
[
0
];
z
->
set_mem_desc
(
dst_memory
->
get_desc
().
reshape
(
dims
));
}
}
};
...
...
@@ -210,7 +218,8 @@ class EltwiseMKLDNNGradKernel : public ElemwiseGradKernel<T> {
dx
,
1.0
f
,
1.0
f
,
1.0
f
);
1.0
f
,
false
);
const
auto
src_dout_memory
=
binary_handler
.
AcquireSrcMemory
(
dout
);
const
auto
src_y_memory
=
binary_handler
.
AcquireSecondSrcMemory
(
y
);
...
...
@@ -276,7 +285,8 @@ class EltwiseMKLDNNGradKernel : public ElemwiseGradKernel<T> {
nullptr
,
1.0
f
,
1.0
f
,
1.0
f
);
1.0
f
,
false
);
src_1_memory
=
binary_handler
.
AcquireSecondSrcMemory
(
x
);
...
...
@@ -291,7 +301,8 @@ class EltwiseMKLDNNGradKernel : public ElemwiseGradKernel<T> {
nullptr
,
1.0
f
,
1.0
f
,
1.0
f
);
1.0
f
,
false
);
post_op_memory
=
post_op_binary_handler
.
AcquireSrcMemory
(
y
);
...
...
@@ -310,6 +321,7 @@ class EltwiseMKLDNNGradKernel : public ElemwiseGradKernel<T> {
-
1.0
f
,
1.0
f
,
1.0
f
,
false
,
po
);
src_1_memory
=
binary_handler
.
AcquireSecondSrcMemory
(
out
);
...
...
paddle/phi/backends/onednn/onednn_reuse.h
浏览文件 @
0abf7560
...
...
@@ -825,6 +825,7 @@ class ReorderOneDNNHandler {
template
<
typename
T
>
class
BinaryOneDNNHandler
:
public
OneDNNHandlerNoCachingT
<
T
,
dnnl
::
binary
>
{
public:
bool
use_broadcasting_hack
;
BinaryOneDNNHandler
(
const
dnnl
::
algorithm
algo
,
const
int
axis
,
const
dnnl
::
engine
engine
,
...
...
@@ -835,15 +836,17 @@ class BinaryOneDNNHandler : public OneDNNHandlerNoCachingT<T, dnnl::binary> {
float
scale_x
,
float
scale_y
,
float
scale_out
,
bool
allow_hack
,
const
dnnl
::
post_ops
&
post_ops
=
dnnl
::
post_ops
{})
:
OneDNNHandlerNoCachingT
<
T
,
dnnl
::
binary
>
(
engine
,
cpu_place
)
{
use_broadcasting_hack
=
false
;
const
auto
src_x_tz
=
vectorize
(
x
->
dims
());
const
auto
src_y_tz
=
vectorize
(
y
->
dims
());
// if output tensor(z) is nullptr then we are computing into oneDNN
// managed buffer
auto
rankdiff
=
x
->
dims
().
size
()
-
y
->
dims
().
size
();
const
auto
dst_tz
=
(
out
==
nullptr
)
?
(
rankdiff
>
0
?
src_x_tz
:
src_y_tz
)
:
vectorize
(
out
->
dims
());
auto
dst_tz
=
(
out
==
nullptr
)
?
(
rankdiff
>
0
?
src_x_tz
:
src_y_tz
)
:
vectorize
(
out
->
dims
());
auto
src0_md
=
x
->
mem_desc
();
auto
src1_md
=
y
->
mem_desc
();
...
...
@@ -870,12 +873,48 @@ class BinaryOneDNNHandler : public OneDNNHandlerNoCachingT<T, dnnl::binary> {
}
src0_md
=
src0_md
.
reshape
(
dims0_ex
);
}
const
auto
dst_md
=
memory
::
desc
(
dst_tz
,
OneDNNGetDataType
<
T
>
(),
OneDNNMemoryFormat
::
any
);
auto
attributes
=
CreateAttributes
(
algo
,
scale_x
,
scale_y
,
scale_out
,
post_ops
);
// Workaround for the U2++ model: fold the first tensor dimension into the
// second one to enable oneDNN's optimized broadcasting. The output tensor is
// reshaped back at the end of the kernel, after the computation.
if
(
allow_hack
&&
dst_tz
.
size
()
==
4
&&
src0_md
.
dims
()[
2
]
!=
src1_md
.
dims
()[
2
])
{
// Returns true when the first `ndims` entries of `strides` never increase
// from one position to the next (each stride is >= the one that follows it).
// With fewer than two entries there is nothing to compare and the result is
// true.
auto are_strides_plain = [](int64_t* strides, int ndims) {
  bool non_increasing = true;
  // Walk adjacent pairs (next - 1, next); stop at the first increasing pair.
  for (int next = 1; next < ndims && non_increasing; ++next) {
    non_increasing = !(strides[next - 1] < strides[next]);
  }
  return non_increasing;
};
auto
src0_strides
=
src0_md
.
data
.
format_desc
.
blocking
.
strides
;
auto
src1_strides
=
src1_md
.
data
.
format_desc
.
blocking
.
strides
;
auto
src0_dims
=
src0_md
.
dims
();
auto
src1_dims
=
src1_md
.
dims
();
bool
can_squeeze
=
src0_dims
[
0
]
==
src1_dims
[
0
]
&&
src0_dims
[
1
]
==
src1_dims
[
1
]
&&
src0_dims
[
3
]
==
src1_dims
[
3
];
if
(
can_squeeze
&&
are_strides_plain
(
src0_strides
,
4
)
&&
are_strides_plain
(
src1_strides
,
4
))
{
src0_dims
[
1
]
*=
dst_tz
[
0
];
src1_dims
[
1
]
*=
dst_tz
[
0
];
dst_tz
[
1
]
*=
dst_tz
[
0
];
dst_tz
.
erase
(
dst_tz
.
begin
());
src0_md
=
src0_md
.
reshape
({
src0_dims
.
begin
()
+
1
,
src0_dims
.
end
()});
src1_md
=
src1_md
.
reshape
({
src1_dims
.
begin
()
+
1
,
src1_dims
.
end
()});
use_broadcasting_hack
=
true
;
}
}
auto
dst_md
=
memory
::
desc
(
dst_tz
,
OneDNNGetDataType
<
T
>
(),
OneDNNMemoryFormat
::
any
);
if
(
x
->
numel
()
<
y
->
numel
())
{
if
(
algo
==
dnnl
::
algorithm
::
binary_sub
)
{
attributes
=
CreateAttributes
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录