BaiXuePrincess / Paddle — forked from PaddlePaddle / Paddle (in sync with upstream)
Commit c11d9b30 (unverified)
Authored on Oct 28, 2020 by Jacek Czaja; committed by GitHub on Oct 28, 2020
Parent: d932b561

[oneDNN] conv2d fwd&bwd optimization (#27871)
Showing 7 changed files with 88 additions and 38 deletions (+88 −38)
paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc                               +50  −38
paddle/fluid/platform/mkldnn_helper.h                                          +4   −0
paddle/fluid/platform/mkldnn_reuse.h                                          +18   −0
python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_bf16_mkldnn_op.py       +2   −0
python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py            +2   −0
python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py   +2   −0
python/paddle/fluid/tests/unittests/op_test.py                                +10   −0
paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
@@ -211,22 +211,8 @@ class ConvMKLDNNHandlerT
        * ('any') which lets a primitive (convolution in this case) choose
        * the memory format preferred for best performance
        */
-      // TODO(jczaja): This is workaround to make grad op UT's numerical
-      // gradient computation proper as this op is called directly without
-      // fetch op following it , so numercial grad is computed (in python)
-      // using block formats which will give wrong results
-      const std::string data_format = ctx.Attr<std::string>("data_format");
-      auto chosen_memory_format =
-          is_test ? MKLDNNMemoryFormat::any
-                  : platform::data_format_to_memory_format(data_format);
-      // Check the format for user's special output
-      if (chosen_memory_format != MKLDNNMemoryFormat::any) {
-        if (is_conv3d) {
-          chosen_memory_format = platform::MKLDNNFormatForSize(
-              src_tz.size(), chosen_memory_format);
-        }
-      }
+      auto chosen_memory_format = MKLDNNMemoryFormat::any;
       auto data_type = mkldnn::memory::data_type::f32;
       if (ctx.Attr<std::string>("mkldnn_data_type") == "bfloat16" ||
           std::is_same<T_out, platform::bfloat16>::value)
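Note: this hunk drops the data_format-driven layout choice and always passes format_tag::any in the forward pass, so oneDNN can keep activations in its preferred blocked layout and skip reorders between consecutive oneDNN ops. For readers unfamiliar with the idiom, here is a minimal standalone sketch of how 'any' works, written against the plain oneDNN C++ API (dnnl.hpp, v1.x) rather than Paddle's wrappers; the shapes are invented for illustration.

// Sketch only: with format_tag::any the primitive descriptor, not the
// caller, picks the memory layout that is fastest for these shapes, and
// the caller queries what was chosen.
#include <dnnl.hpp>
using namespace dnnl;

memory::desc chosen_src_layout(engine& eng) {
  auto any = memory::format_tag::any;
  auto f32 = memory::data_type::f32;
  convolution_forward::desc d(
      prop_kind::forward_inference, algorithm::convolution_direct,
      memory::desc({1, 3, 224, 224}, f32, any),  // src (NCHW logical dims)
      memory::desc({8, 3, 3, 3}, f32, any),      // weights
      memory::desc({1, 8, 222, 222}, f32, any),  // dst
      memory::dims{1, 1},                        // strides
      memory::dims{0, 0}, memory::dims{0, 0});   // padding l/r
  convolution_forward::primitive_desc pd(d, eng);
  return pd.src_desc();  // e.g. a blocked layout such as nChw16c
}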
@@ -351,14 +337,16 @@ class ConvMKLDNNHandlerT
   std::shared_ptr<mkldnn::memory> AcquireResidualMemory(
       const framework::Tensor* residual_param) {
-    const T* residual_data = residual_param->data<T>();
+    void* residual_data =
+        residual_param->type() == framework::DataTypeTrait<T_out>::DataType()
+            ? to_void_cast<T_out>(residual_param->data<T_out>())
+            : to_void_cast<T>(residual_param->data<T>());
     auto user_residual_md = platform::MKLDNNMemDesc(
         framework::vectorize(residual_param->dims()),
         framework::ToMKLDNNDataType(residual_param->type()),
         residual_param->format());

-    return this->AcquireMemoryFromPrimitive(user_residual_md,
-                                            to_void_cast<T>(residual_data),
+    return this->AcquireMemoryFromPrimitive(user_residual_md, residual_data,
                                             "@user_residual_data_mem_p");
   }
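Note: AcquireResidualMemory now takes the residual pointer in whichever element type the tensor actually holds (T or T_out) instead of assuming T, which matters for fused conv+sum when input and output types differ. The memory it builds wraps the tensor's existing buffer without copying; a self-contained sketch of that zero-copy wrapping in plain dnnl.hpp terms, with placeholder dims/dtype/layout:

// Sketch only: wrap an existing, externally owned buffer (e.g. the residual
// tensor's data) in a dnnl::memory so a fused conv+sum can read it in place.
#include <dnnl.hpp>
using namespace dnnl;

memory wrap_residual(engine& eng, void* residual_data) {
  memory::desc md({1, 8, 222, 222}, memory::data_type::f32,
                  memory::format_tag::nchw);
  return memory(md, eng, residual_data);  // handle ctor: no allocation, no copy
}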
@@ -973,22 +961,8 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
        * the memory format preferred for best performance
        */
-      // TODO(jczaja): Once GRAD NHWC is working then format 'any'
-      // should be used exclusively. But till forward pass enforce
-      // NCHW for training we need to have NCHW here as well
-      // to avoid performance degradation in relu_grad and pool2d_grad
-      std::string data_format = ctx.Attr<std::string>("data_format");
-      auto chosen_memory_format =
-          platform::data_format_to_memory_format(data_format);
+      auto chosen_memory_format = MKLDNNMemoryFormat::any;
       weights_format = MKLDNNMemoryFormat::any;
-      // Check the format for user's special output
-      if (chosen_memory_format != MKLDNNMemoryFormat::any) {
-        if (is_conv3d) {
-          chosen_memory_format = platform::MKLDNNFormatForSize(
-              src_tz.size(), chosen_memory_format);
-        }
-      }
       auto src_md = platform::MKLDNNMemDesc(
           src_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
@@ -1055,9 +1029,12 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
         const size_t size = handler.GetDiffWeightsMemorySize();
         filter_grad_data = filter_grad->mutable_data<T>(ctx.GetPlace(), size);

+        // For convolution with groups write filter grad into
+        // oneDNN buffer and then we reorder it into filter_grad tensor
         auto diff_weights_memory_p =
-            handler.AcquireDiffWeightsMemoryFromWeightsPrimitive(
-                reinterpret_cast<void*>(filter_grad_data));
+            g > 1 ? handler.AcquireDiffWeightsMemoryFromWeightsPrimitive()
+                  : handler.AcquireDiffWeightsMemoryFromWeightsPrimitive(
+                        reinterpret_cast<void*>(filter_grad_data));

         auto conv_bwd_weights_p = handler.AcquireConvolutionBackwardWeights();
@@ -1072,8 +1049,43 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
         // in OneDNN groups in convolution are treated as separate dimension
         // which is not the case in paddlepaddle
         auto filter_fmt = GetMKLDNNFormat(*diff_weights_memory_p);
-        filter_grad->set_format(platform::MKLDNNFormatForSize(
-            g > 1 ? weights_tz.size() - 1 : weights_tz.size(), filter_fmt));
+
+        // For convolution with groups convert from blocked to NCHW
+        // otherwise there will be problems in next operators working on this data
+        if (g > 1) {
+          memory::data_type in_type =
+              framework::ToMKLDNNDataType(filter_grad->type());
+          // for 3d conv with groups (six dimensional data reorder to goidhw)
+          // for 2d conv with groups (five dimensional data reorder to goihw)
+          mkldnn::memory::format_tag out_format =
+              weights_tz.size() == 6 ? mkldnn::memory::format_tag::goidhw
+                                     : mkldnn::memory::format_tag::goihw;
+          const std::string key =
+              platform::CreateKey(weights_tz, filter_fmt, out_format, in_type);
+
+          platform::ReorderMKLDNNHandler handler(
+              weights_tz, filter_grad->type(), in_type, dev_ctx, mkldnn_engine,
+              key);
+          auto reorder_dst_memory_p =
+              handler.AcquireDstMemory(filter_grad, out_format, ctx.GetPlace());
+
+          auto reorder_p = handler.AcquireReorder(reorder_dst_memory_p,
+                                                  diff_weights_memory_p);
+
+          reorder_p->execute(astream, *diff_weights_memory_p,
+                             *reorder_dst_memory_p);
+          astream.wait();
+
+          // So here we have a data in goihw , which can be interpreted as OIHW
+          // (OIDHW for conv3d) because filter_grad shape is set for OIHW
+          // (OIDHW for conv3d)
+          mkldnn::memory::format_tag target_format =
+              weights_tz.size() == 6 ? mkldnn::memory::format_tag::oidhw
+                                     : mkldnn::memory::format_tag::oihw;
+          filter_grad->set_format(target_format);
+        } else {
+          filter_grad->set_format(filter_fmt);
+        }
       }
       if (input_grad) {
         auto weights_memory_p = handler.AcquireWeightsMemoryFromDataPrimitive(
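Note: this is the heart of the backward change for grouped convolution. oneDNN produces diff-weights in a possibly blocked layout with groups as an extra leading dimension, so the kernel reorders into plain goihw (goidhw for conv3d) and then relabels the bytes as oihw/oidhw, which matches filter_grad's OIHW shape with groups folded into O. A standalone sketch of such a reorder against the plain dnnl.hpp API (f32 and the function name are assumptions for illustration, not Paddle's ReorderMKLDNNHandler):

// Copy blocked grouped diff-weights into a caller-provided plain goihw
// buffer so non-oneDNN code can consume it. Sketch only.
#include <dnnl.hpp>
using namespace dnnl;

void to_plain_goihw(engine& eng, stream& strm, memory& blocked_weights,
                    void* plain_dst) {
  // Same logical dims (g, o, i, h, w); only the layout differs.
  memory::desc plain_md(blocked_weights.get_desc().dims(),
                        memory::data_type::f32, memory::format_tag::goihw);
  memory plain_mem(plain_md, eng, plain_dst);
  reorder(blocked_weights, plain_mem).execute(strm, blocked_weights, plain_mem);
  strm.wait();  // reorder is asynchronous on the stream
}

Because the destination buffer is plain and the tensor's shape already folds groups into the output channel, the same bytes can afterwards be tagged oihw/oidhw, which is exactly what the hunk's final set_format does.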
paddle/fluid/platform/mkldnn_helper.h
@@ -289,6 +289,10 @@ inline mkldnn::memory::format_tag GetMKLDNNFormat(
           strides[3] >= strides[4] && strides[4] >= strides[1]) {
         return mkldnn::memory::format_tag::Acdeb16a;
       }
+      if (strides[0] >= strides[1] && strides[1] >= strides[2] &&
+          strides[2] >= strides[3] && strides[3] >= strides[4]) {
+        return mkldnn::memory::format_tag::Abcde16a;
+      }
     } else if (inner_blks[0] == 16 && inner_idxs[0] == 1) {
       if (strides[0] >= strides[1] && strides[1] >= strides[2] &&
           strides[2] >= strides[3] && strides[3] >= strides[4]) {
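Note: the added branch teaches GetMKLDNNFormat to recognize one more blocked layout, Abcde16a — per oneDNN's tag-naming convention, a 16-wide inner block on the outermost dim with otherwise plain dimension order — purely from the descriptor's strides. The core test is just "strides never increase from outer to inner dim"; isolated below with no oneDNN dependency:

#include <array>
#include <cstdint>

// True when a 5-D tensor's strides are non-increasing from dim 0 to dim 4,
// i.e. the dims are stored in plain "abcde" order (inner blocking aside).
bool is_plain_abcde(const std::array<int64_t, 5>& strides) {
  for (int i = 0; i + 1 < 5; ++i) {
    if (strides[i] < strides[i + 1]) return false;
  }
  return true;
}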
paddle/fluid/platform/mkldnn_reuse.h
@@ -346,6 +346,18 @@ class MKLDNNHandler {
     return mem_p;
   }

+  std::shared_ptr<mkldnn::memory> AcquireMemoryFromPrimitive(
+      mkldnn::memory::desc md, const std::string& suffix) {
+    const auto local_key = key_ + suffix;
+    auto mem_p =
+        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
+    if (mem_p == nullptr) {
+      mem_p = std::make_shared<mkldnn::memory>(md, engine_);
+      dev_ctx_.SetBlob(local_key, mem_p);
+    }
+    return mem_p;
+  }
+
   // This incarnation of AcquireMemory can call user function eg. custom reorder
   // or preprocessing routine if needed
   std::shared_ptr<mkldnn::memory> AcquireMemory(
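Note: the new AcquireMemoryFromPrimitive overload lets oneDNN allocate the buffer itself (no user pointer — used above for the grouped diff-weights scratch buffer), but it follows the same cache-by-key pattern as the rest of the Acquire* family. That pattern in isolation, std-only with illustrative names:

#include <map>
#include <memory>
#include <string>

// Look an object up in a per-context cache by string key; create and cache
// it on a miss. Repeated calls with the same key reuse the same object,
// which is what makes later iterations of an op cheap.
template <typename T, typename MakeFn>
std::shared_ptr<T> acquire(std::map<std::string, std::shared_ptr<T>>& cache,
                           const std::string& key, MakeFn make) {
  auto it = cache.find(key);
  if (it != cache.end()) return it->second;  // cache hit: reuse
  auto obj = std::make_shared<T>(make());    // cache miss: create once
  cache.emplace(key, obj);
  return obj;
}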
@@ -1199,6 +1211,12 @@ class ConvMKLDNNTemplateHandler : public MKLDNNHandler {
         conv_bwd_weights_pd_->diff_weights_desc(), ptr, "@diff_weights_mem_p");
   }

+  std::shared_ptr<mkldnn::memory> AcquireDiffWeightsMemoryFromWeightsPrimitive(
+      void) {
+    return this->AcquireMemoryFromPrimitive(
+        conv_bwd_weights_pd_->diff_weights_desc(), "@diff_weights_mem_p");
+  }
+
   std::shared_ptr<mkldnn::memory> AcquireDiffDstMemoryFromDataPrimitive(
       const std::shared_ptr<mkldnn::memory> user_memory_p,
       std::vector<mkldnn::primitive>& pipeline) {  // NOLINT
python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_bf16_mkldnn_op.py
@@ -216,4 +216,6 @@ class TestWithInput1x1Filter1x1(TestConv2DBf16Op):

 if __name__ == '__main__':
+    from paddle import enable_static
+    enable_static()
     unittest.main()
python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py
@@ -233,4 +233,6 @@ class TestMKLDNNDilations(TestConv2DMKLDNNOp):

 if __name__ == '__main__':
+    from paddle import enable_static
+    enable_static()
     unittest.main()
python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py
@@ -110,4 +110,6 @@ class TestFusionGRUINT8MKLDNNOp3(TestFusionGRUBF16MKLDNNOp):

 if __name__ == "__main__":
+    from paddle import enable_static
+    enable_static()
     unittest.main()
python/paddle/fluid/tests/unittests/op_test.py
@@ -1320,6 +1320,13 @@ class OpTest(unittest.TestCase):
         cache_list = None
         if hasattr(self, "cache_name_list"):
             cache_list = self.cache_name_list
+
+        # oneDNN numeric gradient should use CPU kernel
+        use_onednn = False
+        if "use_mkldnn" in op_attrs and op_attrs["use_mkldnn"] == True:
+            op_attrs["use_mkldnn"] = False
+            use_onednn = True
+
         self.op = create_op(self.scope, self.op_type,
@@ -1328,6 +1335,9 @@ class OpTest(unittest.TestCase):
                             op_attrs,
                             cache_list=cache_list)
+        if use_onednn:
+            op_attrs["use_mkldnn"] = True
+
         if no_grad_set is None:
             no_grad_set = set()
         else: