Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
92462e94
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
92462e94
编写于
1月 23, 2020
作者:
W
Wojciech Uss
提交者:
Tao Luo
1月 23, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
improve elementwise_add_mkldnn_op test code coverage (#22359)
上级
e6ca512a
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
87 addition
and
252 deletion
+87
-252
paddle/fluid/operators/elementwise/elementwise_op.h
paddle/fluid/operators/elementwise/elementwise_op.h
+17
-2
paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
...operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
+53
-148
python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_mkldnn_op.py
.../tests/unittests/mkldnn/test_elementwise_add_mkldnn_op.py
+15
-98
python/paddle/fluid/tests/unittests/test_elementwise_add_op.py
...n/paddle/fluid/tests/unittests/test_elementwise_add_op.py
+2
-4
未找到文件。
paddle/fluid/operators/elementwise/elementwise_op.h
浏览文件 @
92462e94
...
...
@@ -95,7 +95,13 @@ class ElementwiseOp : public framework::OperatorWithKernel {
auto
input_data_type
=
OperatorWithKernel
::
IndicateVarDataType
(
ctx
,
"X"
);
#ifdef PADDLE_WITH_MKLDNN
if
(
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
// If broadcasting is needed, use native implementation
auto
CanMKLDNNElementwiseAddBeUsed
=
[
&
]()
{
return
ctx
.
Input
<
Tensor
>
(
"X"
)
->
dims
()
==
ctx
.
Input
<
Tensor
>
(
"Y"
)
->
dims
();
};
if
(
platform
::
CanMKLDNNBeUsed
(
ctx
)
&&
(
ctx
.
Type
()
!=
"elementwise_add"
||
CanMKLDNNElementwiseAddBeUsed
()))
{
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
framework
::
DataLayout
::
kMKLDNN
,
framework
::
LibraryType
::
kMKLDNN
);
...
...
@@ -227,7 +233,16 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
ctx
,
framework
::
GradVarName
(
"Out"
));
#ifdef PADDLE_WITH_MKLDNN
if
(
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
// If broadcasting is needed, use native implementation
auto
CanMKLDNNElementwiseAddGradBeUsed
=
[
&
]()
{
auto
dx
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
dy
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
return
(
dx
!=
nullptr
&&
dy
!=
nullptr
&&
dx
->
dims
()
==
dy
->
dims
());
};
if
(
platform
::
CanMKLDNNBeUsed
(
ctx
)
&&
(
ctx
.
Type
()
!=
"elementwise_add_grad"
||
CanMKLDNNElementwiseAddGradBeUsed
()))
{
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
framework
::
DataLayout
::
kMKLDNN
,
framework
::
LibraryType
::
kMKLDNN
);
...
...
paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
浏览文件 @
92462e94
...
...
@@ -41,99 +41,27 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
y
=
ctx
.
Input
<
Tensor
>
(
"Y"
);
auto
*
z
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
const
T
*
x_data
=
x
->
data
<
T
>
();
const
T
*
y_data
=
y
->
data
<
T
>
();
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
auto
x_dims
=
x
->
dims
();
auto
y_dims_untrimed
=
y
->
dims
();
auto
z_dims
=
z
->
dims
();
mkldnn
::
stream
astream
(
mkldnn_engine
);
// Execute default elementwise_add operator when
// broadcast operations need to performed.
if
(
x_dims
!=
y_dims_untrimed
)
{
Tensor
_x
;
MKLDNNMemoryFormat
format
;
auto
src_x_tz
=
framework
::
vectorize
<
int64_t
>
(
x_dims
);
if
((
src_x_tz
.
size
()
==
3
&&
x
->
format
()
!=
(
format
=
MKLDNNMemoryFormat
::
ncw
))
||
(
src_x_tz
.
size
()
==
4
&&
x
->
format
()
!=
(
format
=
MKLDNNMemoryFormat
::
nchw
))
||
(
src_x_tz
.
size
()
==
5
&&
x
->
format
()
!=
(
format
=
MKLDNNMemoryFormat
::
ncdhw
)))
{
_x
.
Resize
(
x_dims
);
mkldnn
::
memory
::
data_type
in_type
=
platform
::
MKLDNNGetDataType
<
T
>
();
auto
out_format
=
platform
::
MKLDNNFormatForSize
(
x_dims
.
size
(),
MKLDNNMemoryFormat
::
nchw
);
const
std
::
string
key
=
platform
::
CreateKey
(
src_x_tz
,
x
->
format
(),
out_format
,
in_type
);
platform
::
ReorderMKLDNNHandler
handler
(
src_x_tz
,
x
->
type
(),
in_type
,
dev_ctx
,
mkldnn_engine
,
key
);
auto
user_x_memory_p
=
handler
.
AcquireSrcMemory
(
x
->
format
(),
paddle
::
platform
::
to_void_cast
(
x_data
));
auto
x_memory_p
=
handler
.
AcquireDstMemory
(
&
_x
,
out_format
,
ctx
.
GetPlace
());
auto
x_reorder
=
handler
.
AcquireReorder
(
x_memory_p
,
user_x_memory_p
);
x_reorder
->
execute
(
astream
,
*
user_x_memory_p
,
*
x_memory_p
);
astream
.
wait
();
}
else
{
format
=
x
->
format
();
_x
.
ShareDataWith
(
*
x
);
}
z
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
sum_func
=
[](
T
a
,
T
b
)
->
T
{
return
a
+
b
;
};
PADDLE_ENFORCE_EQ
(
x
->
layout
(),
DataLayout
::
kMKLDNN
,
platform
::
errors
::
InvalidArgument
(
"Wrong layout set for X tensor"
));
PADDLE_ENFORCE_NE
(
x
->
format
(),
MKLDNNMemoryFormat
::
undef
,
platform
::
errors
::
InvalidArgument
(
"Wrong format set for X tensor"
));
TransformFunctor
<
decltype
(
sum_func
),
T
,
paddle
::
platform
::
CPUDeviceContext
,
T
>
functor
(
&
_x
,
y
,
z
,
ctx
.
template
device_context
<
paddle
::
platform
::
CPUDeviceContext
>()
,
sum_func
);
PADDLE_ENFORCE_EQ
(
y
->
layout
(),
DataLayout
::
kMKLDNN
,
platform
::
errors
::
InvalidArgument
(
"Wrong layout set for Y tensor"
));
PADDLE_ENFORCE_NE
(
y
->
format
(),
MKLDNNMemoryFormat
::
undef
,
platform
::
errors
::
InvalidArgument
(
"Wrong format set for Y tensor"
)
);
axis
=
(
axis
==
-
1
?
x_dims
.
size
()
-
y_dims_untrimed
.
size
()
:
axis
);
PADDLE_ENFORCE
(
axis
>=
0
&&
axis
<
x_dims
.
size
(),
"Axis should be in range [0, x_dims)"
);
auto
y_dims
=
trim_trailing_singular_dims
(
y_dims_untrimed
);
axis
=
(
y_dims
.
size
()
==
0
)
?
x_dims
.
size
()
:
axis
;
int
pre
,
n
,
post
,
is_run_common_broadcast
;
get_mid_dims
(
x_dims
,
y_dims
,
axis
,
&
pre
,
&
n
,
&
post
,
&
is_run_common_broadcast
);
const
T
*
x_data
=
x
->
data
<
T
>
();
const
T
*
y_data
=
y
->
data
<
T
>
();
if
(
post
==
1
)
{
functor
.
RunRowWise
(
n
,
pre
);
}
else
{
functor
.
RunMidWise
(
n
,
pre
,
post
);
}
z
->
set_layout
(
DataLayout
::
kMKLDNN
);
z
->
set_format
(
format
);
}
else
{
PADDLE_ENFORCE_EQ
(
x
->
layout
(),
DataLayout
::
kMKLDNN
,
"Wrong layout set for X tensor"
);
PADDLE_ENFORCE_NE
(
x
->
format
(),
MKLDNNMemoryFormat
::
undef
,
"Wrong format set for X tensor"
);
PADDLE_ENFORCE_EQ
(
y
->
layout
(),
DataLayout
::
kMKLDNN
,
"Wrong layout set for Y tensor"
);
PADDLE_ENFORCE_NE
(
y
->
format
(),
MKLDNNMemoryFormat
::
undef
,
"Wrong format set for Y tensor"
);
auto
src_x_tz
=
framework
::
vectorize
<
int64_t
>
(
x_dims
);
auto
src_y_tz
=
framework
::
vectorize
<
int64_t
>
(
y_dims_untrimed
);
auto
dst_tz
=
framework
::
vectorize
<
int64_t
>
(
z_dims
);
auto
src_x_tz
=
framework
::
vectorize
<
int64_t
>
(
x
->
dims
());
auto
src_y_tz
=
framework
::
vectorize
<
int64_t
>
(
y
->
dims
());
auto
dst_tz
=
framework
::
vectorize
<
int64_t
>
(
z
->
dims
());
std
::
vector
<
float
>
scales
=
{
1.0
f
,
1.0
f
};
...
...
@@ -145,24 +73,19 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
auto
src_x_memory
=
handler
.
AcquireSrcMemory
(
{{
src_x_tz
},
platform
::
MKLDNNGetDataType
<
T
>
(),
x
->
format
()},
paddle
::
platform
::
to_void_cast
(
x_data
));
auto
src_y_memory
=
handler
.
AcquireSecondSrcMemory
(
{{
src_y_tz
},
platform
::
MKLDNNGetDataType
<
T
>
(),
y
->
format
()},
paddle
::
platform
::
to_void_cast
(
y_data
));
auto
dst_md
=
memory
::
desc
({
dst_tz
},
platform
::
MKLDNNGetDataType
<
T
>
(),
MKLDNNMemoryFormat
::
any
);
auto
sum_pd
=
handler
.
AcquireSumPrimitiveDescriptor
(
{
src_x_memory
,
src_y_memory
},
scales
,
dst_md
);
T
*
z_data
=
z
->
mutable_data
<
T
>
(
ctx
.
GetPlace
(),
sum_pd
->
dst_desc
().
get_size
());
auto
dst_memory
=
handler
.
AcquireDstMemoryFromPrimitive
(
z_data
);
auto
sum_prim
=
handler
.
AcquireSum
();
mkldnn
::
stream
astream
(
mkldnn_engine
);
sum_prim
->
execute
(
astream
,
{{
MKLDNN_ARG_MULTIPLE_SRC
,
*
src_x_memory
},
{
MKLDNN_ARG_MULTIPLE_SRC
+
1
,
*
src_y_memory
},
{
MKLDNN_ARG_DST
,
*
dst_memory
}});
...
...
@@ -171,7 +94,6 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
z
->
set_layout
(
DataLayout
::
kMKLDNN
);
z
->
set_format
(
platform
::
GetMKLDNNFormat
(
*
dst_memory
));
}
}
};
template
<
typename
T
>
...
...
@@ -184,19 +106,12 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
auto
*
dout
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dx
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
dy
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
// skip out, x, y,
// dout length is larger or equal than dx, dy.
auto
*
out
=
dout
;
auto
*
x
=
dout
,
*
y
=
dout
;
auto
set_mkldnn_format
=
[](
Tensor
*
in
,
const
Tensor
*
out
)
{
in
->
set_layout
(
DataLayout
::
kMKLDNN
);
in
->
set_format
(
out
->
format
());
};
if
(
dx
!=
nullptr
&&
dy
!=
nullptr
&&
dx
->
dims
()
==
dy
->
dims
())
{
if
(
dx
->
dims
()
==
dy
->
dims
())
{
auto
blas
=
math
::
GetBlas
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
(
ctx
);
if
(
dx
)
{
blas
.
VCOPY
(
dout
->
numel
(),
dout
->
data
<
T
>
(),
...
...
@@ -210,16 +125,6 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
set_mkldnn_format
(
dy
,
dout
);
}
}
}
else
{
// Execute default kernel when broadcast is needed
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
y
=
ctx
.
Input
<
Tensor
>
(
"Y"
);
ElemwiseExplicitGradCompute
<
paddle
::
platform
::
CPUDeviceContext
,
T
,
IdentityGrad
<
T
>
,
IdentityGrad
<
T
>>
(
ctx
,
*
x
,
*
y
,
*
out
,
*
dout
,
axis
,
dx
,
dy
,
IdentityGrad
<
T
>
(),
IdentityGrad
<
T
>
());
}
}
};
}
// namespace operators
...
...
python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_mkldnn_op.py
浏览文件 @
92462e94
...
...
@@ -15,121 +15,38 @@
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
paddle.fluid.core
as
core
from
paddle.fluid.tests.unittests.op_test
import
OpTest
,
skip_check_grad_ci
from
paddle.fluid.tests.unittests.test_elementwise_add_op
import
*
'''
Some tests differ from the tests defined in test_elementwise_add_op.py
because MKLDNN does not support tensors of number of dimensions 3.
MKLDNN does not support tensors of dimensions number equal to 3.
Such dimensions cause exceptions in MKLDNN reorder primitive.
The DNNL-based kernel is used only when broadcasting is not required
(see GetExpectedKernelType() methods in elementwise_add_op.h).
'''
class
TestMKLDNNElementwiseAddOp
(
TestElementwiseAddOp
):
def
init_input_output
(
self
):
self
.
x
=
np
.
random
.
uniform
(
0.1
,
1
,
[
2
,
3
,
4
,
5
]).
astype
(
self
.
dtype
)
self
.
y
=
np
.
random
.
uniform
(
0.1
,
1
,
[
2
,
3
,
4
,
5
]).
astype
(
self
.
dtype
)
self
.
out
=
np
.
add
(
self
.
x
,
self
.
y
)
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
@
skip_check_grad_ci
(
reason
=
"[skip shape check] Use y_shape(1) to test broadcast."
)
class
TestMKLDNNElementwiseAddOp_scalar
(
TestElementwiseAddOp_scalar
):
def
init_input_output
(
self
):
self
.
x
=
np
.
random
.
rand
(
2
,
3
,
4
,
5
).
astype
(
self
.
dtype
)
self
.
y
=
np
.
random
.
rand
(
1
).
astype
(
self
.
dtype
)
self
.
out
=
self
.
x
+
self
.
y
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
@
skip_check_grad_ci
(
reason
=
"[skip shape check] Use y_shape(1,1) to test broadcast."
)
class
TestMKLDNNElementwiseAddOp_scalar2
(
TestElementwiseAddOp_scalar2
):
def
init_input_output
(
self
):
self
.
x
=
np
.
random
.
rand
(
2
,
3
,
4
,
5
).
astype
(
self
.
dtype
)
self
.
y
=
np
.
random
.
rand
(
1
,
1
).
astype
(
self
.
dtype
)
self
.
out
=
self
.
x
+
self
.
y
def
init_data_format
(
self
):
self
.
data_format
=
'MKLDNN'
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNElementwiseAddOp_Vector
(
TestElementwiseAddOp_Vector
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float32
class
Tes
MKLDNNtElementwiseAddOp_broadcast_0
(
TestElementwiseAddOp_broadcast_0
):
class
Tes
tMKLDNNElementwiseAddOp2
(
TestMKLDNNElementwiseAddOp
):
def
init_input_output
(
self
):
self
.
x
=
np
.
random
.
rand
(
100
,
2
,
3
,
4
).
astype
(
self
.
dtype
)
self
.
y
=
np
.
random
.
rand
(
100
).
astype
(
self
.
dtype
)
self
.
out
=
self
.
x
+
self
.
y
.
reshape
(
100
,
1
,
1
,
1
)
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNElementwiseAddOp_broadcast_1
(
TestElementwiseAddOp_broadcast_1
):
def
init_input_output
(
self
):
self
.
x
=
np
.
random
.
rand
(
2
,
100
,
3
,
4
).
astype
(
self
.
dtype
)
self
.
y
=
np
.
random
.
rand
(
100
).
astype
(
self
.
dtype
)
self
.
out
=
self
.
x
+
self
.
y
.
reshape
(
1
,
100
,
1
,
1
)
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNElementwiseAddOp_broadcast_2
(
TestElementwiseAddOp_broadcast_2
):
def
init_input_output
(
self
):
self
.
x
=
np
.
random
.
rand
(
2
,
2
,
3
,
100
).
astype
(
self
.
dtype
)
self
.
y
=
np
.
random
.
rand
(
100
).
astype
(
self
.
dtype
)
self
.
out
=
self
.
x
+
self
.
y
.
reshape
(
1
,
1
,
1
,
100
)
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNElementwiseAddOp_broadcast_3
(
TestElementwiseAddOp_broadcast_3
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNElementwiseAddOp_broadcast_4
(
TestElementwiseAddOp_broadcast_4
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNElementwiseAddOp_rowwise_add_0
(
TestElementwiseAddOp_rowwise_add_0
):
def
init_input_output
(
self
):
self
.
x
=
np
.
random
.
rand
(
2
,
10
,
12
,
3
).
astype
(
self
.
dtype
)
self
.
y
=
np
.
random
.
rand
(
10
,
12
).
astype
(
self
.
dtype
)
self
.
out
=
self
.
x
+
self
.
y
.
reshape
(
1
,
10
,
12
,
1
)
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
class
TestMKLDNNElementwiseAddOp_rowwise_add_1
(
TestElementwiseAddOp_rowwise_add_1
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
self
.
x
=
np
.
random
.
random
((
100
,
)).
astype
(
self
.
dtype
)
self
.
y
=
np
.
random
.
random
((
100
,
)).
astype
(
self
.
dtype
)
self
.
out
=
np
.
add
(
self
.
x
,
self
.
y
)
class
TestMKLDNNElementwiseAddOp_channelwise_add
(
TestElementwiseAddOp_channelwise_add
):
class
TestMKLDNNElementwiseAddOp3
(
TestMKLDNNElementwiseAddOp
):
def
init_input_output
(
self
):
self
.
x
=
np
.
random
.
rand
(
100
,
2
,
3
,
3
).
astype
(
self
.
dtype
)
self
.
y
=
np
.
random
.
rand
(
100
,
1
,
1
,
1
).
astype
(
self
.
dtype
)
self
.
out
=
self
.
x
+
self
.
y
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
self
.
x
=
np
.
random
.
uniform
(
0.1
,
1
,
[
2
,
3
,
4
,
5
]).
astype
(
self
.
dtype
)
self
.
y
=
np
.
random
.
uniform
(
0.1
,
1
,
[
2
,
3
,
4
,
5
]).
astype
(
self
.
dtype
)
self
.
out
=
np
.
add
(
self
.
x
,
self
.
y
)
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/unittests/test_elementwise_add_op.py
浏览文件 @
92462e94
...
...
@@ -27,8 +27,6 @@ class TestElementwiseAddOp(OpTest):
def
setUp
(
self
):
self
.
op_type
=
"elementwise_add"
self
.
dtype
=
np
.
float64
self
.
axis
=
-
1
self
.
init_dtype
()
self
.
init_input_output
()
self
.
init_kernel_type
()
...
...
@@ -78,10 +76,10 @@ class TestElementwiseAddOp(OpTest):
self
.
out
=
np
.
add
(
self
.
x
,
self
.
y
)
def
init_dtype
(
self
):
pass
self
.
dtype
=
np
.
float64
def
init_axis
(
self
):
pass
self
.
axis
=
-
1
@
unittest
.
skipIf
(
not
core
.
is_compiled_with_cuda
(),
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录