Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
ade50226
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ade50226
编写于
1月 25, 2020
作者:
L
lidanqing
提交者:
Tao Luo
1月 25, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[UT Coverage]Improve sum_mkldnn_op line coverage (#22275)
上级
3099d9d4
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
129 addition
and
98 deletion
+129
-98
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
+65
-83
paddle/fluid/operators/sum_op.cc
paddle/fluid/operators/sum_op.cc
+17
-8
python/paddle/fluid/tests/unittests/mkldnn/test_sum_mkldnn_op.py
...paddle/fluid/tests/unittests/mkldnn/test_sum_mkldnn_op.py
+47
-7
未找到文件。
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
浏览文件 @
ade50226
...
...
@@ -54,102 +54,84 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
&
dev_ctx
=
ctx
.
template
device_context
<
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
auto
in_vars
=
ctx
.
MultiInputVar
(
"X"
);
const
int
N
=
in_vars
.
size
();
auto
out_var
=
ctx
.
OutputVar
(
"Out"
);
PADDLE_ENFORCE_NE
(
in_vars
.
empty
(),
true
,
platform
::
errors
::
InvalidArgument
(
"Input variable is empty."
));
bool
in_place
=
out_var
==
in_vars
[
0
];
if
(
out_var
->
IsType
<
framework
::
LoDTensor
>
())
{
LoDTensor
*
output
=
ctx
.
Output
<
LoDTensor
>
(
"Out"
);
T
*
output_data
=
output
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
dst_tz
=
framework
::
vectorize
<
int64_t
>
(
output
->
dims
());
auto
src_tz
=
dst_tz
;
MKLDNNMemoryFormat
output_format
{
MKLDNNMemoryFormat
::
undef
};
std
::
vector
<
float
>
scales
;
std
::
vector
<
memory
::
desc
>
srcs_md
;
std
::
vector
<
mkldnn
::
memory
>
srcs_mem
;
PADDLE_ENFORCE_EQ
(
in_vars
[
0
]
->
IsType
<
LoDTensor
>
(),
true
,
"Input[0] must be LoDTensors"
);
auto
&
input0
=
in_vars
[
0
]
->
Get
<
LoDTensor
>
();
PADDLE_ENFORCE_EQ
(
input0
.
layout
(),
DataLayout
::
kMKLDNN
,
"Wrong layout set for inputs[0] tensor"
);
PADDLE_ENFORCE_NE
(
input0
.
format
(),
MKLDNNMemoryFormat
::
undef
,
"Wrong format set for inputs[0] tensor"
);
MKLDNNMemoryFormat
input_format
=
input0
.
format
();
for
(
int
i
=
0
;
i
<
N
;
i
++
)
{
PADDLE_ENFORCE_EQ
(
in_vars
[
i
]
->
IsType
<
LoDTensor
>
(),
true
,
"all inputs must be all LoDTensors"
);
auto
&
input
=
in_vars
[
i
]
->
Get
<
LoDTensor
>
();
PADDLE_ENFORCE_EQ
(
input
.
layout
(),
DataLayout
::
kMKLDNN
,
"Wrong layout set for inputs"
);
PADDLE_ENFORCE_NE
(
input
.
format
(),
MKLDNNMemoryFormat
::
undef
,
"Wrong format set for inputs"
);
if
(
input
.
numel
()
==
0
)
{
continue
;
}
const
T
*
input_data
=
input
.
data
<
T
>
();
auto
src_md
=
memory
::
desc
(
src_tz
,
memory
::
data_type
::
f32
,
input_format
);
auto
src_mem
=
memory
(
src_md
,
mkldnn_engine
,
to_void_cast
(
input_data
));
srcs_md
.
push_back
(
src_md
);
srcs_mem
.
push_back
(
src_mem
);
scales
.
push_back
(
1.0
);
}
LoDTensor
*
output
=
ctx
.
Output
<
LoDTensor
>
(
"Out"
);
T
*
output_data
=
output
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
dst_md
=
memory
::
desc
(
dst_tz
,
memory
::
data_type
::
f32
,
MKLDNNMemoryFormat
::
any
);
auto
dst_tz
=
framework
::
vectorize
<
int64_t
>
(
output
->
dims
());
auto
src_tz
=
dst_tz
;
MKLDNNMemoryFormat
output_format
{
MKLDNNMemoryFormat
::
undef
};
std
::
vector
<
float
>
scales
;
std
::
vector
<
memory
::
desc
>
srcs_md
;
std
::
vector
<
mkldnn
::
memory
>
srcs_mem
;
auto
sum_pd
=
sum
::
primitive_desc
(
dst_md
,
scales
,
srcs_md
,
mkldnn_engine
);
auto
&
input0
=
in_vars
[
0
]
->
Get
<
LoDTensor
>
();
in_place
=
(
input0
.
numel
()
>
0
)
&&
(
input0
.
data
<
T
>
()
==
output_data
);
std
::
shared_ptr
<
memory
>
dst_mem
;
if
(
in_place
)
{
dst_mem
.
reset
(
new
memory
(
sum_pd
.
dst_desc
(),
mkldnn_engine
));
}
else
{
dst_mem
.
reset
(
new
memory
(
sum_pd
.
dst_desc
(),
mkldnn_engine
,
output_data
));
}
MKLDNNMemoryFormat
input_format
=
input0
.
format
();
auto
sum_prim
=
mkldnn
::
sum
(
sum_pd
);
output_format
=
platform
::
GetMKLDNNFormat
(
sum_pd
.
dst_desc
());
std
::
shared_ptr
<
mkldnn
::
reorder
>
reorder_p
;
std
::
shared_ptr
<
memory
>
target_mem
;
if
(
in_place
)
{
output_format
=
input_format
;
target_mem
.
reset
(
new
memory
({{
src_tz
},
memory
::
data_type
::
f32
,
output_format
},
mkldnn_engine
,
output_data
));
reorder_p
=
std
::
make_shared
<
reorder
>
(
*
dst_mem
,
*
target_mem
);
for
(
size_t
i
=
0
;
i
<
in_vars
.
size
();
i
++
)
{
auto
&
input_it
=
in_vars
[
i
]
->
Get
<
LoDTensor
>
();
if
(
input_it
.
numel
()
==
0
)
{
continue
;
}
mkldnn
::
stream
astream
(
mkldnn_engine
);
std
::
unordered_map
<
int
,
memory
>
args
;
for
(
size_t
i
=
0
;
i
<
srcs_mem
.
size
();
++
i
)
{
args
.
insert
({
MKLDNN_ARG_MULTIPLE_SRC
+
i
,
srcs_mem
.
at
(
i
)});
}
args
.
insert
({
MKLDNN_ARG_DST
,
*
dst_mem
});
const
T
*
input_data
=
input_it
.
data
<
T
>
();
sum_prim
.
execute
(
astream
,
args
);
astream
.
wait
();
auto
src_md
=
memory
::
desc
(
src_tz
,
memory
::
data_type
::
f32
,
input_format
);
auto
src_mem
=
memory
(
src_md
,
mkldnn_engine
,
to_void_cast
(
input_data
));
srcs_md
.
push_back
(
src_md
);
srcs_mem
.
push_back
(
src_mem
);
scales
.
push_back
(
1.0
);
}
if
(
in_place
)
{
reorder_p
->
execute
(
astream
,
*
dst_mem
,
*
target_mem
);
astream
.
wait
();
}
auto
dst_md
=
memory
::
desc
(
dst_tz
,
memory
::
data_type
::
f32
,
MKLDNNMemoryFormat
::
any
);
auto
sum_pd
=
sum
::
primitive_desc
(
dst_md
,
scales
,
srcs_md
,
mkldnn_engine
);
output
->
set_layout
(
DataLayout
::
kMKLDNN
)
;
output
->
set_format
(
output_format
);
}
else
{
// Fallback to naive version
SumKernel
<
CPUDeviceContext
,
T
>
reference_kernel
;
reference_kernel
.
Compute
(
ctx
);
std
::
shared_ptr
<
memory
>
dst_mem
;
if
(
in_place
)
{
dst_mem
.
reset
(
new
memory
(
sum_pd
.
dst_desc
(),
mkldnn_engine
));
}
else
{
dst_mem
.
reset
(
new
memory
(
sum_pd
.
dst_desc
(),
mkldnn_engine
,
output_data
)
);
}
auto
sum_prim
=
mkldnn
::
sum
(
sum_pd
);
output_format
=
platform
::
GetMKLDNNFormat
(
sum_pd
.
dst_desc
());
std
::
shared_ptr
<
mkldnn
::
reorder
>
reorder_p
;
std
::
shared_ptr
<
memory
>
target_mem
;
if
(
in_place
)
{
output_format
=
input_format
;
target_mem
.
reset
(
new
memory
({{
src_tz
},
memory
::
data_type
::
f32
,
output_format
},
mkldnn_engine
,
output_data
));
reorder_p
=
std
::
make_shared
<
reorder
>
(
*
dst_mem
,
*
target_mem
);
}
mkldnn
::
stream
astream
(
mkldnn_engine
);
std
::
unordered_map
<
int
,
memory
>
args
;
for
(
size_t
i
=
0
;
i
<
srcs_mem
.
size
();
++
i
)
{
args
.
insert
({
MKLDNN_ARG_MULTIPLE_SRC
+
i
,
srcs_mem
.
at
(
i
)});
}
args
.
insert
({
MKLDNN_ARG_DST
,
*
dst_mem
});
sum_prim
.
execute
(
astream
,
args
);
astream
.
wait
();
if
(
in_place
)
{
reorder_p
->
execute
(
astream
,
*
dst_mem
,
*
target_mem
);
astream
.
wait
();
}
output
->
set_layout
(
DataLayout
::
kMKLDNN
);
output
->
set_format
(
output_format
);
}
};
...
...
paddle/fluid/operators/sum_op.cc
浏览文件 @
ade50226
...
...
@@ -113,14 +113,6 @@ class SumOp : public framework::OperatorWithKernel {
framework
::
LibraryType
library
{
framework
::
LibraryType
::
kPlain
};
framework
::
DataLayout
layout
{
framework
::
DataLayout
::
kAnyLayout
};
#ifdef PADDLE_WITH_MKLDNN
if
(
library
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library
=
framework
::
LibraryType
::
kMKLDNN
;
layout
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
if
(
x_vars
[
0
]
->
IsType
<
framework
::
LoDTensor
>
())
{
int
dtype
=
-
1
;
for
(
size_t
idx
=
0
;
idx
<
x_vars
.
size
();
++
idx
)
{
...
...
@@ -141,6 +133,23 @@ class SumOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_NE
(
dtype
,
-
1
,
"Sum operator should have at least one tensor"
);
#ifdef PADDLE_WITH_MKLDNN
if
(
library
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
)
&&
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
dtype
)
==
framework
::
proto
::
VarType
::
FP32
&&
ctx
.
OutputVar
(
"Out"
)
->
IsType
<
framework
::
LoDTensor
>
())
{
if
(
std
::
all_of
(
x_vars
.
begin
(),
x_vars
.
end
(),
[](
const
framework
::
Variable
*
v
)
{
return
v
->
IsType
<
framework
::
LoDTensor
>
();
}))
{
return
framework
::
OpKernelType
(
framework
::
proto
::
VarType
::
FP32
,
ctx
.
GetPlace
(),
framework
::
DataLayout
::
kMKLDNN
,
framework
::
LibraryType
::
kMKLDNN
);
}
}
#endif
return
framework
::
OpKernelType
(
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
dtype
),
ctx
.
GetPlace
(),
layout
,
library
);
...
...
python/paddle/fluid/tests/unittests/mkldnn/test_sum_mkldnn_op.py
浏览文件 @
ade50226
...
...
@@ -15,25 +15,26 @@
from
__future__
import
print_function
import
unittest
import
paddle.fluid.core
as
core
from
paddle.fluid.tests.unittests.test_sum_op
import
TestSumOp
import
numpy
as
np
import
paddle.fluid.op
as
fluid_op
class
TestMKLDNN
(
TestSumOp
):
class
Test
Sum
MKLDNN
(
TestSumOp
):
def
setUp
(
self
):
self
.
op_type
=
"sum"
self
.
init_
kernel
_type
()
self
.
init_
data
_type
()
self
.
use_mkldnn
=
True
x0
=
np
.
random
.
random
((
25
,
4
)).
astype
(
self
.
dtype
)
x1
=
np
.
random
.
random
((
25
,
4
)).
astype
(
self
.
dtype
)
x2
=
np
.
random
.
random
((
25
,
4
)).
astype
(
self
.
dtype
)
x0
=
np
.
random
.
random
((
25
,
8
)).
astype
(
self
.
dtype
)
x1
=
np
.
random
.
random
((
25
,
8
)).
astype
(
self
.
dtype
)
x2
=
np
.
random
.
random
((
25
,
8
)).
astype
(
self
.
dtype
)
self
.
inputs
=
{
"X"
:
[(
"x0"
,
x0
),
(
"x1"
,
x1
),
(
"x2"
,
x2
)]}
y
=
x0
+
x1
+
x2
self
.
outputs
=
{
'Out'
:
y
}
self
.
attrs
=
{
'use_mkldnn'
:
self
.
use_mkldnn
}
def
init_
kernel
_type
(
self
):
def
init_
data
_type
(
self
):
self
.
dtype
=
np
.
float32
def
test_check_output
(
self
):
...
...
@@ -45,5 +46,44 @@ class TestMKLDNN(TestSumOp):
self
.
check_grad
([
'x0'
],
'Out'
,
check_dygraph
=
False
)
class
TestMKLDNNSumInplaceOp
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
op_type
=
"sum"
self
.
init_data_type
()
self
.
use_mkldnn
=
True
self
.
x0
=
np
.
random
.
random
((
25
,
8
)).
astype
(
self
.
dtype
)
self
.
x1
=
np
.
random
.
random
((
25
,
8
)).
astype
(
self
.
dtype
)
def
init_data_type
(
self
):
self
.
dtype
=
np
.
float32
def
test_check_output
(
self
):
place
=
core
.
CPUPlace
()
scope
=
core
.
Scope
()
out_var_name
=
"x0"
inputs
=
{
"X"
:
[(
"x0"
,
self
.
x0
),
(
"x1"
,
self
.
x1
)]}
for
input_key
in
inputs
:
for
per_input
in
inputs
[
input_key
]:
var_name
,
var_value
=
per_input
[
0
],
per_input
[
1
]
var
=
scope
.
var
(
var_name
)
tensor
=
var
.
get_tensor
()
tensor
.
set
(
var_value
,
place
)
sum_op
=
fluid_op
.
Operator
(
"sum"
,
X
=
[
"x0"
,
"x1"
],
Out
=
out_var_name
,
use_mkldnn
=
True
)
expected_out
=
np
.
array
(
self
.
x0
+
self
.
x1
)
sum_op
.
run
(
scope
,
place
)
out
=
scope
.
find_var
(
"x0"
).
get_tensor
()
out_array
=
np
.
array
(
out
)
self
.
assertTrue
(
np
.
allclose
(
expected_out
,
out_array
,
atol
=
1e-5
),
"Inplace sum_mkldnn_op output has diff with expected output"
)
def
test_check_grad
(
self
):
pass
if
__name__
==
'__main__'
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录