Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
49b09327
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
49b09327
编写于
11月 09, 2018
作者:
M
Michal Gallus
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
MKLDNN elementwise_mul: Reorder on non-nchw input, fallback on non-16 divisable fm
test=develop
上级
f820573b
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
131 addition
and
42 deletion
+131
-42
paddle/fluid/operators/elementwise_mul_mkldnn_op.cc
paddle/fluid/operators/elementwise_mul_mkldnn_op.cc
+73
-38
python/paddle/fluid/tests/unittests/test_elementwise_mul_mkldnn_op.py
...e/fluid/tests/unittests/test_elementwise_mul_mkldnn_op.py
+58
-4
未找到文件。
paddle/fluid/operators/elementwise_mul_mkldnn_op.cc
浏览文件 @
49b09327
...
@@ -95,6 +95,26 @@ static void UpdateDataFormat(const framework::ExecutionContext& ctx,
...
@@ -95,6 +95,26 @@ static void UpdateDataFormat(const framework::ExecutionContext& ctx,
}
}
}
}
template
<
typename
T
>
static
void
ReorderInput
(
framework
::
Tensor
*
tensor
,
const
platform
::
Place
&
place
,
const
mkldnn
::
engine
&
engine
,
bool
isFourDim
)
{
using
platform
::
to_void_cast
;
auto
dims
=
paddle
::
framework
::
vectorize2int
(
tensor
->
dims
());
framework
::
Tensor
out_tensor
;
out_tensor
.
Resize
(
tensor
->
dims
());
out_tensor
.
set_format
(
isFourDim
?
memory
::
format
::
nchw
:
memory
::
format
::
nc
);
out_tensor
.
set_layout
(
tensor
->
layout
());
mkldnn
::
memory
input_memory
=
{{{
dims
,
platform
::
MKLDNNGetDataType
<
T
>
(),
tensor
->
format
()},
engine
},
to_void_cast
<
T
>
(
tensor
->
data
<
T
>
())};
mkldnn
::
memory
output_memory
=
{{{
dims
,
platform
::
MKLDNNGetDataType
<
T
>
(),
out_tensor
.
format
()},
engine
},
to_void_cast
<
T
>
(
out_tensor
.
mutable_data
<
T
>
(
place
))};
platform
::
Reorder
(
input_memory
,
output_memory
);
tensor
->
ShareDataWith
(
out_tensor
);
}
template
<
typename
T
>
template
<
typename
T
>
class
ElementwiseMulMKLDNNKernel
:
public
framework
::
OpKernel
<
T
>
{
class
ElementwiseMulMKLDNNKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
...
@@ -111,63 +131,78 @@ class ElementwiseMulMKLDNNKernel : public framework::OpKernel<T> {
...
@@ -111,63 +131,78 @@ class ElementwiseMulMKLDNNKernel : public framework::OpKernel<T> {
auto
x_dims
=
x
->
dims
();
auto
x_dims
=
x
->
dims
();
auto
y_dims_untrimmed
=
y
->
dims
();
auto
y_dims_untrimmed
=
y
->
dims
();
auto
x_int_dims
=
paddle
::
framework
::
vectorize2int
(
x_dims
);
UpdateDataFormat
(
ctx
,
(
Tensor
*
)
x
,
"x_data_format"
);
UpdateDataFormat
(
ctx
,
(
Tensor
*
)
x
,
"x_data_format"
);
UpdateDataFormat
(
ctx
,
(
Tensor
*
)
y
,
"y_data_format"
);
UpdateDataFormat
(
ctx
,
(
Tensor
*
)
y
,
"y_data_format"
);
if
(
x
->
format
()
==
memory
::
format
::
nChw16c
&&
y
->
format
()
==
memory
::
format
::
nc
)
{
const
bool
are_dims_divisable
=
!
(
x_int_dims
[
1
]
%
16
);
if
(
x_dims
!=
y_dims_untrimmed
)
{
const
bool
is_x_format_correct
=
x
->
format
()
==
memory
::
format
::
nChw16c
;
int
pre
,
n
,
post
;
const
bool
is_y_format_correct
=
y
->
format
()
==
memory
::
format
::
nc
;
get_mid_dims
(
x_dims
,
y_dims_untrimmed
,
axis
,
&
pre
,
&
n
,
&
post
);
if
(
is_x_format_correct
&&
is_y_format_correct
&&
are_dims_divisable
)
{
int
pre
,
n
,
post
;
get_mid_dims
(
x_dims
,
y_dims_untrimmed
,
axis
,
&
pre
,
&
n
,
&
post
);
if
(
post
==
1
)
{
if
(
post
==
1
)
{
PADDLE_THROW
(
"Not implemented when post is 1"
);
PADDLE_THROW
(
"Not implemented when post is 1"
);
}
else
{
}
else
{
// Just check whether it works for RE-Resnext.
// Just check whether it works for RE-Resnext.
PADDLE_ENFORCE_EQ
(
x_dims
.
size
(),
4
,
"X should have 4 dimensions"
);
PADDLE_ENFORCE_EQ
(
x_dims
.
size
(),
4
,
"X should have 4 dimensions"
);
int
n
=
x_dims
[
0
];
int
n
=
x_dims
[
0
];
int
c
=
x_dims
[
1
];
int
c
=
x_dims
[
1
];
int
h
=
x_dims
[
2
];
int
h
=
x_dims
[
2
];
int
w
=
x_dims
[
3
];
int
w
=
x_dims
[
3
];
PADDLE_ENFORCE
(
y_dims_untrimmed
[
0
]
==
n
&&
y_dims_untrimmed
[
1
]
==
c
,
PADDLE_ENFORCE
(
y_dims_untrimmed
[
0
]
==
n
&&
y_dims_untrimmed
[
1
]
==
c
,
"Y should be in nc format"
);
"Y should be in nc format"
);
constexpr
int
simd_width
=
16
;
constexpr
int
simd_width
=
16
;
int
C
=
c
/
simd_width
;
int
C
=
c
/
simd_width
;
vector_mul
mul
;
vector_mul
mul
;
using
mul_func_t
=
using
mul_func_t
=
void
(
*
)(
const
float
*
,
const
float
*
,
float
*
,
int
,
int
);
void
(
*
)(
const
float
*
,
const
float
*
,
float
*
,
int
,
int
);
mul_func_t
mul_func
=
(
mul_func_t
)
mul
.
getCode
();
mul_func_t
mul_func
=
(
mul_func_t
)
mul
.
getCode
();
#pragma omp parallel for collapse(2)
#pragma omp parallel for collapse(2)
for
(
int
ni
=
0
;
ni
<
n
;
ni
++
)
{
for
(
int
ni
=
0
;
ni
<
n
;
ni
++
)
{
for
(
int
ci
=
0
;
ci
<
C
;
ci
++
)
{
for
(
int
ci
=
0
;
ci
<
C
;
ci
++
)
{
auto
ptr_x
=
auto
ptr_x
=
x_data
+
ni
*
C
*
h
*
w
*
simd_width
+
x_data
+
ni
*
C
*
h
*
w
*
simd_width
+
ci
*
h
*
w
*
simd_width
;
ci
*
h
*
w
*
simd_width
;
auto
ptr_y
=
y_data
+
ni
*
C
*
simd_width
+
ci
*
simd_width
;
auto
ptr_y
=
y_data
+
ni
*
C
*
simd_width
+
ci
*
simd_width
;
auto
ptr_z
=
auto
ptr_z
=
z_data
+
ni
*
C
*
h
*
w
*
simd_width
+
z_data
+
ni
*
C
*
h
*
w
*
simd_width
+
ci
*
h
*
w
*
simd_width
;
ci
*
h
*
w
*
simd_width
;
mul_func
(
ptr_x
,
ptr_y
,
ptr_z
,
h
,
w
);
mul_func
(
ptr_x
,
ptr_y
,
ptr_z
,
h
,
w
);
}
}
}
}
}
z
->
set_layout
(
DataLayout
::
kMKLDNN
);
z
->
set_format
(
x
->
format
());
}
else
{
PADDLE_THROW
(
"Not implemented when dims are equal"
);
}
}
z
->
set_layout
(
DataLayout
::
kMKLDNN
);
z
->
set_format
(
x
->
format
());
}
else
{
}
else
{
// Fallback to naive version:
// Fallback to naive version:
const
bool
are_inputs_in_same_format
=
x
->
format
()
==
y
->
format
();
const
bool
is_x_nchw
=
x
->
format
()
==
memory
::
format
::
nchw
;
const
bool
is_x_nc
=
x
->
format
()
==
memory
::
format
::
nc
;
const
bool
is_y_nchw
=
y
->
format
()
==
memory
::
format
::
nchw
;
const
bool
is_y_nc
=
y
->
format
()
==
memory
::
format
::
nc
;
if
(
!
are_inputs_in_same_format
)
{
using
platform
::
MKLDNNDeviceContext
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
if
(
!
(
is_x_nchw
||
is_x_nc
))
ReorderInput
<
T
>
((
Tensor
*
)
x
,
ctx
.
GetPlace
(),
mkldnn_engine
,
x
->
dims
().
size
()
==
4
);
if
(
!
(
is_y_nchw
||
is_y_nc
))
ReorderInput
<
T
>
((
Tensor
*
)
y
,
ctx
.
GetPlace
(),
mkldnn_engine
,
y
->
dims
().
size
()
==
4
);
}
auto
mul_func
=
[](
T
a
,
T
b
)
->
T
{
return
a
*
b
;
};
auto
mul_func
=
[](
T
a
,
T
b
)
->
T
{
return
a
*
b
;
};
TransformFunctor
<
decltype
(
mul_func
),
T
,
TransformFunctor
<
decltype
(
mul_func
),
T
,
...
...
python/paddle/fluid/tests/unittests/test_elementwise_mul_mkldnn_op.py
浏览文件 @
49b09327
...
@@ -49,7 +49,7 @@ class TestElementwiseMulMKLDNNOp_BroadcastNCHW16c(ElementwiseMulOp):
...
@@ -49,7 +49,7 @@ class TestElementwiseMulMKLDNNOp_BroadcastNCHW16c(ElementwiseMulOp):
def
test_check_grad_ingore_y
(
self
):
def
test_check_grad_ingore_y
(
self
):
pass
pass
@
unittest
.
skip
(
"Not implemented yet."
)
@
unittest
.
skip
(
"Not implemented yet."
)
# TODO(mgallus): enable when implemented.
class
TestElementwiseMulMKLDNNOp_BroadcastNCHW8c
(
ElementwiseMulOp
):
class
TestElementwiseMulMKLDNNOp_BroadcastNCHW8c
(
ElementwiseMulOp
):
def
init_input_output
(
self
):
def
init_input_output
(
self
):
x
=
np
.
random
.
rand
(
1
,
8
,
2
,
2
).
astype
(
self
.
dtype
)
x
=
np
.
random
.
rand
(
1
,
8
,
2
,
2
).
astype
(
self
.
dtype
)
...
@@ -159,8 +159,7 @@ class TestElementwiseMulMKLDNNOp_FallbackNoReorders(ElementwiseMulOp):
...
@@ -159,8 +159,7 @@ class TestElementwiseMulMKLDNNOp_FallbackNoReorders(ElementwiseMulOp):
def
test_check_grad_ingore_y
(
self
):
def
test_check_grad_ingore_y
(
self
):
pass
pass
@
unittest
.
skip
(
"Not implemented yet."
)
class
TestElementwiseMulMKLDNNOp_FallbackWithReorder1
(
ElementwiseMulOp
):
class
TestElementwiseMulMKLDNNOp_FallbackWithReorder
(
ElementwiseMulOp
):
def
init_input_output
(
self
):
def
init_input_output
(
self
):
self
.
x
=
np
.
random
.
rand
(
1
,
16
,
2
,
2
).
astype
(
self
.
dtype
)
self
.
x
=
np
.
random
.
rand
(
1
,
16
,
2
,
2
).
astype
(
self
.
dtype
)
y
=
np
.
random
.
rand
(
1
,
16
,
2
,
2
).
astype
(
self
.
dtype
)
y
=
np
.
random
.
rand
(
1
,
16
,
2
,
2
).
astype
(
self
.
dtype
)
...
@@ -169,7 +168,7 @@ class TestElementwiseMulMKLDNNOp_FallbackWithReorder(ElementwiseMulOp):
...
@@ -169,7 +168,7 @@ class TestElementwiseMulMKLDNNOp_FallbackWithReorder(ElementwiseMulOp):
self
.
out
=
self
.
x
*
y
self
.
out
=
self
.
x
*
y
def
setUp
(
self
):
def
setUp
(
self
):
super
(
TestElementwiseMulMKLDNNOp_Fallback
NCHW16C
,
self
).
setUp
()
super
(
TestElementwiseMulMKLDNNOp_Fallback
WithReorder1
,
self
).
setUp
()
self
.
attrs
[
"x_data_format"
]
=
"nchw"
self
.
attrs
[
"x_data_format"
]
=
"nchw"
self
.
attrs
[
"y_data_format"
]
=
"nchw16c"
self
.
attrs
[
"y_data_format"
]
=
"nchw16c"
...
@@ -188,5 +187,60 @@ class TestElementwiseMulMKLDNNOp_FallbackWithReorder(ElementwiseMulOp):
...
@@ -188,5 +187,60 @@ class TestElementwiseMulMKLDNNOp_FallbackWithReorder(ElementwiseMulOp):
def
test_check_grad_ingore_y
(
self
):
def
test_check_grad_ingore_y
(
self
):
pass
pass
class
TestElementwiseMulMKLDNNOp_FallbackWithReorder2
(
ElementwiseMulOp
):
def
init_input_output
(
self
):
self
.
y
=
np
.
random
.
rand
(
1
,
16
,
2
,
2
).
astype
(
self
.
dtype
)
x
=
np
.
random
.
rand
(
1
,
16
,
2
,
2
).
astype
(
self
.
dtype
)
self
.
x
=
x
.
transpose
(
0
,
2
,
3
,
1
).
reshape
(
1
,
16
,
2
,
2
)
self
.
out
=
x
*
self
.
y
def
setUp
(
self
):
super
(
TestElementwiseMulMKLDNNOp_FallbackWithReorder2
,
self
).
setUp
()
self
.
attrs
[
"x_data_format"
]
=
"nchw16c"
self
.
attrs
[
"y_data_format"
]
=
"nchw"
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
def
init_axis
(
self
):
self
.
axis
=
0
def
test_check_grad_normal
(
self
):
pass
def
test_check_grad_ingore_x
(
self
):
pass
def
test_check_grad_ingore_y
(
self
):
pass
class
TestElementwiseMulMKLDNNOp_FallbackNoReorders2
(
ElementwiseMulOp
):
def
init_input_output
(
self
):
self
.
x
=
np
.
random
.
rand
(
1
,
16
).
astype
(
self
.
dtype
)
self
.
y
=
np
.
random
.
rand
(
1
,
16
).
astype
(
self
.
dtype
)
self
.
out
=
self
.
x
*
self
.
y
def
setUp
(
self
):
super
(
TestElementwiseMulMKLDNNOp_FallbackNoReorders2
,
self
).
setUp
()
self
.
attrs
[
"x_data_format"
]
=
"nc"
self
.
attrs
[
"y_data_format"
]
=
"nc"
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
def
init_axis
(
self
):
self
.
axis
=
0
def
test_check_grad_normal
(
self
):
pass
def
test_check_grad_ingore_x
(
self
):
pass
def
test_check_grad_ingore_y
(
self
):
pass
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录