Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
6ebe9877
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6ebe9877
编写于
2月 25, 2019
作者:
M
Michal Gallus
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Improve code reuse at MKL-DNN sum
test=develop
上级
a6e3cd5e
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
4 additions
and
108 deletions
+4
-108
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
+4
-108
未找到文件。
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
浏览文件 @
6ebe9877
...
...
@@ -79,15 +79,6 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
memory
::
format
input_format
=
input0
.
format
();
if
(
src_tz
.
size
()
==
1
&&
(
input_format
==
memory
::
format
::
nchw
||
input_format
==
memory
::
format
::
nhwc
))
{
input_format
=
memory
::
format
::
x
;
}
if
(
src_tz
.
size
()
==
2
&&
(
input_format
==
memory
::
format
::
nchw
||
input_format
==
memory
::
format
::
nhwc
))
{
input_format
=
memory
::
format
::
nc
;
}
for
(
int
i
=
0
;
i
<
N
;
i
++
)
{
PADDLE_ENFORCE
(
in_vars
[
i
]
->
IsType
<
LoDTensor
>
(),
"all inputs must be all LoDTensors"
);
...
...
@@ -147,105 +138,10 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
output
->
set_layout
(
DataLayout
::
kMKLDNN
);
output
->
set_format
(
output_format
);
}
else
if
(
out_var
->
IsType
<
framework
::
SelectedRows
>
())
{
// TODO(@mozga-intel) Add MKLDNN SelectedRows support
std
::
unique_ptr
<
framework
::
SelectedRows
>
in0
;
if
(
in_place
)
{
// If is in_place, we store the input[0] to in0
auto
&
in_sel0
=
in_vars
[
0
]
->
Get
<
SelectedRows
>
();
auto
&
rows
=
in_sel0
.
rows
();
in0
.
reset
(
new
framework
::
SelectedRows
(
rows
,
in_sel0
.
height
()));
in0
->
mutable_value
()
->
ShareDataWith
(
in_sel0
.
value
());
}
auto
get_selected_row
=
[
&
](
size_t
i
)
->
const
SelectedRows
&
{
if
(
i
==
0
&&
in0
)
{
return
*
in0
;
}
else
{
return
in_vars
[
i
]
->
Get
<
SelectedRows
>
();
}
};
auto
*
out
=
ctx
.
Output
<
SelectedRows
>
(
"Out"
);
out
->
mutable_rows
()
->
clear
();
auto
*
out_value
=
out
->
mutable_value
();
// Runtime InferShape
size_t
first_dim
=
0
;
for
(
int
i
=
0
;
i
<
N
;
i
++
)
{
auto
&
sel_row
=
get_selected_row
(
i
);
first_dim
+=
sel_row
.
rows
().
size
();
}
std
::
vector
<
int64_t
>
in_dim
;
for
(
int
i
=
0
;
i
<
N
;
i
++
)
{
auto
&
sel_row
=
get_selected_row
(
i
);
if
(
sel_row
.
rows
().
size
()
>
0
)
{
in_dim
=
framework
::
vectorize
(
sel_row
.
value
().
dims
());
break
;
}
}
if
(
in_dim
.
empty
())
{
VLOG
(
3
)
<<
"WARNING: all the inputs are empty"
;
in_dim
=
framework
::
vectorize
(
get_selected_row
(
N
-
1
).
value
().
dims
());
}
else
{
in_dim
[
0
]
=
static_cast
<
int64_t
>
(
first_dim
);
}
in_dim
[
0
]
=
static_cast
<
int64_t
>
(
first_dim
);
out_value
->
Resize
(
framework
::
make_ddim
(
in_dim
));
out_value
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
// if all the input sparse vars are empty, no need to
// merge these vars.
if
(
first_dim
==
0UL
)
{
return
;
}
math
::
SelectedRowsAddTo
<
CPUDeviceContext
,
T
>
functor
;
int64_t
offset
=
0
;
for
(
int
i
=
0
;
i
<
N
;
i
++
)
{
auto
&
sel_row
=
get_selected_row
(
i
);
if
(
sel_row
.
rows
().
size
()
==
0
)
{
continue
;
}
PADDLE_ENFORCE_EQ
(
out
->
height
(),
sel_row
.
height
());
functor
(
ctx
.
template
device_context
<
CPUDeviceContext
>(),
sel_row
,
offset
,
out
);
offset
+=
sel_row
.
value
().
numel
();
}
}
else
if
(
out_var
->
IsType
<
framework
::
LoDTensorArray
>
())
{
// TODO(@mozga-intel) Add MKLDNN LoDTensorArray support
auto
&
out_array
=
*
out_var
->
GetMutable
<
framework
::
LoDTensorArray
>
();
for
(
size_t
i
=
in_place
?
1
:
0
;
i
<
in_vars
.
size
();
++
i
)
{
PADDLE_ENFORCE
(
in_vars
[
i
]
->
IsType
<
framework
::
LoDTensorArray
>
(),
"Only support all inputs are TensorArray"
);
auto
&
in_array
=
in_vars
[
i
]
->
Get
<
framework
::
LoDTensorArray
>
();
for
(
size_t
i
=
0
;
i
<
in_array
.
size
();
++
i
)
{
if
(
in_array
[
i
].
numel
()
!=
0
)
{
if
(
i
>=
out_array
.
size
())
{
out_array
.
resize
(
i
+
1
);
}
if
(
out_array
[
i
].
numel
()
==
0
)
{
framework
::
TensorCopy
(
in_array
[
i
],
in_array
[
i
].
place
(),
ctx
.
device_context
(),
&
out_array
[
i
]);
out_array
[
i
].
set_lod
(
in_array
[
i
].
lod
());
}
else
{
PADDLE_ENFORCE
(
out_array
[
i
].
lod
()
==
in_array
[
i
].
lod
());
auto
in
=
EigenVector
<
T
>::
Flatten
(
in_array
[
i
]);
auto
result
=
EigenVector
<
T
>::
Flatten
(
out_array
[
i
]);
result
.
device
(
*
ctx
.
template
device_context
<
MKLDNNDeviceContext
>()
.
eigen_device
())
=
result
+
in
;
}
}
}
}
}
else
{
PADDLE_THROW
(
"Unexpected branch, output variable type is %s"
,
framework
::
ToTypeName
(
out_var
->
Type
()));
}
else
{
// Fallback to naive version
// TODO(@mozga-intel) Add MKLDNN SelectedRows & LoDTensorArray support
SumKernel
<
CPUDeviceContext
,
T
>
reference_kernel
;
reference_kernel
.
Compute
(
ctx
);
}
}
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录