BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle; in sync with the fork source)

Commit 43527a2b (unverified)
Authored Apr 30, 2021 by jakpiase; committed by GitHub on Apr 30, 2021

Reduce grad fix (#32592)

Parent: a3e77197

Showing 5 changed files with 79 additions and 73 deletions

+2  -1   paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc
+60 -30  paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h
+2  -1   paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc
+5  -20  paddle/fluid/operators/reduce_ops/reduce_op.h
+10 -21  paddle/fluid/platform/mkldnn_reuse.h

paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc
@@ -45,7 +45,8 @@ class ReduceMeanGradMKLDNNKernel : public ReduceGradMKLDNNKernel<T> {
       number_of_elements = input_x->numel();
     }
 
-    this->RunKernel(ctx, dnnl::algorithm::binary_add, 0.0f,
+    this->RunKernel(ctx, dnnl::algorithm::binary_add,
+                    dnnl::algorithm::reduction_mean, 0.0f,
                     1.0L / number_of_elements);
   }
 };
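
For reference, the backward of reduce_mean scales the incoming gradient by 1/N before broadcasting it back over the input shape, which is why the kernel above passes 1.0L / number_of_elements as the second scale. A minimal standalone sketch of that rule (plain C++, not the Paddle or oneDNN API; the function name is illustrative):

    #include <cstddef>
    #include <vector>

    // Sketch: backward of y = mean(x) over all N elements.
    // Every dx[i] receives dy scaled by 1/N, i.e. dy is broadcast and scaled.
    std::vector<float> ReduceMeanGradReference(float dy, std::size_t numel) {
      std::vector<float> dx(numel);
      const float scale = 1.0f / static_cast<float>(numel);
      for (std::size_t i = 0; i < numel; ++i) {
        dx[i] = dy * scale;
      }
      return dx;
    }
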
paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h
@@ -21,6 +21,27 @@ using paddle::framework::LoDTensor;
 using paddle::framework::Tensor;
 using platform::to_void_cast;
 
+inline std::vector<int64_t> CalculateReducedDims(const Tensor* input,
+                                                 const Tensor* output,
+                                                 std::vector<int>& reduce_dims,
+                                                 bool reduce_all,
+                                                 bool keep_dim) {
+  if (keep_dim) return framework::vectorize(output->dims());
+
+  if (reduce_all)
+    return std::vector<int64_t>(framework::vectorize(input->dims()).size(), 1);
+
+  std::vector<int64_t> output_dims(framework::vectorize(input->dims()));
+  for (size_t i = 0; i < reduce_dims.size(); ++i) {
+    reduce_dims[i] = (reduce_dims[i] >= 0)
+                         ? reduce_dims[i]
+                         : input->dims().size() + reduce_dims[i];
+    output_dims[reduce_dims[i]] = 1;
+  }
+
+  return output_dims;
+}
+
 template <typename T>
 class ReduceMKLDNNKernel : public framework::OpKernel<T> {
  public:
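
To make the helper above concrete: for an input of shape [5, 7, 3] reduced over axis 1 (or -2) without keep_dim it returns [5, 1, 3], the broadcast-compatible view the oneDNN primitives expect; with reduce_all it returns [1, 1, 1]; with keep_dim it simply reuses the output's dims. A standalone re-implementation over plain shape vectors (illustrative only, not the Paddle API):

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Same logic as CalculateReducedDims, but on raw shape vectors.
    std::vector<int64_t> ReducedDims(std::vector<int64_t> input_dims,
                                     std::vector<int> reduce_dims,
                                     bool reduce_all, bool keep_dim,
                                     const std::vector<int64_t>& output_dims) {
      if (keep_dim) return output_dims;  // output already carries the kept 1s
      if (reduce_all) return std::vector<int64_t>(input_dims.size(), 1);
      for (std::size_t i = 0; i < reduce_dims.size(); ++i) {
        // Wrap negative axes, then shrink the reduced axis to 1.
        int axis = reduce_dims[i] >= 0
                       ? reduce_dims[i]
                       : static_cast<int>(input_dims.size()) + reduce_dims[i];
        input_dims[axis] = 1;
      }
      return input_dims;
    }

    int main() {
      for (int64_t d : ReducedDims({5, 7, 3}, {-2}, false, false, {5, 3})) {
        std::cout << d << ' ';  // prints: 5 1 3
      }
      std::cout << '\n';
      return 0;
    }
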
@@ -37,9 +58,8 @@ class ReduceMKLDNNKernel : public framework::OpKernel<T> {
     bool reduce_all = ctx.Attr<bool>("reduce_all");
     bool keep_dim = ctx.Attr<bool>("keep_dim");
 
-    std::vector<int64_t> output_dims =
-        CalculateOutputDims(input, output, reduce_dims, reduce_all, keep_dim);
+    auto output_dims =
+        CalculateReducedDims(input, output, reduce_dims, reduce_all, keep_dim);
     auto input_dims = framework::vectorize(input->dims());
 
     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
@@ -96,53 +116,63 @@ class ReduceMKLDNNKernel : public framework::OpKernel<T> {
                        paddle::framework::vectorize<int64_t>(output->dims()))));
     }
   }
-
- private:
-  std::vector<int64_t> CalculateOutputDims(const Tensor* input,
-                                           const Tensor* output,
-                                           std::vector<int>& reduce_dims,
-                                           bool reduce_all,
-                                           bool keep_dim) const {
-    if (keep_dim) return framework::vectorize(output->dims());
-
-    if (reduce_all)
-      return std::vector<int64_t>(framework::vectorize(input->dims()).size(),
-                                  1);
-
-    std::vector<int64_t> output_dims(framework::vectorize(input->dims()));
-    for (size_t i = 0; i < reduce_dims.size(); ++i) {
-      reduce_dims[i] = (reduce_dims[i] >= 0)
-                           ? reduce_dims[i]
-                           : input->dims().size() + reduce_dims[i];
-      output_dims[reduce_dims[i]] = 1;
-    }
-
-    return output_dims;
-  }
 };
 
 template <typename T>
 class ReduceGradMKLDNNKernel : public framework::OpKernel<T> {
  public:
   void RunKernel(const framework::ExecutionContext& ctx,
-                 dnnl::algorithm binary_type, float scale_x,
-                 float scale_y) const {
+                 dnnl::algorithm binary_type, dnnl::algorithm reduction_type,
+                 float scale_x, float scale_y) const {
     const auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& onednn_engine = dev_ctx.GetEngine();
 
+    bool keep_dim = ctx.Attr<bool>("keep_dim");
+    bool reduce_all = ctx.Attr<bool>("reduce_all");
+    auto dims = ctx.Attr<std::vector<int>>("dim");
     auto* input_dy = ctx.Input<Tensor>(framework::GradVarName("Out"));
     auto* output_dx = ctx.Output<Tensor>(framework::GradVarName("X"));
 
+    mkldnn::memory::format_tag x_format_tag;
+    auto input_dims =
+        CalculateReducedDims(output_dx, input_dy, dims, reduce_all, keep_dim);
+
+    if (input_dims != framework::vectorize(output_dx->dims())) {
+      const std::string key_pd =
+          platform::CreateKey(
+              dev_ctx, framework::vectorize(output_dx->dims()),
+              ctx.InputName("X"),
+              (std::to_string(static_cast<int>(reduction_type)))) +
+          "@fwd_pd";
+      std::shared_ptr<dnnl::reduction::primitive_desc> fwd_pd =
+          std::static_pointer_cast<dnnl::reduction::primitive_desc>(
+              dev_ctx.GetBlob(key_pd));
+
+      PADDLE_ENFORCE_NOT_NULL(
+          fwd_pd, platform::errors::Unavailable(
+                      "Forward primitive descriptor is not available in %s op, "
+                      "cannot deduce memory format tag",
+                      ctx.Type()));
+
+      x_format_tag = platform::GetMKLDNNFormat(fwd_pd->src_desc());
+
+      PADDLE_ENFORCE_NE(x_format_tag, mkldnn::memory::format_tag::undef,
+                        platform::errors::InvalidArgument(
+                            "Cannot deduce format tag for %s op", ctx.Type()));
+    } else {  // fwd descriptor not available because reorder was used instead
+              // of reduction
+      x_format_tag = getPlainFormatTag(output_dx);
+    }
+
     output_dx->mutable_data<T>(ctx.GetPlace());
-    output_dx->set_format(getPlainFormatTag(output_dx));
+    output_dx->set_format(x_format_tag);
     output_dx->set_layout(input_dy->layout());
 
     platform::BroadcastDataMKLDNNHandler<T> handler(
         binary_type, dev_ctx, onednn_engine, ctx.GetPlace(), output_dx,
         input_dy, scale_x, scale_y,
-        ctx.InputName(framework::GradVarName("Out")));
+        ctx.InputName(framework::GradVarName("Out")), input_dims);
 
     const auto src_dx_memory = handler.AcquireSrcMemory(output_dx);
     const auto src_dy_memory = handler.AcquireSecondSrcMemory(input_dy);
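
The if branch above recovers the memory format of dX from the cached forward primitive descriptor: the forward reduction stores its dnnl::reduction::primitive_desc in the device context's blob cache under a key ending in "@fwd_pd", and the grad kernel fetches it to call src_desc(); when nothing is cached (the forward pass used a plain reorder instead of a reduction), it falls back to a plain format tag. A rough sketch of that caching pattern, with a std::unordered_map standing in for the blob cache (FwdPdStub, StoreFwdPd, and LoadFwdPd are hypothetical names, not Paddle API):

    #include <memory>
    #include <string>
    #include <unordered_map>

    // Stand-in for dnnl::reduction::primitive_desc in this sketch.
    struct FwdPdStub {
      int src_format_tag;
    };

    // Stand-in for the MKLDNNDeviceContext blob cache.
    using BlobCache = std::unordered_map<std::string, std::shared_ptr<void>>;

    // Forward pass: remember the primitive descriptor under "<key>@fwd_pd".
    void StoreFwdPd(BlobCache& cache, const std::string& key,
                    std::shared_ptr<FwdPdStub> pd) {
      cache[key + "@fwd_pd"] = pd;
    }

    // Backward pass: fetch it to deduce the source memory format; a null
    // result means the forward op used a reorder rather than a reduction.
    std::shared_ptr<FwdPdStub> LoadFwdPd(const BlobCache& cache,
                                         const std::string& key) {
      auto it = cache.find(key + "@fwd_pd");
      if (it == cache.end()) return nullptr;
      return std::static_pointer_cast<FwdPdStub>(it->second);
    }
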
paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc
@@ -29,7 +29,8 @@ template <typename T>
 class ReduceSumGradMKLDNNKernel : public ReduceGradMKLDNNKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    this->RunKernel(ctx, dnnl::algorithm::binary_add, 0.0f, 1.0f);
+    this->RunKernel(ctx, dnnl::algorithm::binary_add,
+                    dnnl::algorithm::reduction_sum, 0.0f, 1.0f);
   }
 };
paddle/fluid/operators/reduce_ops/reduce_op.h
@@ -559,8 +559,11 @@ class ReduceGradOp : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    auto input_data_type = OperatorWithKernel::IndicateVarDataType(
-        ctx, framework::GradVarName("Out"));
+    int in_dtype = ctx.Attr<int>("in_dtype");
+    auto input_data_type =
+        (in_dtype >= 0) ? static_cast<framework::proto::VarType::Type>(in_dtype)
+                        : OperatorWithKernel::IndicateVarDataType(
+                              ctx, framework::GradVarName("Out"));
 
 #ifdef PADDLE_WITH_MKLDNN
     auto CanMKLDNNReduceGradBeUsed = [&]() {
@@ -568,18 +571,6 @@ class ReduceGradOp : public framework::OperatorWithKernel {
       if (dx_dims.size() > 5) return false;  // max 5D tensor is supported
 
-      if (ctx.Attr<bool>("reduce_all") ||
-          ((int)ctx.Attr<std::vector<int>>("dim").size() == dx_dims.size()))
-        return true;
-
-      auto dy_dims = ctx.Input<Tensor>(framework::GradVarName("Out"))->dims();
-
-      // Subtensor must be on rightmost part of the bigger tensor
-      for (int i = 0; i < dy_dims.size(); ++i) {
-        if (dx_dims[dx_dims.size() - dy_dims.size() + i] != dy_dims[i]) {
-          return false;
-        }
-      }
       return true;
     };
 
     if (this->CanMKLDNNBeUsed(ctx, input_data_type) &&
@@ -590,12 +581,6 @@ class ReduceGradOp : public framework::OperatorWithKernel {
     }
 #endif
 
-    int in_dtype = ctx.Attr<int>("in_dtype");
-    if (in_dtype >= 0) {
-      return framework::OpKernelType(
-          static_cast<framework::proto::VarType::Type>(in_dtype),
-          ctx.GetPlace());
-    }
     return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
 };
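
Before this change, the lambda above only allowed the oneDNN grad path when dY was a rightmost subtensor of dX, which is what the removed loop checked; the restriction can go because the grad kernel now hands the reduced dims (input_dims) to the broadcast handler explicitly. A standalone sketch of the removed compatibility test (plain C++ over shape vectors, illustrative only):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Removed check, in isolation: dy's shape must equal the trailing
    // (rightmost) dims of dx's shape, e.g. dx = {5, 7, 3, 2, 4, 8} is
    // compatible with dy = {4, 8} but not with dy = {2, 8}.
    bool IsRightmostSubtensor(const std::vector<int64_t>& dx_dims,
                              const std::vector<int64_t>& dy_dims) {
      if (dy_dims.size() > dx_dims.size()) return false;
      const std::size_t offset = dx_dims.size() - dy_dims.size();
      for (std::size_t i = 0; i < dy_dims.size(); ++i) {
        if (dx_dims[offset + i] != dy_dims[i]) return false;
      }
      return true;
    }
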
paddle/fluid/platform/mkldnn_reuse.h
@@ -639,7 +639,8 @@ class BroadcastDataMKLDNNHandler
                              const mkldnn::engine engine,
                              platform::Place cpu_place, const Tensor* x,
                              const Tensor* y, float scale_x, float scale_y,
-                             const std::string& uniq_name)
+                             const std::string& uniq_name,
+                             std::vector<int64_t>& input_dims)
       : platform::MKLDNNHandlerT<T, dnnl::binary>(
             dev_ctx, engine, cpu_place,
             platform::CreateKey(dev_ctx, framework::vectorize(x->dims()),
@@ -659,24 +660,12 @@ class BroadcastDataMKLDNNHandler
           y->format(), MKLDNNMemoryFormat::undef,
           platform::errors::InvalidArgument("Wrong format set for Y tensor."));
 
-      auto src1_tz = framework::vectorize(y->dims());
       const auto src0_tz = framework::vectorize(x->dims());
-
-      // GetExpectedKernelType checks if smaller vector is a subvector with all
-      // the dims in correct order on the rightmost part of the bigger vector,
-      // i.e. a correct vector for broadcasting:
-      // x = 5, 7, 3, 2, 4, 8
-      // y = 4, 8
-      src1_tz.reserve(src0_tz.size());
-
-      for (size_t i = src1_tz.size(); i < src0_tz.size(); ++i) {
-        src1_tz.insert(src1_tz.begin(), 1L);
-      }
-
       const auto src0_md = dnnl::memory::desc(
           src0_tz, platform::MKLDNNGetDataType<T>(), x->format());
       const auto src1_md = dnnl::memory::desc(
-          src1_tz, platform::MKLDNNGetDataType<T>(), x->format());
+          input_dims, platform::MKLDNNGetDataType<T>(), x->format());
 
       dnnl::primitive_attr attributes;
       attributes.set_scales(DNNL_ARG_SRC_0, 0, {scale_x});
@@ -711,7 +700,7 @@ class ReductionMKLDNNHandler
                          const mkldnn::engine engine, platform::Place cpu_place,
                          const Tensor* x, const Tensor* y,
                          const std::string& uniq_name,
-                         std::vector<int64_t> output_dims)
+                         std::vector<int64_t> y_tz)
       : platform::MKLDNNHandlerT<T, dnnl::reduction>(
             dev_ctx, engine, cpu_place,
             platform::CreateKey(dev_ctx, framework::vectorize(x->dims()),
@@ -725,14 +714,14 @@ class ReductionMKLDNNHandler
           x->format(), MKLDNNMemoryFormat::undef,
           platform::errors::InvalidArgument("Wrong format set for X tensor."));
 
-      const auto src_tz = framework::vectorize(x->dims());
+      const auto x_tz = framework::vectorize(x->dims());
-      const auto src_md = dnnl::memory::desc(
-          src_tz, platform::MKLDNNGetDataType<T>(), x->format());
-      const auto dst_md = memory::desc(
-          output_dims, platform::MKLDNNGetDataType<T>(), x->format());
+      const auto x_md = dnnl::memory::desc(
+          x_tz, platform::MKLDNNGetDataType<T>(), x->format());
+      const auto y_md = memory::desc(
+          y_tz, platform::MKLDNNGetDataType<T>(), x->format());
 
-      this->AcquireForwardPrimitiveDescriptor(algo, src_md, dst_md, p, eps);
+      this->AcquireForwardPrimitiveDescriptor(algo, x_md, y_md, p, eps);
     }
   }
 };
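
The removed block above aligned y's shape to x's rank by prepending size-1 dims (the comment's example: y = {4, 8} against x = {5, 7, 3, 2, 4, 8} becomes {1, 1, 1, 1, 4, 8}); with this commit the caller passes the already-aligned input_dims instead. A standalone sketch of that alignment step (plain C++, illustrative only):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Pad the smaller shape on the left with 1s until it has the same rank
    // as the bigger shape, which is the form oneDNN's binary primitive
    // expects for broadcasting.
    // AlignForBroadcast({4, 8}, {5, 7, 3, 2, 4, 8}) == {1, 1, 1, 1, 4, 8}
    std::vector<int64_t> AlignForBroadcast(std::vector<int64_t> src1_tz,
                                           const std::vector<int64_t>& src0_tz) {
      src1_tz.reserve(src0_tz.size());
      for (std::size_t i = src1_tz.size(); i < src0_tz.size(); ++i) {
        src1_tz.insert(src1_tz.begin(), 1L);
      }
      return src1_tz;
    }
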