BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit ead83422 (unverified), authored Apr 21, 2021 by jakpiase, committed by GitHub on Apr 21, 2021

Added oneDNN reduce_op GRAD kernel (#32280)

Parent: 1593ee25
Showing 7 changed files with 329 additions and 128 deletions (+329, -128):
paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc (+29, -0)
paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h (+60, -0)
paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc (+12, -0)
paddle/fluid/operators/reduce_ops/reduce_op.h (+32, -3)
paddle/fluid/platform/mkldnn_reuse.h (+72, -0)
python/paddle/fluid/tests/unittests/mkldnn/test_reduce_bf16_mkldnn_op.py (+104, -80)
python/paddle/fluid/tests/unittests/mkldnn/test_reduce_mkldnn_op.py (+20, -45)
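
The gradient of a reduction is a broadcast of the upstream gradient dY back to the input's shape, scaled by 1/N for reduce_mean. A minimal NumPy sketch of the semantics this commit implements (illustrative only, not part of the diff):

import numpy as np

# For reduce_sum over axis 1 of a (2, 3, 4) input, every input element that
# contributed to an output element receives that element's gradient unchanged;
# for reduce_mean it is additionally divided by N, the reduced extent.
x = np.random.rand(2, 3, 4).astype(np.float32)
dy = np.ones((2, 4), dtype=np.float32)             # dL/dOut for dim=[1]

dx_sum = np.broadcast_to(dy[:, None, :], x.shape)  # reduce_sum gradient
dx_mean = dx_sum / x.shape[1]                      # reduce_mean gradient, N = 3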
paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc

@@ -25,6 +25,31 @@ class ReduceMeanMKLDNNKernel : public ReduceMKLDNNKernel<T> {
  }
};

template <typename T>
class ReduceMeanGradMKLDNNKernel : public ReduceGradMKLDNNKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const auto* input_x = ctx.Input<Tensor>("X");
    auto input_dims = framework::vectorize(input_x->dims());
    auto reduce_dims = ctx.Attr<std::vector<int>>("dim");

    int number_of_elements = 1;
    if (!ctx.Attr<bool>("reduce_all")) {
      for (size_t i = 0; i < reduce_dims.size(); ++i) {
        reduce_dims[i] = (reduce_dims[i] >= 0)
                             ? reduce_dims[i]
                             : input_dims.size() + reduce_dims[i];
        number_of_elements *= input_dims[reduce_dims[i]];
      }
    } else {
      number_of_elements = input_x->numel();
    }

    this->RunKernel(ctx, dnnl::algorithm::binary_add, 0.0f,
                    1.0L / number_of_elements);
  }
};

}  // namespace operators
}  // namespace paddle

@@ -32,3 +57,7 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(reduce_mean, MKLDNN, paddle::platform::CPUPlace,
                   ops::ReduceMeanMKLDNNKernel<float>,
                   ops::ReduceMeanMKLDNNKernel<paddle::platform::bfloat16>);

REGISTER_OP_KERNEL(reduce_mean_grad, MKLDNN, paddle::platform::CPUPlace,
                   ops::ReduceMeanGradMKLDNNKernel<float>,
                   ops::ReduceMeanGradMKLDNNKernel<paddle::platform::bfloat16>);
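
The loop above wraps negative entries of the "dim" attribute to positive indices and accumulates the product of the reduced extents, which becomes the 1/N scale passed to RunKernel. A hypothetical Python equivalent (names are illustrative, not from the commit):

import numpy as np

def reduced_element_count(input_dims, reduce_dims, reduce_all):
    # Mirrors the kernel: wrap negative dims, multiply the reduced extents.
    if reduce_all:
        return int(np.prod(input_dims))
    n = 1
    for d in reduce_dims:
        d = d if d >= 0 else len(input_dims) + d
        n *= input_dims[d]
    return n

assert reduced_element_count([2, 3, 4], [-1, 0], reduce_all=False) == 8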
paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h

@@ -121,5 +121,65 @@ class ReduceMKLDNNKernel : public framework::OpKernel<T> {
  }
};

template <typename T>
class ReduceGradMKLDNNKernel : public framework::OpKernel<T> {
 public:
  void RunKernel(const framework::ExecutionContext& ctx,
                 dnnl::algorithm binary_type, float scale_x,
                 float scale_y) const {
    const auto& dev_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& onednn_engine = dev_ctx.GetEngine();
    auto dims = ctx.Attr<std::vector<int>>("dim");
    auto* input_dy = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto* output_dx = ctx.Output<Tensor>(framework::GradVarName("X"));

    output_dx->mutable_data<T>(ctx.GetPlace());
    output_dx->set_format(getPlainFormatTag(output_dx));
    output_dx->set_layout(input_dy->layout());

    platform::BroadcastDataMKLDNNHandler<T> handler(
        binary_type, dev_ctx, onednn_engine, ctx.GetPlace(), output_dx,
        input_dy, scale_x, scale_y,
        ctx.InputName(framework::GradVarName("Out")));

    const auto src_dx_memory = handler.AcquireSrcMemory(output_dx);
    const auto src_dy_memory = handler.AcquireSecondSrcMemory(input_dy);
    const auto binary_prim = handler.AcquireForwardPrimitive();

    const std::unordered_map<int, dnnl::memory> args = {
        {DNNL_ARG_SRC_0, *src_dx_memory},
        {DNNL_ARG_SRC_1, *src_dy_memory},
        {DNNL_ARG_DST, *src_dx_memory}};

    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
    binary_prim->execute(astream, args);
    astream.wait();
  }

 protected:
  mkldnn::memory::format_tag getPlainFormatTag(const Tensor* tensor) const {
    auto tensor_dims_size = tensor->dims().size();
    PADDLE_ENFORCE_EQ(
        tensor_dims_size <= 5 && tensor_dims_size >= 1, true,
        platform::errors::InvalidArgument(
            "Dims for reduction_grad oneDNN op must be in range <1, 5>"));

    switch (tensor_dims_size) {
      case 1:
        return mkldnn::memory::format_tag::a;
      case 2:
        return mkldnn::memory::format_tag::ab;
      case 3:
        return mkldnn::memory::format_tag::abc;
      case 4:
        return mkldnn::memory::format_tag::abcd;
    }

    return mkldnn::memory::format_tag::abcde;
  }
};

}  // namespace operators
}  // namespace paddle
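
RunKernel computes dX without a dedicated backward primitive: the dX buffer is zero-filled (AcquireSrcMemory in mkldnn_reuse.h does the memset), and a broadcasting binary_add with per-source scales produces dX = scale_x * dX + scale_y * dY. A NumPy sketch of that trick (illustrative only):

import numpy as np

def reduce_grad(dx_shape, dy, scale_x=0.0, scale_y=1.0):
    dx = np.zeros(dx_shape, dtype=dy.dtype)    # dst memory is memset to zero
    # dy is left-padded with singleton dims so it broadcasts over dx,
    # as BroadcastDataMKLDNNHandler does with src1_tz:
    dy = dy.reshape((1,) * (len(dx_shape) - dy.ndim) + dy.shape)
    return scale_x * dx + scale_y * dy

dy = np.ones((4, 8), dtype=np.float32)
dx = reduce_grad((3, 2, 4, 8), dy)                         # reduce_sum grad
dx_mean = reduce_grad((3, 2, 4, 8), dy, scale_y=1.0 / 6)   # reduce_mean, N = 6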
paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc

@@ -25,6 +25,14 @@ class ReduceSumMKLDNNKernel : public ReduceMKLDNNKernel<T> {
  }
};

template <typename T>
class ReduceSumGradMKLDNNKernel : public ReduceGradMKLDNNKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    this->RunKernel(ctx, dnnl::algorithm::binary_add, 0.0f, 1.0f);
  }
};

}  // namespace operators
}  // namespace paddle

@@ -32,3 +40,7 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(reduce_sum, MKLDNN, paddle::platform::CPUPlace,
                   ops::ReduceSumMKLDNNKernel<float>,
                   ops::ReduceSumMKLDNNKernel<paddle::platform::bfloat16>);

REGISTER_OP_KERNEL(reduce_sum_grad, MKLDNN, paddle::platform::CPUPlace,
                   ops::ReduceSumGradMKLDNNKernel<float>,
                   ops::ReduceSumGradMKLDNNKernel<paddle::platform::bfloat16>);
paddle/fluid/operators/reduce_ops/reduce_op.h

@@ -559,15 +559,44 @@ class ReduceGradOp : public framework::OperatorWithKernel {
 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    auto input_data_type = OperatorWithKernel::IndicateVarDataType(
        ctx, framework::GradVarName("Out"));

#ifdef PADDLE_WITH_MKLDNN
    auto CanMKLDNNReduceGradBeUsed = [&]() {
      auto dx_dims = ctx.Input<Tensor>("X")->dims();

      if (dx_dims.size() > 5) return false;  // max 5D tensor is supported

      if (ctx.Attr<bool>("reduce_all") ||
          ((int)ctx.Attr<std::vector<int>>("dim").size() == dx_dims.size()))
        return true;

      auto dy_dims = ctx.Input<Tensor>(framework::GradVarName("Out"))->dims();

      // Subtensor must be on rightmost part of the bigger tensor
      for (int i = 0; i < dy_dims.size(); ++i) {
        if (dx_dims[dx_dims.size() - dy_dims.size() + i] != dy_dims[i]) {
          return false;
        }
      }
      return true;
    };

    if (this->CanMKLDNNBeUsed(ctx, input_data_type) &&
        CanMKLDNNReduceGradBeUsed()) {
      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
                                     framework::DataLayout::kMKLDNN,
                                     framework::LibraryType::kMKLDNN);
    }
#endif

    int in_dtype = ctx.Attr<int>("in_dtype");
    if (in_dtype >= 0) {
      return framework::OpKernelType(
          static_cast<framework::proto::VarType::Type>(in_dtype),
          ctx.GetPlace());
    }

    return framework::OpKernelType(input_data_type, ctx.GetPlace());
  }
};
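
A sketch of the eligibility rule encoded by CanMKLDNNReduceGradBeUsed, in Python for readability (illustrative helper, not part of the diff): the input must be at most 5-D, and dY's dims must match the rightmost dims of dX so that dY can be broadcast by prepending 1s.

def can_use_onednn_reduce_grad(dx_dims, dy_dims, reduce_all, reduce_dims):
    if len(dx_dims) > 5:
        return False                         # max 5-D tensor is supported
    if reduce_all or len(reduce_dims) == len(dx_dims):
        return True
    offset = len(dx_dims) - len(dy_dims)     # dy must sit on the right
    return all(dx_dims[offset + i] == d for i, d in enumerate(dy_dims))

assert can_use_onednn_reduce_grad([3, 2, 4, 8], [4, 8], False, [0, 1])
assert not can_use_onednn_reduce_grad([3, 2, 4, 8], [2, 4], False, [0, 3])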
paddle/fluid/platform/mkldnn_reuse.h

@@ -630,6 +630,78 @@ class BinaryMKLDNNHandler : public platform::MKLDNNHandlerT<T, dnnl::binary> {
  }
};

template <typename T>
class BroadcastDataMKLDNNHandler
    : public platform::MKLDNNHandlerT<T, dnnl::binary> {
 public:
  BroadcastDataMKLDNNHandler(const dnnl::algorithm algo,
                             const MKLDNNDeviceContext& dev_ctx,
                             const mkldnn::engine engine,
                             platform::Place cpu_place, const Tensor* x,
                             const Tensor* y, float scale_x, float scale_y,
                             const std::string& uniq_name)
      : platform::MKLDNNHandlerT<T, dnnl::binary>(
            dev_ctx, engine, cpu_place,
            platform::CreateKey(dev_ctx, framework::vectorize(x->dims()),
                                uniq_name)) {
    if (!this->isCached()) {
      PADDLE_ENFORCE_EQ(
          x->layout(), DataLayout::kMKLDNN,
          platform::errors::InvalidArgument("Wrong layout set for X tensor."));
      PADDLE_ENFORCE_NE(
          x->format(), MKLDNNMemoryFormat::undef,
          platform::errors::InvalidArgument("Wrong format set for X tensor."));

      PADDLE_ENFORCE_EQ(
          y->layout(), DataLayout::kMKLDNN,
          platform::errors::InvalidArgument("Wrong layout set for Y tensor."));
      PADDLE_ENFORCE_NE(
          y->format(), MKLDNNMemoryFormat::undef,
          platform::errors::InvalidArgument("Wrong format set for Y tensor."));

      auto src1_tz = framework::vectorize(y->dims());
      const auto src0_tz = framework::vectorize(x->dims());

      // GetExpectedKernelType checks if smaller vector is a subvector with all
      // the dims in correct order on the rightmost part of the bigger vector,
      // i.e. a correct vector for broadcasting:
      // x = 5, 7, 3, 2, 4, 8
      // y = 4, 8
      src1_tz.reserve(src0_tz.size());

      for (size_t i = src1_tz.size(); i < src0_tz.size(); ++i) {
        src1_tz.insert(src1_tz.begin(), 1L);
      }

      const auto src0_md = dnnl::memory::desc(
          src0_tz, platform::MKLDNNGetDataType<T>(), x->format());
      const auto src1_md = dnnl::memory::desc(
          src1_tz, platform::MKLDNNGetDataType<T>(), x->format());

      dnnl::primitive_attr attributes;
      attributes.set_scales(DNNL_ARG_SRC_0, 0, {scale_x});
      attributes.set_scales(DNNL_ARG_SRC_1, 0, {scale_y});

      this->AcquireForwardPrimitiveDescriptor(attributes, algo, src0_md,
                                              src1_md, src0_md);
    }
  }

  std::shared_ptr<mkldnn::memory> AcquireSrcMemory(framework::Tensor* input) {
    T* input_data = input->data<T>();
    memset(input_data, 0, this->fwd_pd_->src_desc().get_size());
    return this->AcquireMemoryFromPrimitive(
        this->fwd_pd_->src_desc(), to_void_cast<T>(input_data), "@src0_mem_p");
  }

  std::shared_ptr<mkldnn::memory> AcquireSecondSrcMemory(
      const framework::Tensor* input) {
    const T* input_data = input->data<T>();
    return this->AcquireMemoryFromPrimitive(
        this->fwd_pd_->src1_desc(), to_void_cast<T>(input_data),
        "@src1_mem_p");
  }
};

template <typename T>
class ReductionMKLDNNHandler
    : public platform::MKLDNNHandlerT<T, dnnl::reduction> {
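
The only shape work the handler does is left-padding the smaller operand's dims with 1s, exactly like NumPy broadcasting. For example (illustrative, reusing the shapes from the code comment):

src0_tz = [5, 7, 3, 2, 4, 8]                     # x dims
src1_tz = [4, 8]                                 # y dims
src1_tz = [1] * (len(src0_tz) - len(src1_tz)) + src1_tz
assert src1_tz == [1, 1, 1, 1, 4, 8]             # broadcastable against src0_tz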
python/paddle/fluid/tests/unittests/mkldnn/test_reduce_bf16_mkldnn_op.py

@@ -26,158 +26,182 @@ import paddle
                 "place does not support BF16 evaluation")
@unittest.skipIf(core.is_compiled_with_cuda(),
                 "core is compiled with CUDA which has no BF implementation")
@skip_check_grad_ci(reason="not implemented")
class TestReduceSumDefaultBF16OneDNNOp(OpTest):
    def setUp(self):
        self.op_type = "reduce_sum"
        self.use_mkldnn = True
        self.x_fp32 = np.random.random((5, 6, 10)).astype("float32")
        self.x_bf16 = convert_float_to_uint16(self.x_fp32)
        self.inputs = {'X': self.x_bf16}
        self.outputs = {'Out': self.x_fp32.sum(axis=0)}
        self.attrs = {'use_mkldnn': self.use_mkldnn}

    def test_check_output(self):
        self.check_output(check_dygraph=False)

    def calculate_grads(self):
        tmp_tensor = np.zeros(self.x_fp32.shape).astype("float32")

        prod_of_reduced_dims = self.inputs['X'].shape[0]
        axis = 0

        if "dim" in self.attrs:
            prod_of_reduced_dims = 1
            axis = tuple(self.attrs['dim'])
            for i in range(len(axis)):
                ax = axis[i]
                if axis[i] < 0:
                    ax = len(axis) + axis[i]
                prod_of_reduced_dims *= self.inputs['X'].shape[ax]

        if 'reduce_all' in self.attrs:
            if self.attrs['reduce_all'] is True:
                axis = None
                prod_of_reduced_dims = np.asarray(self.inputs['X'].shape).prod()

        keepdim = False
        if 'keep_dim' in self.attrs:
            keepdim = True

        self.grad_Out = self.x_fp32.sum(axis=axis, keepdims=keepdim)
        self.grad_Out = np.atleast_1d(self.grad_Out)
        self.grad_X = tmp_tensor + self.grad_Out  # broadcast grad

        if self.op_type == 'reduce_mean':
            self.grad_X /= prod_of_reduced_dims


class TestReduceDefaultWithGradBF16OneDNNOp(TestReduceSumDefaultBF16OneDNNOp):
    def test_check_grad(self):
        self.calculate_grads()
        self.check_grad_with_place(
            core.CPUPlace(), ["X"],
            "Out",
            check_dygraph=False,
            user_defined_grads=[self.grad_X],
            user_defined_grad_outputs=[convert_float_to_uint16(self.grad_Out)])


class TestReduceSum4DReduceAllDimAttributeBF16OneDNNOp(
        TestReduceDefaultWithGradBF16OneDNNOp):
    def setUp(self):
        self.op_type = "reduce_sum"
        self.use_mkldnn = True
        self.x_fp32 = np.random.normal(size=(2, 3, 5, 6)).astype('float32')
        self.x_bf16 = convert_float_to_uint16(self.x_fp32)
        self.inputs = {'X': self.x_bf16}
        self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [0, 1, 2, 3]}
        self.outputs = {'Out': self.x_fp32.sum(axis=tuple(self.attrs['dim']))}


class TestReduceSum4DReduceAllWithoutReduceAllAttributeNegativeDimsBF16OneDNNOp(
        TestReduceDefaultWithGradBF16OneDNNOp):
    def setUp(self):
        self.op_type = "reduce_sum"
        self.use_mkldnn = True
        self.x_fp32 = np.random.normal(size=(4, 7, 6, 6)).astype('float32')
        self.x_bf16 = convert_float_to_uint16(self.x_fp32)
        self.inputs = {'X': self.x_bf16}
        self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [-1, -2, -3, -4]}
        self.outputs = {'Out': self.x_fp32.sum(axis=tuple(self.attrs['dim']))}


class TestReduceSum5DReduceAllKeepDimsBF16OneDNNOp(
        TestReduceDefaultWithGradBF16OneDNNOp):
    def setUp(self):
        self.op_type = "reduce_sum"
        self.use_mkldnn = True
        self.x_fp32 = np.random.normal(size=(2, 5, 3, 2, 5)).astype('float32')
        self.x_bf16 = convert_float_to_uint16(self.x_fp32)
        self.inputs = {'X': self.x_bf16}
        self.attrs = {'reduce_all': True, 'keep_dim': True, 'use_mkldnn': True}
        self.outputs = {
            'Out': self.x_fp32.sum(keepdims=self.attrs['keep_dim'])
        }


class TestReduceSum4DReduceAllBF16OneDNNOp(
        TestReduceDefaultWithGradBF16OneDNNOp):
    def setUp(self):
        self.op_type = "reduce_sum"
        self.use_mkldnn = True
        self.x_fp32 = np.random.normal(size=(4, 5, 4, 5)).astype('float32')
        self.x_bf16 = convert_float_to_uint16(self.x_fp32)
        self.inputs = {'X': self.x_bf16}
        self.attrs = {'reduce_all': True, 'use_mkldnn': self.use_mkldnn}
        self.outputs = {'Out': self.x_fp32.sum()}


@skip_check_grad_ci(
    reason="reduce_max is discontinuous non-derivable function,"
    " its gradient check is not supported by unittest framework.")
class TestReduceMax3DBF16OneDNNOp(TestReduceSumDefaultBF16OneDNNOp):
    """Remove Max with subgradient from gradient check to confirm the success of CI."""

    def setUp(self):
        self.op_type = "reduce_max"
        self.use_mkldnn = True
        self.x_fp32 = np.random.random((5, 6, 10)).astype("float32")
        self.x_bf16 = convert_float_to_uint16(self.x_fp32)
        self.inputs = {'X': self.x_bf16}
        self.attrs = {'dim': [-1], 'use_mkldnn': self.use_mkldnn}
        self.outputs = {'Out': self.x_fp32.max(axis=tuple(self.attrs['dim']))}


@skip_check_grad_ci(
    reason="reduce_max is discontinuous non-derivable function,"
    " its gradient check is not supported by unittest framework.")
class TestReduceMax4DNegativeAndPositiveDimsBF16OneDNNOp(
        TestReduceSumDefaultBF16OneDNNOp):
    """Remove Max with subgradient from gradient check to confirm the success of CI."""

    def setUp(self):
        self.op_type = "reduce_max"
        self.use_mkldnn = True
        self.x_fp32 = np.random.random((5, 6, 10, 9)).astype("float32")
        self.x_bf16 = convert_float_to_uint16(self.x_fp32)
        self.inputs = {'X': self.x_bf16}
        self.attrs = {'dim': [-1, 0, 1], 'use_mkldnn': self.use_mkldnn}
        self.outputs = {'Out': self.x_fp32.max(axis=tuple(self.attrs['dim']))}


@skip_check_grad_ci(
    reason="reduce_min is discontinuous non-derivable function,"
    " its gradient check is not supported by unittest framework.")
class TestReduceMin3DBF16OneDNNOp(TestReduceSumDefaultBF16OneDNNOp):
    """Remove Min with subgradient from gradient check to confirm the success of CI."""

    def setUp(self):
        self.op_type = "reduce_min"
        self.use_mkldnn = True
        self.x_fp32 = np.random.random((5, 6, 10)).astype("float32")
        self.x_bf16 = convert_float_to_uint16(self.x_fp32)
        self.inputs = {'X': self.x_bf16}
        self.attrs = {'dim': [2], 'use_mkldnn': self.use_mkldnn}
        self.outputs = {'Out': self.x_fp32.min(axis=tuple(self.attrs['dim']))}


class TestReduceMean3DBF16OneDNNOp(TestReduceDefaultWithGradBF16OneDNNOp):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.use_mkldnn = True
        self.x_fp32 = np.random.random((5, 6, 10)).astype("float32")
        self.x_bf16 = convert_float_to_uint16(self.x_fp32)
        self.inputs = {'X': self.x_bf16}
        self.attrs = {'use_mkldnn': self.use_mkldnn}
        self.outputs = {
            'Out': self.x_fp32.sum(axis=0) / self.x_fp32.shape[0]
        }


class TestReduceMean4DBF16OneDNNOp(TestReduceDefaultWithGradBF16OneDNNOp):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.use_mkldnn = True
        self.x_fp32 = np.random.random((5, 6, 3, 5)).astype("float32")
        self.x_bf16 = convert_float_to_uint16(self.x_fp32)
        self.inputs = {'X': self.x_bf16}
        self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [0, 1]}
        self.outputs = {
            'Out': self.x_fp32.sum(axis=tuple(self.attrs['dim'])) /
            (self.x_fp32.shape[0] * self.x_fp32.shape[1])
        }


if __name__ == '__main__':
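
A quick sanity check of the analytic gradients these tests feed to check_grad_with_place (illustrative only, not part of the test file): for reduce_mean over axis 0 of a (5, 6, 10) input with an all-ones dOut, every input element should receive 1/5.

import numpy as np

x = np.random.random((5, 6, 10)).astype("float32")
grad_out = np.ones((6, 10), dtype="float32")
grad_x = np.zeros(x.shape, dtype="float32") + grad_out  # broadcast, as in calculate_grads
grad_x /= x.shape[0]                                    # reduce_mean divides by N = 5
assert np.allclose(grad_x, 0.2)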
python/paddle/fluid/tests/unittests/mkldnn/test_reduce_mkldnn_op.py

@@ -19,8 +19,7 @@ import paddle.fluid as fluid
import paddle


class TestReduceSumDefaultOneDNNOp(OpTest):
    def setUp(self):
        self.op_type = "reduce_sum"
        self.use_mkldnn = True

@@ -32,46 +31,35 @@ class TestReduceSumDefaultONEDNNOp(OpTest):
        self.check_output()


class TestReduceDefaultWithGradOneDNNOp(TestReduceSumDefaultOneDNNOp):
    def test_check_grad(self):
        self.check_grad(['X'], 'Out')


class TestReduceSum4DOneDNNOp(TestReduceDefaultWithGradOneDNNOp):
    def setUp(self):
        self.op_type = "reduce_sum"
        self.use_mkldnn = True
        self.inputs = {'X': np.random.random((5, 10, 5, 5)).astype("float32")}
        self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [2]}
        self.outputs = {
            'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']))
        }


class TestReduceSum4DReduceAllDimAttributeBF16OneDNNOp(
        TestReduceDefaultWithGradOneDNNOp):
    def setUp(self):
        self.op_type = "reduce_sum"
        self.use_mkldnn = True
        self.inputs = {'X': np.random.random((5, 10, 5, 3)).astype("float32")}
        self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [0, 1, 2, 3]}
        self.outputs = {
            'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']))
        }


class TestReduceSum5DKeepDimsOneDNNOp(TestReduceDefaultWithGradOneDNNOp):
    def setUp(self):
        self.op_type = "reduce_sum"
        self.use_mkldnn = True

@@ -83,8 +71,8 @@ class TestReduceSum5DKeepDimsONEDNNOp(TestReduceSumDefaultONEDNNOp):
        }


class TestReduceSum5DReduceAllKeepDimsOneDNNOp(
        TestReduceDefaultWithGradOneDNNOp):
    def setUp(self):
        self.op_type = "reduce_sum"
        self.use_mkldnn = True

@@ -95,8 +83,7 @@ class TestReduceSum5DReduceAllKeepDimsONEDNNOp(TestReduceSumDefaultONEDNNOp):
        }


class TestReduceSum4DReduceAllOneDNNOp(TestReduceDefaultWithGradOneDNNOp):
    def setUp(self):
        self.op_type = "reduce_sum"
        self.use_mkldnn = True

@@ -108,7 +95,7 @@ class TestReduceSum4DReduceAllONEDNNOp(TestReduceSumDefaultONEDNNOp):
@skip_check_grad_ci(
    reason="reduce_max is discontinuous non-derivable function,"
    " its gradient check is not supported by unittest framework.")
class TestReduceMax3DOneDNNOp(TestReduceSumDefaultOneDNNOp):
    """Remove Max with subgradient from gradient check to confirm the success of CI."""

    def setUp(self):

@@ -124,8 +111,8 @@ class TestReduceMax3DONEDNNOp(TestReduceSumDefaultONEDNNOp):
@skip_check_grad_ci(
    reason="reduce_max is discontinuous non-derivable function,"
    " its gradient check is not supported by unittest framework.")
class TestReduceMax4DNegativeAndPositiveDimsOneDNNOp(
        TestReduceSumDefaultOneDNNOp):
    """Remove Max with subgradient from gradient check to confirm the success of CI."""

    def setUp(self):

@@ -141,7 +128,7 @@ class TestReduceMax4DNegativeAndPositiveDimsONEDNNOp(
@skip_check_grad_ci(
    reason="reduce_min is discontinuous non-derivable function,"
    " its gradient check is not supported by unittest framework.")
class TestReduceMin3DOneDNNOp(TestReduceSumDefaultOneDNNOp):
    """Remove Min with subgradient from gradient check to confirm the success of CI."""

    def setUp(self):

@@ -154,8 +141,7 @@ class TestReduceMin3DONEDNNOp(TestReduceSumDefaultONEDNNOp):
        }


class TestReduceMean3DOneDNNOp(TestReduceDefaultWithGradOneDNNOp):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.use_mkldnn = True

@@ -166,8 +152,7 @@ class TestReduceMean3DONEDNNOp(TestReduceSumDefaultONEDNNOp):
        }


class TestReduceMean4DReduceAllOneDNNOp(TestReduceDefaultWithGradOneDNNOp):
    def setUp(self):
        self.op_type = "reduce_mean"
        self.use_mkldnn = True

@@ -179,16 +164,6 @@ class TestReduceMean4DReduceAllONEDNNOp(TestReduceSumDefaultONEDNNOp):
(the TestReduceMeanNoReduce1DOp test case, which ran reduce_mean on a 1-element input, is removed)

if __name__ == '__main__':
    paddle.enable_static()
    unittest.main()