Commit 91119271 (unverified)

Enhance OpTest for bfloat16. (#36079)

Authored by Yiqun Liu on Oct 09, 2021; committed via GitHub on Oct 09, 2021.
Parent: cb620ca6

Showing 3 changed files with 106 additions and 51 deletions (+106 -51):
paddle/fluid/operators/cast_op.cu                    +14 -19
python/paddle/fluid/tests/unittests/op_test.py       +58 -28
python/paddle/fluid/tests/unittests/test_cast_op.py  +34 -4
paddle/fluid/operators/cast_op.cu

@@ -94,24 +94,19 @@ class CastCUDAOpKernel : public framework::OpKernel<InT> {
 }  // namespace paddle

 namespace ops = paddle::operators;
+namespace plat = paddle::platform;

-#ifdef PADDLE_WITH_HIP
-REGISTER_OP_CUDA_KERNEL(
-    cast, ops::CastCUDAOpKernel<float>, ops::CastCUDAOpKernel<double>,
-    ops::CastCUDAOpKernel<int>, ops::CastCUDAOpKernel<int64_t>,
-    ops::CastCUDAOpKernel<int16_t>, ops::CastCUDAOpKernel<bool>,
-    ops::CastCUDAOpKernel<uint8_t>,
-    ops::CastCUDAOpKernel<paddle::platform::float16>,
-    ops::CastCUDAOpKernel<paddle::platform::complex<float>>,
-    ops::CastCUDAOpKernel<paddle::platform::complex<double>>);
-#else
-REGISTER_OP_CUDA_KERNEL(
-    cast, ops::CastCUDAOpKernel<float>, ops::CastCUDAOpKernel<double>,
-    ops::CastCUDAOpKernel<int>, ops::CastCUDAOpKernel<int64_t>,
-    ops::CastCUDAOpKernel<int16_t>, ops::CastCUDAOpKernel<bool>,
-    ops::CastCUDAOpKernel<uint8_t>,
-    ops::CastCUDAOpKernel<paddle::platform::float16>,
-    ops::CastCUDAOpKernel<paddle::platform::bfloat16>,
-    ops::CastCUDAOpKernel<paddle::platform::complex<float>>,
-    ops::CastCUDAOpKernel<paddle::platform::complex<double>>);
-#endif
+#define REGISTER_CAST_CUDA_BASE(op_name, ...)                               \
+  REGISTER_OP_CUDA_KERNEL(                                                  \
+      op_name, ops::CastCUDAOpKernel<float>, ops::CastCUDAOpKernel<double>, \
+      ops::CastCUDAOpKernel<int>, ops::CastCUDAOpKernel<int64_t>,           \
+      ops::CastCUDAOpKernel<int16_t>, ops::CastCUDAOpKernel<bool>,          \
+      ops::CastCUDAOpKernel<uint8_t>, ops::CastCUDAOpKernel<plat::float16>, \
+      ops::CastCUDAOpKernel<plat::complex<float>>,                          \
+      ops::CastCUDAOpKernel<plat::complex<double>>, ##__VA_ARGS__);
+
+#if !defined(PADDLE_WITH_HIP)
+REGISTER_CAST_CUDA_BASE(cast, ops::CastCUDAOpKernel<plat::bfloat16>)
+#else
+REGISTER_CAST_CUDA_BASE(cast)
+#endif
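Note on the refactor: the REGISTER_CAST_CUDA_BASE macro folds the ten common kernel registrations into one place, and the trailing ##__VA_ARGS__ lets the non-ROCm branch append the extra bfloat16 kernel while the HIP branch registers the base list only. A minimal sketch of exercising the new kernel from Python (hypothetical usage, not part of this commit; it assumes a CUDA, non-ROCm build that includes this change, and the 'bfloat16' dtype string in paddle.cast is an assumption):

import paddle

# Hypothetical usage sketch; 'bfloat16' as a cast target is an assumption
# about builds that include this commit.
paddle.set_device('gpu')
x = paddle.rand([4, 4], dtype='float32')
y = paddle.cast(x, 'bfloat16')   # would dispatch to CastCUDAOpKernel<plat::bfloat16>
z = paddle.cast(y, 'float32')    # round-trip; only ~2-3 decimal digits survive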
python/paddle/fluid/tests/unittests/op_test.py

@@ -147,6 +147,9 @@ def get_numeric_gradient(place,
         op.run(scope, place)
         for output_name in output_names:
             output_numpy = np.array(scope.find_var(output_name).get_tensor())
+            # numpy.dtype does not have bfloat16, thus we use numpy.uint16 to
+            # store bfloat16 data, and need to be converted to float to check
+            # the floating precision.
             if tensor_to_check._dtype() == core.VarDesc.VarType.BF16:
                 output_numpy = convert_uint16_to_float(output_numpy)
             sum.append(output_numpy.astype(tensor_to_check_dtype).mean())
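The comment added above captures the key convention of bfloat16 support in OpTest: numpy has no bfloat16 dtype, so bfloat16 tensors travel as np.uint16 arrays holding the upper 16 bits of the float32 bit pattern, and must be widened back to float before any numeric comparison. A minimal sketch of what helpers like convert_float_to_uint16 and convert_uint16_to_float do (illustrative only, using plain truncation; the real helpers may also round; the function names below are placeholders):

import numpy as np

def float_to_bf16_bits(x):
    # Keep the high 16 bits of each float32 bit pattern (truncation).
    return np.right_shift(x.astype(np.float32).view(np.uint32), 16).astype(np.uint16)

def bf16_bits_to_float(x):
    # Put the stored 16 bits back into the high half of a float32.
    return np.left_shift(x.astype(np.uint32), 16).view(np.float32)

a = np.array([1.5, 3.14159], dtype=np.float32)
print(bf16_bits_to_float(float_to_bf16_bits(a)))  # [1.5, 3.140625]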
@@ -362,11 +365,26 @@ class OpTest(unittest.TestCase):
         self.dtype = data_type

     def is_bfloat16_op(self):
+        # self.dtype is the dtype of inputs, and is set in infer_dtype_from_inputs_outputs.
+        # Make sure this function is called after calling infer_dtype_from_inputs_outputs.
         return self.dtype == np.uint16 or (
-            hasattr(self, 'mkldnn_data_type') and
-            getattr(self, 'mkldnn_data_type') is "bfloat16") or (
-                hasattr(self, 'attrs') and 'mkldnn_data_type' in self.attrs and
-                self.attrs['mkldnn_data_type'] == 'bfloat16')
+            hasattr(self, 'output_dtype') and
+            self.output_dtype == np.uint16) or (
+                hasattr(self, 'mkldnn_data_type') and
+                getattr(self, 'mkldnn_data_type') is "bfloat16") or (
+                    hasattr(self, 'attrs') and
+                    'mkldnn_data_type' in self.attrs and
+                    self.attrs['mkldnn_data_type'] == 'bfloat16')
+
+    def is_mkldnn_op(self):
+        return (hasattr(self, "use_mkldnn") and self.use_mkldnn == True) or (
+            hasattr(self, "attrs") and "use_mkldnn" in self.attrs and
+            self.attrs["use_mkldnn"] == True)
+
+    def is_xpu_op(self):
+        return (hasattr(self, "use_xpu") and self.use_xpu == True) or (
+            hasattr(self, "attrs") and "use_xpu" in self.attrs and
+            self.attrs["use_xpu"] == True)

     def infer_dtype_from_inputs_outputs(self, inputs, outputs):
         def is_np_data(input):
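The three predicates centralize flag sniffing that was previously inlined at each call site (see the _append_ops hunk below). For context, a sketch of a hypothetical test class that the predicates would recognize (illustrative, not from the patch; 'some_op' is a placeholder name):

from op_test import OpTest  # same helper module used by the tests in this commit

class TestSomeOpBf16Mkldnn(OpTest):
    def setUp(self):
        self.op_type = 'some_op'            # placeholder op name
        self.mkldnn_data_type = 'bfloat16'  # makes is_bfloat16_op() True
        self.attrs = {'use_mkldnn': True}   # makes is_mkldnn_op() True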
@@ -398,8 +416,8 @@ class OpTest(unittest.TestCase):
         # infer dtype from inputs, and dtype means the precision of the test
         # collect dtype of all inputs
-        dtype_set = set()
-        infer_dtype(inputs, dtype_set)
+        input_dtype_set = set()
+        infer_dtype(inputs, input_dtype_set)
         dtype_list = [
             np.dtype(np.float64), np.dtype(np.float32), np.dtype(np.float16),
             np.dtype(np.int64), np.dtype(np.int32), np.dtype(np.uint16),
@@ -408,12 +426,20 @@ class OpTest(unittest.TestCase):
         ]
         # check the dtype in dtype_list in order, select the first dtype that in dtype_set
         for dtype in dtype_list:
-            if dtype in dtype_set:
+            if dtype in input_dtype_set:
                 self.dtype = dtype
                 break
-        # save dtype in class attr
+        # save input dtype in class attr
         self.__class__.dtype = self.dtype
+
+        # infer dtype of outputs
+        output_dtype_set = set()
+        infer_dtype(outputs, output_dtype_set)
+        for dtype in dtype_list:
+            if dtype in output_dtype_set:
+                self.output_dtype = dtype
+                break

     def feed_var(self, input_vars, place):
         feed_map = {}
         for var_name in input_vars:
@@ -439,14 +465,10 @@ class OpTest(unittest.TestCase):
     def _append_ops(self, block):
         self.__class__.op_type = self.op_type  # for ci check, please not delete it for now
-        if (hasattr(self, "use_mkldnn") and self.use_mkldnn == True) or \
-           (hasattr(self, "attrs") and "use_mkldnn" in self.attrs and \
-                self.attrs["use_mkldnn"] == True):
+        if self.is_mkldnn_op():
             self.__class__.use_mkldnn = True
-        if (hasattr(self, "use_xpu") and self.use_xpu == True) or \
-           (hasattr(self, "attrs") and "use_xpu" in self.attrs and \
-                self.attrs["use_xpu"] == True):
+        if self.is_xpu_op():
             self.__class__.use_xpu = True
         op_proto = OpProtoHolder.instance().get_op_proto(self.op_type)
@@ -1092,12 +1114,15 @@ class OpTest(unittest.TestCase):
         atol = 0

         if self.is_bfloat16_op():
-            check_dygraph = False
-            if hasattr(self, 'force_fp32_output') and getattr(
-                    self, 'force_fp32_output'):
-                atol = 1e-2
+            if self.is_mkldnn_op():
+                check_dygraph = False
+                if hasattr(self, 'force_fp32_output') and getattr(
+                        self, 'force_fp32_output'):
+                    atol = 1e-2
+                else:
+                    atol = 2
             else:
-                atol = 2
+                atol = 1e-2

         if no_check_set is not None:
             if self.op_type not in no_check_set_white_list.no_check_set_white_list:
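The looser tolerances track bfloat16's precision budget: only 7 mantissa bits are stored, so the gap between 1.0 and the next representable value is 2^-7 ≈ 0.0078, and outputs near 1.0 can legitimately differ by almost 0.01. A quick sanity check (illustrative arithmetic, not part of the patch):

# Spacing of bfloat16 values near 1.0: 7 stored mantissa bits.
eps_bf16 = 2.0 ** -7
print(eps_bf16)              # 0.0078125, just under the atol of 1e-2
print(3 * eps_bf16 < 0.03)   # True: atol 0.03 allows a few ulps of drift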
@@ -1193,6 +1218,7 @@ class OpTest(unittest.TestCase):
                 expect = self.outputs[out_name]
                 expect_t = expect[0] if isinstance(expect, tuple) else expect
+                # np.uint16 represents bfloat16
                 if actual_t.dtype == np.uint16 and expect_t.dtype in [
                         np.float32, np.float64
                 ]:
@@ -1205,6 +1231,7 @@ class OpTest(unittest.TestCase):
                     expect_t = convert_uint16_to_float(expect_t)
                     actual_t = convert_uint16_to_float(actual_t)
+                    atol = max(atol, 0.03)

                 # NOTE(zhiqiu): np.allclose([], [1.]) returns True
                 # see details: https://stackoverflow.com/questions/38331703/why-does-numpys-broadcasting-sometimes-allow-comparing-arrays-of-different-leng
                 if expect_t.size == 0:
@@ -1214,13 +1241,19 @@ class OpTest(unittest.TestCase):
                     np.allclose(
                         actual_t,
                         expect_t,
-                        rtol=rtol,
                         atol=atol,
+                        rtol=rtol,
                         equal_nan=equal_nan),
                     "Output (" + out_name + ") has diff at " + str(place) +
                     "\nExpect " + str(expect_t) + "\n" + "But Got" +
                     str(actual_t) + " in class " + self.__class__.__name__)
             if check_dygraph:
+                if self.is_bfloat16_op():
+                    if imperative_actual_t.dtype == np.uint16:
+                        imperative_actual_t = convert_uint16_to_float(
+                            imperative_actual_t)
+                    if expect_t.dtype == np.uint16:
+                        expect_t = convert_uint16_to_float(expect_t)
                 if six.moves.reduce(lambda x, y: x * y,
                                     imperative_actual_t.shape,
                                     1) == 0 and six.moves.reduce(
@@ -1232,6 +1265,7 @@ class OpTest(unittest.TestCase):
                         imperative_actual_t,
                         expect_t,
                         atol=atol,
+                        rtol=rtol,
                         equal_nan=equal_nan),
                     "Output (" + out_name + ") has diff at " + str(place) +
                     "\nExpect " + str(expect_t) + "\n" +

@@ -1340,14 +1374,10 @@ class OpTest(unittest.TestCase):
                                      check_dygraph=True,
                                      inplace_atol=None):
         self.__class__.op_type = self.op_type
-        if (hasattr(self, "use_mkldnn") and self.use_mkldnn == True) or \
-           (hasattr(self, "attrs") and "use_mkldnn" in self.attrs and \
-                self.attrs["use_mkldnn"] == True):
+        if self.is_mkldnn_op():
             self.__class__.use_mkldnn = True
-        if (hasattr(self, "use_xpu") and self.use_xpu == True) or \
-           (hasattr(self, "attrs") and "use_xpu" in self.attrs and \
-                self.attrs["use_xpu"] == True):
+        if self.is_xpu_op():
             self.__class__.use_xpu = True
         places = self._get_places()
@@ -1452,10 +1482,10 @@ class OpTest(unittest.TestCase):
         op_outputs = self.outputs if hasattr(self, "outputs") else dict()
         op_attrs = self.attrs if hasattr(self, "attrs") else dict()

-        if self.is_bfloat16_op():
+        if self.is_bfloat16_op() and self.is_mkldnn_op():
             check_dygraph = False

         self._check_grad_helper()
         if self.dtype == np.float64 and \
                 self.op_type not in op_threshold_white_list.NEED_FIX_FP64_CHECK_GRAD_THRESHOLD_OP_LIST:
             numeric_grad_delta = 1e-5
python/paddle/fluid/tests/unittests/test_cast_op.py

@@ -14,7 +14,6 @@
 from __future__ import print_function

-import op_test
 import unittest
 import numpy as np
@@ -22,9 +21,10 @@ import paddle
 import paddle.fluid.core as core
 import paddle.fluid as fluid
 from paddle.fluid import compiler, Program, program_guard
+from op_test import OpTest, convert_uint16_to_float, convert_float_to_uint16


-class TestCastOp1(op_test.OpTest):
+class TestCastOpFp32ToFp64(OpTest):
     def setUp(self):
         ipt = np.random.random(size=[10, 10])
         self.inputs = {'X': ipt.astype('float32')}
@@ -42,7 +42,7 @@ class TestCastOp1(op_test.OpTest):
         self.check_grad(['X'], ['Out'])


-class TestCastOp2(op_test.OpTest):
+class TestCastOpFp16ToFp32(OpTest):
     def setUp(self):
         ipt = np.random.random(size=[10, 10])
         self.inputs = {'X': ipt.astype('float16')}
@@ -57,7 +57,7 @@ class TestCastOp2(op_test.OpTest):
         self.check_output(atol=1e-3)


-class TestCastOp3(op_test.OpTest):
+class TestCastOpFp32ToFp16(OpTest):
     def setUp(self):
         ipt = np.random.random(size=[10, 10])
         self.inputs = {'X': ipt.astype('float32')}
@@ -72,6 +72,36 @@ class TestCastOp3(op_test.OpTest):
         self.check_output(atol=1e-3)


+class TestCastOpBf16ToFp32(OpTest):
+    def setUp(self):
+        ipt = np.array(np.random.randint(10, size=[10, 10])).astype('uint16')
+        self.inputs = {'X': ipt}
+        self.outputs = {'Out': convert_uint16_to_float(ipt)}
+        self.attrs = {
+            'in_dtype': int(core.VarDesc.VarType.BF16),
+            'out_dtype': int(core.VarDesc.VarType.FP32)
+        }
+        self.op_type = 'cast'
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestCastOpFp32ToBf16(OpTest):
+    def setUp(self):
+        ipt = np.random.random(size=[10, 10]).astype('float32')
+        self.inputs = {'X': ipt}
+        self.outputs = {'Out': convert_float_to_uint16(ipt)}
+        self.attrs = {
+            'in_dtype': int(core.VarDesc.VarType.FP32),
+            'out_dtype': int(core.VarDesc.VarType.BF16)
+        }
+        self.op_type = 'cast'
+
+    def test_check_output(self):
+        self.check_output()
+
+
 class TestCastOpError(unittest.TestCase):
     def test_errors(self):
         with program_guard(Program(), Program()):
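Both new cases build their expected outputs with the same conversion helpers the checker uses, so they validate the CUDA cast kernel against the Python-side bit manipulation. To run just the bfloat16 cases in isolation, a standard unittest invocation works (a sketch, assuming the working directory is python/paddle/fluid/tests/unittests so that op_test is importable):

import unittest
from test_cast_op import TestCastOpBf16ToFp32, TestCastOpFp32ToBf16

# Collect only the two bfloat16 test cases and run them verbosely.
loader = unittest.defaultTestLoader
suite = unittest.TestSuite()
suite.addTests(loader.loadTestsFromTestCase(TestCastOpBf16ToFp32))
suite.addTests(loader.loadTestsFromTestCase(TestCastOpFp32ToBf16))
unittest.TextTestRunner(verbosity=2).run(suite)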