Commit aaa71ea4 (unverified)

[NPU] fix fp16 (PART I) (#40259)

Authored Mar 18, 2022 by furnace; committed via GitHub on Mar 18, 2022.
Parent: 6e1fe4f1
Showing 18 changed files with 35 additions and 56 deletions (+35, -56).
paddle/fluid/operators/batch_norm_op_npu.cc                                        (+6, -6)
python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py                  (+17, -3)
python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py                         (+0, -2)
python/paddle/fluid/tests/unittests/npu/test_dropout_op_npu.py                     (+0, -2)
python/paddle/fluid/tests/unittests/npu/test_exp_op_npu.py                         (+0, -2)
python/paddle/fluid/tests/unittests/npu/test_hard_sigmoid_op_npu.py                (+0, -3)
python/paddle/fluid/tests/unittests/npu/test_hard_swish_op_npu.py                  (+0, -2)
python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py                         (+2, -6)
python/paddle/fluid/tests/unittests/npu/test_norm_op_npu.py                        (+0, -3)
python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py                      (+0, -2)
python/paddle/fluid/tests/unittests/npu/test_pool2d_op_npu.py                      (+0, -3)
python/paddle/fluid/tests/unittests/npu/test_reciprocal_op_npu.py                  (+0, -2)
python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py                       (+0, -2)
python/paddle/fluid/tests/unittests/npu/test_sigmoid_op_npu.py                     (+0, -2)
python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py  (+0, -2)
python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py                        (+5, -6)
python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py                      (+0, -2)
python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py                        (+5, -6)
paddle/fluid/operators/batch_norm_op_npu.cc

@@ -76,10 +76,10 @@ class NPUBatchNormOpKernel : public framework::OpKernel<T> {
     auto *variance_out = ctx.Output<Tensor>("VarianceOut");
     auto *saved_mean = ctx.Output<Tensor>("SavedMean");
     auto *saved_variance = ctx.Output<Tensor>("SavedVariance");
-    mean_out->mutable_data<T>(ctx.GetPlace());
-    variance_out->mutable_data<T>(ctx.GetPlace());
-    saved_mean->mutable_data<T>(ctx.GetPlace());
-    saved_variance->mutable_data<T>(ctx.GetPlace());
+    mean_out->mutable_data<float>(ctx.GetPlace());
+    variance_out->mutable_data<float>(ctx.GetPlace());
+    saved_mean->mutable_data<float>(ctx.GetPlace());
+    saved_variance->mutable_data<float>(ctx.GetPlace());

     // if MomentumTensor is set, use MomentumTensor value, momentum
     // is only used in this training branch

@@ -170,8 +170,8 @@ class NPUBatchNormGradOpKernel : public framework::OpKernel<T> {
     auto stream = ctx.template device_context<NPUDeviceContext>().stream();

     if (d_scale && d_bias) {
-      d_scale->mutable_data<T>(ctx.GetPlace());
-      d_bias->mutable_data<T>(ctx.GetPlace());
+      d_scale->mutable_data<float>(ctx.GetPlace());
+      d_bias->mutable_data<float>(ctx.GetPlace());
       if (use_global_stats) {
         const auto *running_mean = ctx.Input<Tensor>("Mean");
         const auto *running_variance = ctx.Input<Tensor>("Variance");
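The substantive change above is mutable_data&lt;T&gt; becoming mutable_data&lt;float&gt;: the batch-norm statistics buffers (running/saved mean and variance, and the scale/bias gradients) are kept in float32 even when the kernel is instantiated with T = float16. A minimal numpy sketch (illustrative values, not from this commit) of the failure mode that fp32 statistics avoid:

import numpy as np

# A minimal sketch (not from this commit) of why batch-norm statistics are
# kept in float32 when training in float16: small momentum-weighted updates
# round away entirely at fp16 precision.
mean_fp16 = np.float16(4.0)
mean_fp32 = np.float32(4.0)
update = 0.001  # hypothetical per-step statistics update
for _ in range(100):
    mean_fp16 = np.float16(mean_fp16 + np.float16(update))
    mean_fp32 = np.float32(mean_fp32 + np.float32(update))
print(mean_fp16)  # 4.0  -- each fp16 update is below one ulp and rounds away
print(mean_fp32)  # ~4.1 -- fp32 accumulates the updates as expected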
python/paddle/fluid/tests/unittests/npu/test_batch_norm_op_npu.py

@@ -144,6 +144,7 @@ class TestBatchNormOpTraining(unittest.TestCase):
     def setUp(self):
         self.set_npu()
+        self.init_dtype()
         self.use_mkldnn = False
         self.fuse_with_relu = False
         self.data_formats = ["NCHW", "NHWC"]

@@ -153,6 +154,9 @@ class TestBatchNormOpTraining(unittest.TestCase):
         self.init_kernel_type()
         self.init_test_case()

+    def init_dtype(self):
+        self.dtype = np.float32
+
     def init_test_case(self):
         self.use_global_stats = False
         self.no_grad_set = set()

@@ -210,11 +214,16 @@ class TestBatchNormOpTraining(unittest.TestCase):
         scale_shape = [c]

         np.random.seed(123)
-        x = np.random.random_sample(shape).astype(np.float32)
+        x = np.random.random_sample(shape).astype(self.dtype)
         scale = np.random.random_sample(scale_shape).astype(np.float32)
         bias = np.random.random_sample(scale_shape).astype(np.float32)
         mean, variance = self.set_mean_variance(scale_shape, x, data_layout)
-        y_grad = np.random.random_sample(shape).astype(np.float32)
+        if self.dtype == np.float16:
+            mean = mean.astype(np.float32)
+            variance = variance.astype(np.float32)
+        y_grad = np.random.random_sample(shape).astype(self.dtype)

         momentum_var = np.array([momentum]).astype(np.float32)
         y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad = self.ref_forward_backward(

@@ -275,7 +284,7 @@ class TestBatchNormOpTraining(unittest.TestCase):
                 inputs=inputs,
                 outputs=outputs,
                 attrs=attrs)
-            block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)
+            block.create_var(name='y@GRAD', dtype=self.dtype, shape=y.shape)

             # generate backward op_desc
             grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(

@@ -320,6 +329,11 @@ class TestBatchNormOpTraining(unittest.TestCase):
         pass


+class TestFP16BatchNormOpTraining(TestBatchNormOpTraining):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
 class TestBatchNormOpTrainingCase1(TestBatchNormOpTraining):
     def init_test_case(self):
         self.use_global_stats = False
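The test change follows a hook pattern: setUp() now calls init_dtype(), the dtype-sensitive arrays honor self.dtype, and the new TestFP16BatchNormOpTraining subclass only overrides the hook. A minimal self-contained sketch of that pattern (class names here are illustrative stand-ins, not Paddle's OpTest):

import unittest
import numpy as np

# Minimal sketch of the dtype-hook pattern used above: the base test reads
# self.dtype everywhere instead of hard-coding np.float32, so an fp16
# variant only needs to override init_dtype().
class BaseTrainingCase(unittest.TestCase):  # illustrative stand-in
    def setUp(self):
        self.init_dtype()

    def init_dtype(self):
        self.dtype = np.float32

    def test_input_dtype(self):
        x = np.random.random_sample((2, 3)).astype(self.dtype)
        self.assertEqual(x.dtype, np.dtype(self.dtype))

class FP16TrainingCase(BaseTrainingCase):
    def init_dtype(self):
        self.dtype = np.float16

if __name__ == "__main__":
    unittest.main()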
python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py

@@ -51,8 +51,6 @@ class TestCos(OpTest):
         self.check_output_with_place(self.place, atol=1e-7)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')
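This is the change repeated across most of the remaining test files: the "if self.dtype == np.float16: return" guard is deleted, so test_check_grad now exercises the float16 kernels instead of silently skipping them. For intuition, a standalone sketch (independent of Paddle's OpTest machinery) of what such a gradient check does and why fp16 runs tolerate larger relative error:

import numpy as np

# Standalone sketch (not Paddle's OpTest) of a numeric gradient check:
# compare an analytic gradient against a central finite difference.
def check_grad(f, grad_f, x, rtol, eps):
    numeric = (f(x + eps) - f(x - eps)) / (2 * eps)
    analytic = grad_f(x)
    np.testing.assert_allclose(
        analytic.astype(np.float64), numeric.astype(np.float64), rtol=rtol)

x = np.linspace(0.5, 2.0, 8, dtype=np.float16)
# fp16 carries roughly 3 decimal digits, so the fp16 run needs a larger
# step and a looser tolerance than the same check in fp32.
check_grad(np.cos, lambda v: -np.sin(v), x, rtol=0.1, eps=np.float16(0.05))
check_grad(np.cos, lambda v: -np.sin(v), x.astype(np.float32),
           rtol=0.01, eps=np.float32(1e-3))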
python/paddle/fluid/tests/unittests/npu/test_dropout_op_npu.py

@@ -56,8 +56,6 @@ class TestDropoutOp(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad_normal(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')
python/paddle/fluid/tests/unittests/npu/test_exp_op_npu.py

@@ -50,8 +50,6 @@ class TestExpNPUOP(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')

     def init_dtype(self):
python/paddle/fluid/tests/unittests/npu/test_hard_sigmoid_op_npu.py

@@ -59,9 +59,6 @@ class TestNPUHardSigmoid(OpTest):
         self.check_output_with_place(self.place, atol=1e-5)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
-
         self.check_grad_with_place(self.place, ['X'], 'Out')

     def set_npu(self):
python/paddle/fluid/tests/unittests/npu/test_hard_swish_op_npu.py

@@ -66,8 +66,6 @@ class TestHardSwishNPU(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         # There is a problem that precision of grad result using float32
         # can't satisfy the default precision requirement
         # when compared with numeric_grads, but the results on
python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py

@@ -50,12 +50,8 @@ class TestLog(OpTest):
     def test_check_output(self):
         self.check_output_with_place(self.place)

-    # TODO(ascendrc): Add grad test
-    # def test_check_grad(self):
-    #     if self.dtype == np.float16:
-    #         return
-    #     self.check_grad(['X'], 'Out')
-    #
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Out')


 class TestLogFp16(OpTest):
python/paddle/fluid/tests/unittests/npu/test_norm_op_npu.py

@@ -54,9 +54,6 @@ class TestNPUNormOp(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
-
         self.check_grad_with_place(
             self.place, ['X'], 'Out', max_relative_error=0.006)
python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py

@@ -51,8 +51,6 @@ class TestPnormOp(OpTest):
         self.check_output_with_place(paddle.NPUPlace(0))

     def test_check_grad(self):
-        if self.dtype == "float16":
-            return
         self.check_grad_with_place(
             paddle.NPUPlace(0), ['X'], 'Out', user_defined_grads=self.gradient)
python/paddle/fluid/tests/unittests/npu/test_pool2d_op_npu.py

@@ -67,9 +67,6 @@ def create_test_fp16_class(parent):
             self.use_cudnn = False
             self.dtype = np.float16

-        def test_check_grad(self):
-            return
-
     cls_name = "{0}_{1}".format(parent.__name__, "Fp16Op")
     TestFp16Case.__name__ = cls_name
     globals()[cls_name] = TestFp16Case
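Unlike the other test files, this one generates its fp16 cases through a class factory; deleting the empty test_check_grad stub means the generated classes fall back to the parent's real gradient test. A minimal sketch of the factory pattern itself (the hook name below is hypothetical, since the parent's real hook is not shown in this hunk):

import numpy as np

# Minimal sketch of the create_test_fp16_class factory pattern: subclass the
# parent test, flip the dtype, and register the new class under a generated
# name so the unittest loader can discover it.
def create_fp16_case(parent):
    class TestFp16Case(parent):
        def init_data_type(self):    # hypothetical hook name; the real one
            self.dtype = np.float16  # depends on the parent test class

    cls_name = "{0}_{1}".format(parent.__name__, "Fp16Op")
    TestFp16Case.__name__ = cls_name
    globals()[cls_name] = TestFp16Case

class TestPoolBase:  # illustrative stand-in for the real parent
    def init_data_type(self):
        self.dtype = np.float32

create_fp16_case(TestPoolBase)
assert "TestPoolBase_Fp16Op" in globals()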
python/paddle/fluid/tests/unittests/npu/test_reciprocal_op_npu.py

@@ -40,8 +40,6 @@ class TestNPUReciprocal(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(
             self.place, ['X'], 'Out', max_relative_error=0.01)
python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py

@@ -56,8 +56,6 @@ class TestRelu6(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')

     def init_dtype(self):
python/paddle/fluid/tests/unittests/npu/test_sigmoid_op_npu.py

@@ -44,8 +44,6 @@ class TestNPUSigmoid(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(
             self.place, ['X'], 'Out', max_relative_error=0.01)
python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py

@@ -87,8 +87,6 @@ class TestSoftmaxWithCrossEntropyOp(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         # fp32 has low precision, cpu and npu both need to relax the max_relative_error if using fp32
         self.check_grad_with_place(
             self.place, ['Logits'],
python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py

@@ -50,12 +50,11 @@ class TestSqrt(OpTest):
     def test_check_output(self):
         self.check_output_with_place(self.place)

-    # TODO(ascendrc): Add grad test
-    # def test_check_grad(self):
-    #     if self.dtype == np.float16:
-    #         return
-    #     self.check_grad(['X'], 'Out')
-    #
+    def test_check_grad(self):
+        if self.dtype == np.float16:
+            self.check_grad(['X'], 'Out', max_relative_error=0.009)
+        else:
+            self.check_grad(['X'], 'Out', max_relative_error=0.009)


 class TestSqrtFp16(OpTest):
python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py

@@ -51,8 +51,6 @@ class TestSquare(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')
python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py

@@ -50,12 +50,11 @@ class TestTanh(OpTest):
     def test_check_output(self):
         self.check_output_with_place(self.place)

-    # TODO(ascendrc): Add grad test
-    # def test_check_grad(self):
-    #     if self.dtype == np.float16:
-    #         return
-    #     self.check_grad(['X'], 'Out')
-    #
+    def test_check_grad(self):
+        if self.dtype == np.float16:
+            self.check_grad(['X'], 'Out', max_relative_error=0.009)
+        else:
+            self.check_grad(['X'], 'Out', max_relative_error=0.009)


 class TestTanhFp16(OpTest):