Unverified · Commit aaa71ea4, authored by furnace, committed by GitHub

[NPU] fix fp16 (PART I) (#40259)

[NPU] fix fp16 (PART I)
Parent 6e1fe4f1
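In short: the NPU batch_norm kernels now allocate the mean/variance outputs as float32 even when the compute dtype T is fp16, and the NPU op tests no longer skip test_check_grad for fp16 (using a relaxed max_relative_error where needed). Below is a minimal sketch of that test-side pattern only, not code from this commit; it assumes Paddle's OpTest test base class and an NPU build, and the test name, op choice, and shapes are illustrative.

# Sketch only; assumes Paddle's OpTest base class and an NPU build of Paddle.
import numpy as np
import paddle
from op_test import OpTest


class TestTanhFP16Sketch(OpTest):
    def setUp(self):
        self.__class__.use_npu = True  # assumption: mirrors the set_npu() helpers in these tests
        self.op_type = "tanh"
        self.place = paddle.NPUPlace(0)
        self.dtype = np.float16
        x = np.random.uniform(0.1, 1.0, [11, 17]).astype(self.dtype)
        self.inputs = {'X': x}
        self.outputs = {'Out': np.tanh(x)}

    def test_check_output(self):
        self.check_output_with_place(self.place)

    def test_check_grad(self):
        # Previously many NPU tests returned early here for fp16; the gradient
        # is now checked for fp16 as well, with a relaxed tolerance.
        self.check_grad_with_place(
            self.place, ['X'], 'Out', max_relative_error=0.009)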
@@ -76,10 +76,10 @@ class NPUBatchNormOpKernel : public framework::OpKernel<T> {
     auto *variance_out = ctx.Output<Tensor>("VarianceOut");
     auto *saved_mean = ctx.Output<Tensor>("SavedMean");
     auto *saved_variance = ctx.Output<Tensor>("SavedVariance");
-    mean_out->mutable_data<T>(ctx.GetPlace());
-    variance_out->mutable_data<T>(ctx.GetPlace());
-    saved_mean->mutable_data<T>(ctx.GetPlace());
-    saved_variance->mutable_data<T>(ctx.GetPlace());
+    mean_out->mutable_data<float>(ctx.GetPlace());
+    variance_out->mutable_data<float>(ctx.GetPlace());
+    saved_mean->mutable_data<float>(ctx.GetPlace());
+    saved_variance->mutable_data<float>(ctx.GetPlace());

     // if MomentumTensor is set, use MomentumTensor value, momentum
     // is only used in this training branch
@@ -170,8 +170,8 @@ class NPUBatchNormGradOpKernel : public framework::OpKernel<T> {
     auto stream = ctx.template device_context<NPUDeviceContext>().stream();

     if (d_scale && d_bias) {
-      d_scale->mutable_data<T>(ctx.GetPlace());
-      d_bias->mutable_data<T>(ctx.GetPlace());
+      d_scale->mutable_data<float>(ctx.GetPlace());
+      d_bias->mutable_data<float>(ctx.GetPlace());
       if (use_global_stats) {
         const auto *running_mean = ctx.Input<Tensor>("Mean");
         const auto *running_variance = ctx.Input<Tensor>("Variance");
...
@@ -144,6 +144,7 @@ class TestBatchNormOpTraining(unittest.TestCase):

     def setUp(self):
         self.set_npu()
+        self.init_dtype()
         self.use_mkldnn = False
         self.fuse_with_relu = False
         self.data_formats = ["NCHW", "NHWC"]
@@ -153,6 +154,9 @@ class TestBatchNormOpTraining(unittest.TestCase):
         self.init_kernel_type()
         self.init_test_case()

+    def init_dtype(self):
+        self.dtype = np.float32
+
     def init_test_case(self):
         self.use_global_stats = False
         self.no_grad_set = set()
@@ -210,11 +214,16 @@ class TestBatchNormOpTraining(unittest.TestCase):
             scale_shape = [c]

             np.random.seed(123)
-            x = np.random.random_sample(shape).astype(np.float32)
+            x = np.random.random_sample(shape).astype(self.dtype)
             scale = np.random.random_sample(scale_shape).astype(np.float32)
             bias = np.random.random_sample(scale_shape).astype(np.float32)
             mean, variance = self.set_mean_variance(scale_shape, x, data_layout)
-            y_grad = np.random.random_sample(shape).astype(np.float32)
+
+            if self.dtype == np.float16:
+                mean = mean.astype(np.float32)
+                variance = variance.astype(np.float32)
+
+            y_grad = np.random.random_sample(shape).astype(self.dtype)
             momentum_var = np.array([momentum]).astype(np.float32)

             y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad = self.ref_forward_backward(
@@ -275,7 +284,7 @@ class TestBatchNormOpTraining(unittest.TestCase):
                 inputs=inputs,
                 outputs=outputs,
                 attrs=attrs)
-            block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)
+            block.create_var(name='y@GRAD', dtype=self.dtype, shape=y.shape)

             # generate backward op_desc
             grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
@@ -320,6 +329,11 @@ class TestBatchNormOpTraining(unittest.TestCase):
         pass


+class TestFP16BatchNormOpTraining(TestBatchNormOpTraining):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
 class TestBatchNormOpTrainingCase1(TestBatchNormOpTraining):
     def init_test_case(self):
         self.use_global_stats = False
...
@@ -51,8 +51,6 @@ class TestCos(OpTest):
         self.check_output_with_place(self.place, atol=1e-7)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')
...
@@ -56,8 +56,6 @@ class TestDropoutOp(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad_normal(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')
...
@@ -50,8 +50,6 @@ class TestExpNPUOP(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')

     def init_dtype(self):
...
@@ -59,9 +59,6 @@ class TestNPUHardSigmoid(OpTest):
         self.check_output_with_place(self.place, atol=1e-5)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')

     def set_npu(self):
...
@@ -66,8 +66,6 @@ class TestHardSwishNPU(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         # There is a problem that precision of grad result using float32
         # can't satisfy the default precision requirement
         # when compared with numeric_grads, but the results on
...
@@ -50,12 +50,8 @@ class TestLog(OpTest):
     def test_check_output(self):
         self.check_output_with_place(self.place)

-    # TODO(ascendrc): Add grad test
-    # def test_check_grad(self):
-    #     if self.dtype == np.float16:
-    #         return
-    #     self.check_grad(['X'], 'Out')
-    #
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Out')


 class TestLogFp16(OpTest):
...
@@ -54,9 +54,6 @@ class TestNPUNormOp(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(
             self.place, ['X'], 'Out', max_relative_error=0.006)
...
@@ -51,8 +51,6 @@ class TestPnormOp(OpTest):
         self.check_output_with_place(paddle.NPUPlace(0))

     def test_check_grad(self):
-        if self.dtype == "float16":
-            return
         self.check_grad_with_place(
             paddle.NPUPlace(0), ['X'], 'Out', user_defined_grads=self.gradient)
...
@@ -67,9 +67,6 @@ def create_test_fp16_class(parent):
             self.use_cudnn = False
             self.dtype = np.float16

-        def test_check_grad(self):
-            return
-
     cls_name = "{0}_{1}".format(parent.__name__, "Fp16Op")
     TestFp16Case.__name__ = cls_name
     globals()[cls_name] = TestFp16Case
...
@@ -40,8 +40,6 @@ class TestNPUReciprocal(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(
             self.place, ['X'], 'Out', max_relative_error=0.01)
...
@@ -56,8 +56,6 @@ class TestRelu6(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')

     def init_dtype(self):
...
@@ -44,8 +44,6 @@ class TestNPUSigmoid(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(
             self.place, ['X'], 'Out', max_relative_error=0.01)
...
@@ -87,8 +87,6 @@ class TestSoftmaxWithCrossEntropyOp(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         # fp32 has low precision, cpu and npu both need to relax the max_relative_error if using fp32
         self.check_grad_with_place(
             self.place, ['Logits'],
...
@@ -50,12 +50,11 @@ class TestSqrt(OpTest):
     def test_check_output(self):
         self.check_output_with_place(self.place)

-    # TODO(ascendrc): Add grad test
-    # def test_check_grad(self):
-    #     if self.dtype == np.float16:
-    #         return
-    #     self.check_grad(['X'], 'Out')
-    #
+    def test_check_grad(self):
+        if self.dtype == np.float16:
+            self.check_grad(['X'], 'Out', max_relative_error=0.009)
+        else:
+            self.check_grad(['X'], 'Out', max_relative_error=0.009)


 class TestSqrtFp16(OpTest):
...
@@ -51,8 +51,6 @@ class TestSquare(OpTest):
         self.check_output_with_place(self.place)

     def test_check_grad(self):
-        if self.dtype == np.float16:
-            return
         self.check_grad_with_place(self.place, ['X'], 'Out')
...
@@ -50,12 +50,11 @@ class TestTanh(OpTest):
     def test_check_output(self):
         self.check_output_with_place(self.place)

-    # TODO(ascendrc): Add grad test
-    # def test_check_grad(self):
-    #     if self.dtype == np.float16:
-    #         return
-    #     self.check_grad(['X'], 'Out')
-    #
+    def test_check_grad(self):
+        if self.dtype == np.float16:
+            self.check_grad(['X'], 'Out', max_relative_error=0.009)
+        else:
+            self.check_grad(['X'], 'Out', max_relative_error=0.009)


 class TestTanhFp16(OpTest):
...