diff --git a/paddle/phi/kernels/funcs/eigen/sign.cu b/paddle/phi/kernels/funcs/eigen/sign.cu
index 4caed688013dc7a4e34bdabd2d7707e81a07be07..b630ba7bb6c4082cceab32283e68864eb0965655 100644
--- a/paddle/phi/kernels/funcs/eigen/sign.cu
+++ b/paddle/phi/kernels/funcs/eigen/sign.cu
@@ -32,6 +32,7 @@ struct EigenSign<Eigen::GpuDevice, T> {
 template struct EigenSign<Eigen::GpuDevice, float>;
 template struct EigenSign<Eigen::GpuDevice, double>;
 template struct EigenSign<Eigen::GpuDevice, dtype::float16>;
+template struct EigenSign<Eigen::GpuDevice, dtype::bfloat16>;
 
 }  // namespace funcs
 }  // namespace phi
diff --git a/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu b/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu
index 057f7e465c055912b0f0dac3c9b644693ff27ed1..a2ec60109d6404a7c7b99efb642837d7f3ea8f03 100644
--- a/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu
@@ -161,5 +161,6 @@ PD_REGISTER_KERNEL(overlap_add_grad,
                    float,
                    double,
                    phi::dtype::float16,
+                   phi::dtype::bfloat16,
                    phi::dtype::complex<float>,
                    phi::dtype::complex<double>) {}
diff --git a/paddle/phi/kernels/gpu/overlap_add_kernel.cu b/paddle/phi/kernels/gpu/overlap_add_kernel.cu
index cf56095db5ea7fd4a8729a4144727ca512efc385..b8726b8d8e15adde8a4152505b009996cecc8ab6 100644
--- a/paddle/phi/kernels/gpu/overlap_add_kernel.cu
+++ b/paddle/phi/kernels/gpu/overlap_add_kernel.cu
@@ -147,5 +147,6 @@ PD_REGISTER_KERNEL(overlap_add,
                    float,
                    double,
                    phi::dtype::float16,
+                   phi::dtype::bfloat16,
                    phi::dtype::complex<float>,
                    phi::dtype::complex<double>) {}
diff --git a/paddle/phi/kernels/gpu/sign_kernel.cu.cc b/paddle/phi/kernels/gpu/sign_kernel.cu.cc
index 37f10243dc596de1e5f5b0b420d925f5e7ebf759..71cd1d39b687d6a889209899f39acec5368883fb 100644
--- a/paddle/phi/kernels/gpu/sign_kernel.cu.cc
+++ b/paddle/phi/kernels/gpu/sign_kernel.cu.cc
@@ -19,9 +19,13 @@ limitations under the License. */
 #include "paddle/phi/kernels/impl/sign_kernel_impl.h"
 
 // See Note [ Why still include the fluid headers? ]
-#include "paddle/phi/common/float16.h"
-
-using float16 = phi::dtype::float16;
-
-PD_REGISTER_KERNEL(
-    sign, GPU, ALL_LAYOUT, phi::SignKernel, float, double, float16) {}
+#include "paddle/phi/common/amp_type_traits.h"
+
+PD_REGISTER_KERNEL(sign,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::SignKernel,
+                   float,
+                   double,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/python/paddle/fluid/tests/unittests/test_overlap_add_op.py b/python/paddle/fluid/tests/unittests/test_overlap_add_op.py
index d0e5cd79c3b899b4072939677e5ba64b6d9f62e7..98d4ce10aaabb47b1732ba390e14cbc8b78dcadf 100644
--- a/python/paddle/fluid/tests/unittests/test_overlap_add_op.py
+++ b/python/paddle/fluid/tests/unittests/test_overlap_add_op.py
@@ -15,14 +15,15 @@
 import unittest
 
 import numpy as np
-from eager_op_test import OpTest
+from eager_op_test import OpTest, convert_float_to_uint16
 
 import paddle
+from paddle.fluid import core
 
 
 def overlap_add(x, hop_length, axis=-1):
     assert axis in [0, -1], 'axis should be 0/-1.'
-    assert len(x.shape) >= 2, 'Input dims shoulb be >= 2.'
+    assert len(x.shape) >= 2, 'Input dims should be >= 2.'
 
     squeeze_output = False
     if len(x.shape) == 2:
@@ -101,6 +102,58 @@ class TestOverlapAddOp(OpTest):
         paddle.disable_static()
 
 
+class TestOverlapAddFP16Op(TestOverlapAddOp):
+    def initTestCase(self):
+        input_shape = (50, 3)
+        input_type = 'float16'
+        attrs = {
+            'hop_length': 4,
+            'axis': -1,
+        }
+        return input_shape, input_type, attrs
+
+
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not compiled with CUDA or not support bfloat16",
+)
+class TestOverlapAddBF16Op(OpTest):
+    def setUp(self):
+        self.op_type = "overlap_add"
+        self.python_api = paddle.signal.overlap_add
+        self.shape, self.type, self.attrs = self.initTestCase()
+        self.np_dtype = np.float32
+        self.dtype = np.uint16
+        self.inputs = {
+            'X': np.random.random(size=self.shape).astype(self.np_dtype),
+        }
+        self.outputs = {'Out': overlap_add(x=self.inputs['X'], **self.attrs)}
+
+        self.inputs['X'] = convert_float_to_uint16(self.inputs['X'])
+        self.outputs['Out'] = convert_float_to_uint16(self.outputs['Out'])
+        self.place = core.CUDAPlace(0)
+
+    def initTestCase(self):
+        input_shape = (50, 3)
+        input_type = np.uint16
+        attrs = {
+            'hop_length': 4,
+            'axis': -1,
+        }
+        return input_shape, input_type, attrs
+
+    def test_check_output(self):
+        paddle.enable_static()
+        self.check_output_with_place(self.place)
+        paddle.disable_static()
+
+    def test_check_grad_normal(self):
+        paddle.enable_static()
+        self.check_grad_with_place(self.place, ['X'], 'Out')
+        paddle.disable_static()
+
+
 class TestCase1(TestOverlapAddOp):
     def initTestCase(self):
         input_shape = (3, 50)
diff --git a/python/paddle/fluid/tests/unittests/test_sign_op.py b/python/paddle/fluid/tests/unittests/test_sign_op.py
index 79ee4ceff5f219c8c6f7cac3b5b23d82ba08c122..2617c2451f330dd295fdefe1473ec0a9127be8d9 100644
--- a/python/paddle/fluid/tests/unittests/test_sign_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sign_op.py
@@ -17,7 +17,7 @@ import unittest
 import gradient_checker
 import numpy as np
 from decorator_helper import prog_scope
-from eager_op_test import OpTest
+from eager_op_test import OpTest, convert_float_to_uint16
 
 import paddle
 from paddle import fluid
@@ -40,6 +40,42 @@ class TestSignOp(OpTest):
         self.check_grad(['X'], 'Out')
 
 
+class TestSignFP16Op(TestSignOp):
+    def setUp(self):
+        self.op_type = "sign"
+        self.python_api = paddle.sign
+        self.inputs = {
+            'X': np.random.uniform(-10, 10, (10, 10)).astype("float16")
+        }
+        self.outputs = {'Out': np.sign(self.inputs['X'])}
+
+
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not compiled with CUDA or not support bfloat16",
+)
+class TestSignBF16Op(OpTest):
+    def setUp(self):
+        self.op_type = "sign"
+        self.python_api = paddle.sign
+        self.dtype = np.uint16
+        self.inputs = {
+            'X': np.random.uniform(-10, 10, (10, 10)).astype("float32")
+        }
+        self.outputs = {'Out': np.sign(self.inputs['X'])}
+
+        self.inputs['X'] = convert_float_to_uint16(self.inputs['X'])
+        self.outputs['Out'] = convert_float_to_uint16(self.outputs['Out'])
+        self.place = core.CUDAPlace(0)
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def test_check_grad(self):
+        self.check_grad_with_place(self.place, ['X'], 'Out')
+
+
 class TestSignOpError(unittest.TestCase):
     def test_errors(self):
         with program_guard(Program(), Program()):
@@ -97,7 +133,7 @@ class TestSignDoubleGradCheck(unittest.TestCase):
 
     @prog_scope()
     def func(self, place):
-        # the shape of input variable should be clearly specified, not inlcude -1.
+        # the shape of input variable should be clearly specified, not include -1.
         eps = 0.005
         dtype = np.float32
 
@@ -128,7 +164,7 @@ class TestSignTripleGradCheck(unittest.TestCase):
 
     @prog_scope()
    def func(self, place):
-        # the shape of input variable should be clearly specified, not inlcude -1.
+        # the shape of input variable should be clearly specified, not include -1.
         eps = 0.005
         dtype = np.float32
 
diff --git a/python/paddle/signal.py b/python/paddle/signal.py
index e404ec08ffb903a587901f7f44004da25f5c0a18..e1580b0007549212419f32c37d279bb1b0f7614b 100644
--- a/python/paddle/signal.py
+++ b/python/paddle/signal.py
@@ -219,7 +219,10 @@ def overlap_add(x, hop_length, axis=-1, name=None):
         out = op(x, *attrs)
     else:
         check_variable_and_dtype(
-            x, 'x', ['int32', 'int64', 'float16', 'float32', 'float64'], op_type
+            x,
+            'x',
+            ['int32', 'int64', 'float16', 'float32', 'float64', 'uint16'],
+            op_type,
         )
         helper = LayerHelper(op_type, **locals())
         dtype = helper.input_dtype(input_param_name='x')
diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index 1e969be880401ef5e6f1444473112eb04e9f1b1e..fe41200378793d1c52cb7c74701bdb6b824808c5 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -3677,7 +3677,7 @@ def sign(x, name=None):
         return _C_ops.sign(x)
     else:
         check_variable_and_dtype(
-            x, 'x', ['float16', 'float32', 'float64'], 'sign'
+            x, 'x', ['float16', 'float32', 'float64', 'uint16'], 'sign'
         )
         helper = LayerHelper("sign", **locals())
         out = helper.create_variable_for_type_inference(dtype=x.dtype)
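
Not part of the patch above — a minimal, hypothetical smoke test for the new bfloat16 GPU paths. It assumes a CUDA build of Paddle on a device that supports bfloat16; the tensor shape and hop_length are illustrative, not taken from the diff.

    # Exercise the bfloat16 kernels registered by this change (sketch only).
    import paddle

    paddle.set_device('gpu')
    x = paddle.uniform([50, 3], min=-10.0, max=10.0).cast('bfloat16')

    s = paddle.sign(x)                              # GPU sign kernel, bfloat16
    y = paddle.signal.overlap_add(x, hop_length=4)  # GPU overlap_add kernel, bfloat16
    print(s.dtype, y.dtype)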