diff --git a/paddle/phi/kernels/gpu/uniform_inplace_grad_kernel.cu b/paddle/phi/kernels/gpu/uniform_inplace_grad_kernel.cu
index 61efe8807643d34acd3700fe36feda9e3f636924..3b7f8a931278e9efebb19a2cb5da203ad7e28c09 100644
--- a/paddle/phi/kernels/gpu/uniform_inplace_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/uniform_inplace_grad_kernel.cu
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "paddle/phi/kernels/uniform_inplace_grad_kernel.h"
 
+#include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/full_kernel.h"
 
@@ -41,4 +42,6 @@ PD_REGISTER_KERNEL(uniform_inplace_grad,
                    ALL_LAYOUT,
                    phi::UniformInplaceGradKernel,
                    float,
-                   double) {}
+                   double,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu b/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu
index 29bc2f4de5b865800685e2f33e9427f05a5d7920..5c3a886ad87e97ad0eb8a2b3568d331f1c1016a7 100644
--- a/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu
+++ b/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <thrust/random.h>
 
 #include "gflags/gflags.h"
+#include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/distribution_helper.h"
 #include "paddle/phi/kernels/funcs/index_impl.cu.h"
@@ -72,8 +73,12 @@ void UniformInplaceKernel(const Context& ctx,
     funcs::distribution_and_transform<T>(ctx, out, dist, trans);
   } else {
     // Use OP seed
-    auto func =
-        UniformGenerator<T>(min, max, seed, diag_num, diag_step, diag_val);
+    auto func = UniformGenerator<T>(static_cast<T>(min),
+                                    static_cast<T>(max),
+                                    seed,
+                                    diag_num,
+                                    diag_step,
+                                    static_cast<T>(diag_val));
     IndexKernel<T, UniformGenerator<T>>(ctx, out, func);
   }
 }
@@ -85,4 +90,6 @@ PD_REGISTER_KERNEL(uniform_inplace,
                    ALL_LAYOUT,
                    phi::UniformInplaceKernel,
                    float,
-                   double) {}
+                   double,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py
index a704ae9a4ee218b0a629ab366bf03988a5834380..c4a243f47897bb4abf2306abf991904c045c3aff 100644
--- a/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py
+++ b/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py
@@ -15,9 +15,19 @@
 import unittest
 
 import numpy as np
+from eager_op_test import OpTest, convert_uint16_to_float
 
 import paddle
 from paddle import fluid
+from paddle.fluid import core
+
+
+def output_hist(out):
+    hist, _ = np.histogram(out, range=(-1, 1))
+    hist = hist.astype("float32")
+    hist /= float(out.size)
+    prob = 0.1 * np.ones(10)
+    return hist, prob
 
 
 class TestUniformRandomInplaceOpDtype(unittest.TestCase):
@@ -44,6 +54,72 @@ class TestUniformRandomInplaceOpDtype(unittest.TestCase):
         test_fp64()
 
 
+class TestUniformRandomInplaceFP16Op(OpTest):
+    def setUp(self):
+        self.op_type = "uniform_random_inplace"
+        self.dtype = np.float16
+        self.shape = (1000, 784)
+        x = np.random.random(self.shape).astype(self.dtype)
+        y = np.random.random(self.shape).astype(self.dtype)
+        self.inputs = {"X": x}
+        self.outputs = {"Out": y}
+        self.init_attrs()
+
+    def init_attrs(self):
+        self.output_hist = output_hist
+
+    def test_check_output(self):
+        self.check_output_customized(self.verify_output)
+
+    def verify_output(self, outs):
+        hist, prob = self.output_hist(np.array(outs[0]))
+        np.testing.assert_allclose(hist, prob, rtol=0, atol=0.001)
+
+    # TODO: Dynamic-graph checking is temporarily disabled (check_dygraph=False) because self.python_api = paddle.uniform_random_inplace is not set.
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Out', check_dygraph=False)
+
+
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not compiled with CUDA or does not support bfloat16",
+)
+class TestUniformRandomInplaceBF16Op(OpTest):
+    def setUp(self):
+        self.op_type = "uniform_random_inplace"
+        self.dtype = np.uint16
+        self.shape = (1000, 784)
+        x = np.random.random(self.shape).astype(self.dtype)
+        y = np.random.random(self.shape).astype(self.dtype)
+        self.inputs = {'X': x}
+        self.outputs = {'Out': y}
+        self.init_attrs()
+        self.place = core.CUDAPlace(0)
+
+    def init_attrs(self):
+        self.output_hist = output_hist
+
+    def test_check_output(self):
+        self.check_output_with_place_customized(self.verify_output, self.place)
+
+    def verify_output(self, outs):
+        result = convert_uint16_to_float(np.array(outs[0]))
+        hist, prob = self.output_hist(result)
+        np.testing.assert_allclose(hist, prob, rtol=0, atol=0.002)
+
+    # TODO: Dynamic-graph checking is temporarily disabled (check_dygraph=False) because self.python_api = paddle.uniform_random_inplace is not set.
+    def test_check_grad(self):
+        grads = [paddle.zeros(self.shape, dtype=self.dtype)]
+        self.check_grad_with_place(
+            self.place,
+            ['X'],
+            'Out',
+            check_dygraph=False,
+            user_defined_grads=grads,
+        )
+
+
 class TestUniformRandomInplaceOpIsInplace(unittest.TestCase):
     def setUp(self):
         self.shape = (1000, 784)