diff --git a/paddle/phi/kernels/gpu/poisson_grad_kernel.cu b/paddle/phi/kernels/gpu/poisson_grad_kernel.cu
index 8c16bc51fffe5435b8b63393c197326858e49992..be7d28a6630cc300be7ad0ab4d0a8ff1405d1a20 100644
--- a/paddle/phi/kernels/gpu/poisson_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/poisson_grad_kernel.cu
@@ -15,5 +15,11 @@
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/impl/poisson_grad_kernel_impl.h"
 
-PD_REGISTER_KERNEL(
-    poisson_grad, GPU, ALL_LAYOUT, phi::PoissonGradKernel, float, double) {}
+PD_REGISTER_KERNEL(poisson_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::PoissonGradKernel,
+                   float,
+                   double,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/gpu/poisson_kernel.cu b/paddle/phi/kernels/gpu/poisson_kernel.cu
index 302a9fe5ce581e3d0557712af7809dda2c700aba..1d1968b30ae6efb0c8dd46e8e4007fee8b9e7c5a 100644
--- a/paddle/phi/kernels/gpu/poisson_kernel.cu
+++ b/paddle/phi/kernels/gpu/poisson_kernel.cu
@@ -64,5 +64,11 @@ void PoissonKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) {
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(
-    poisson, GPU, ALL_LAYOUT, phi::PoissonKernel, float, double) {}
+PD_REGISTER_KERNEL(poisson,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::PoissonKernel,
+                   float,
+                   double,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/test/legacy_test/test_poisson_op.py b/test/legacy_test/test_poisson_op.py
index ee66d578014c70395ec3525f8118d2780886458c..84edf6a322189485124377a74e4e830b69099468 100644
--- a/test/legacy_test/test_poisson_op.py
+++ b/test/legacy_test/test_poisson_op.py
@@ -16,9 +16,14 @@ import math
 import unittest
 
 import numpy as np
-from eager_op_test import OpTest
+from eager_op_test import (
+    OpTest,
+    convert_float_to_uint16,
+    convert_uint16_to_float,
+)
 
 import paddle
+from paddle.fluid import core
 
 paddle.enable_static()
 paddle.seed(100)
@@ -42,17 +47,20 @@ class TestPoissonOp1(OpTest):
     def setUp(self):
         self.op_type = "poisson"
         self.python_api = paddle.tensor.poisson
+        self.init_dtype()
         self.config()
 
         self.attrs = {}
         self.inputs = {'X': np.full([2048, 1024], self.lam, dtype=self.dtype)}
         self.outputs = {'Out': np.ones([2048, 1024], dtype=self.dtype)}
 
+    def init_dtype(self):
+        self.dtype = "float64"
+
     def config(self):
         self.lam = 10
         self.a = 5
         self.b = 15
-        self.dtype = "float64"
 
     def verify_output(self, outs):
         hist, prob = output_hist(np.array(outs[0]), self.lam, self.a, self.b)
@@ -368,5 +376,57 @@ class TestPoissonAPI(unittest.TestCase):
         paddle.enable_static()
 
 
+class TestPoissonFP16OP(TestPoissonOp1):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not compiled with CUDA or the place does not support bfloat16",
+)
+class TestPoissonBF16Op(OpTest):
+    def setUp(self):
+        self.op_type = "poisson"
+        self.python_api = paddle.tensor.poisson
+        self.__class__.op_type = self.op_type
+        self.config()
+        x = np.full([2048, 1024], self.lam, dtype="float32")
+        out = np.ones([2048, 1024], dtype="float32")
+        self.attrs = {}
+        self.inputs = {'X': convert_float_to_uint16(x)}
+        self.outputs = {'Out': convert_float_to_uint16(out)}
+
+    def config(self):
+        self.lam = 10
+        self.a = 5
+        self.b = 15
+        self.dtype = np.uint16
+
+    def verify_output(self, outs):
+        hist, prob = output_hist(
+            convert_uint16_to_float(np.array(outs[0])), self.lam, self.a, self.b
+        )
+        np.testing.assert_allclose(hist, prob, rtol=0.01)
+
+    def test_check_output(self):
+        place = core.CUDAPlace(0)
+        self.check_output_with_place_customized(self.verify_output, place)
+
+    def test_check_grad(self):
+        place = core.CUDAPlace(0)
+        # Poisson sampling is not differentiable; the grad kernel fills zeros.
+        self.check_grad_with_place(
+            place,
+            ['X'],
+            'Out',
+            user_defined_grads=[np.zeros([2048, 1024], dtype="float32")],
+            user_defined_grad_outputs=[
+                np.random.rand(2048, 1024).astype("float32")
+            ],
+        )
+
+
 if __name__ == "__main__":
     unittest.main()
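
Usage note (not part of the patch): a minimal sketch of exercising the newly
registered low-precision GPU kernels through the public paddle.poisson API,
which wraps the kernels registered above. Assumes a CUDA build of Paddle; the
tensor values shown here are illustrative only.

    import paddle

    paddle.seed(100)
    # Rate tensor in half precision; on GPU, sampling now dispatches to the
    # new float16 kernel (dtype="bfloat16" works the same way).
    x = paddle.full([4, 4], 10.0, dtype="float16")
    out = paddle.poisson(x)  # element-wise Poisson(lam=x) samples
    print(out.dtype)  # paddle.float16; output dtype matches the input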