From 85d5f26dee8cb7171e5b3075eef659e7f27c3ddf Mon Sep 17 00:00:00 2001
From: Difer <707065510@qq.com>
Date: Fri, 2 Jun 2023 16:47:16 +0800
Subject: [PATCH] 【PaddlePaddle Hackathon 4】No.56: add fp and bf16 for bernoulli (#54232)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add fp&bf16 bernoulli

* add check_dtype & fix error

* fix rocm error
---
 paddle/phi/kernels/gpu/bernoulli_kernel.cu | 15 +++++--
 python/paddle/tensor/random.py             |  4 +-
 test/legacy_test/test_bernoulli_op.py      | 47 ++++++++++++++++++++--
 3 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/paddle/phi/kernels/gpu/bernoulli_kernel.cu b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
index edcf29e2d88..60e7b90e801 100644
--- a/paddle/phi/kernels/gpu/bernoulli_kernel.cu
+++ b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
@@ -26,6 +26,7 @@
 
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
+#include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/distribution_helper.h"
@@ -51,11 +52,13 @@ __global__ void bernoulli_cuda_kernel(
   for (size_t i = 4 * thread_idx; i < size; i += total_thread * 4) {
     funcs::uniform_distribution<float> dist;
     float4 rand = dist(&state);
+    using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
 #pragma unroll
     for (size_t j = 0; j < 4; j++) {
       size_t idx = i + j;
       if (idx < size) {
-        out_data[idx] = static_cast<T>((&rand.x)[j] <= x_data[idx]);
+        out_data[idx] =
+            static_cast<T>((&rand.x)[j] <= static_cast<MPType>(x_data[idx]));
       }
     }
   }
@@ -85,5 +88,11 @@ void BernoulliKernel(const Context& ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(
-    bernoulli, GPU, ALL_LAYOUT, phi::BernoulliKernel, float, double) {}
+PD_REGISTER_KERNEL(bernoulli,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::BernoulliKernel,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16,
+                   float,
+                   double) {}
diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py
index a8206ff95bf..8683349c6e1 100644
--- a/python/paddle/tensor/random.py
+++ b/python/paddle/tensor/random.py
@@ -77,7 +77,9 @@ def bernoulli(x, name=None):
     if in_dynamic_mode():
         return _C_ops.bernoulli(x)
     else:
-        check_variable_and_dtype(x, "x", ["float32", "float64"], "bernoulli")
+        check_variable_and_dtype(
+            x, "x", ["float32", "float64", "float16", "uint16"], "bernoulli"
+        )
 
         helper = LayerHelper("randint", **locals())
         out = helper.create_variable_for_type_inference(
diff --git a/test/legacy_test/test_bernoulli_op.py b/test/legacy_test/test_bernoulli_op.py
index af08b07237f..cce3c09400e 100644
--- a/test/legacy_test/test_bernoulli_op.py
+++ b/test/legacy_test/test_bernoulli_op.py
@@ -15,9 +15,10 @@
 import unittest
 
 import numpy as np
-from eager_op_test import OpTest
+from eager_op_test import OpTest, convert_float_to_uint16
 
 import paddle
+from paddle.fluid import core
 
 
 def output_hist(out):
@@ -31,9 +32,18 @@ def output_hist(out):
 class TestBernoulliOp(OpTest):
     def setUp(self):
         self.op_type = "bernoulli"
-        self.inputs = {"X": np.random.uniform(size=(1000, 784))}
+        self.init_dtype()
+        self.init_test_case()
+        self.inputs = {"X": self.x}
         self.attrs = {}
-        self.outputs = {"Out": np.zeros((1000, 784)).astype("float32")}
+        self.outputs = {"Out": self.out}
+
+    def init_dtype(self):
+        self.dtype = np.float32
+
+    def init_test_case(self):
+        self.x = np.random.uniform(size=(1000, 784)).astype(self.dtype)
+        self.out = np.zeros((1000, 784)).astype(self.dtype)
 
     def test_check_output(self):
         self.check_output_customized(self.verify_output)
@@ -98,5 +108,36 @@ class TestRandomValue(unittest.TestCase):
         paddle.enable_static()
 
 
+class TestBernoulliFP16Op(TestBernoulliOp):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not compiled with CUDA or does not support bfloat16",
+)
+class TestBernoulliBF16Op(TestBernoulliOp):
+    def init_dtype(self):
+        self.dtype = np.uint16
+
+    def test_check_output(self):
+        place = core.CUDAPlace(0)
+        self.check_output_with_place_customized(self.verify_output, place)
+
+    def init_test_case(self):
+        self.x = convert_float_to_uint16(
+            np.random.uniform(size=(1000, 784)).astype("float32")
+        )
+        self.out = convert_float_to_uint16(
+            np.zeros((1000, 784)).astype("float32")
+        )
+
+    def verify_output(self, outs):
+        hist, prob = output_hist(np.array(outs[0]))
+        np.testing.assert_allclose(hist, prob, atol=0.01)
+
+
 if __name__ == "__main__":
     unittest.main()
-- 
GitLab
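Note (appended after the patch, not part of it): a minimal usage sketch of the dtypes this change enables, assuming a CUDA build of Paddle with the patch applied; the shape and variable names below are illustrative only.

    import paddle

    paddle.seed(100)
    # fp16/bf16 bernoulli is registered only for the GPU kernel in this patch
    paddle.set_device("gpu")

    # float16 probabilities in [0, 1); samples keep the input dtype
    p_fp16 = paddle.rand([4, 8]).astype("float16")
    s_fp16 = paddle.bernoulli(p_fp16)

    # bfloat16 path (needs a GPU with bfloat16 support)
    p_bf16 = paddle.rand([4, 8]).astype("bfloat16")
    s_bf16 = paddle.bernoulli(p_bf16)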