diff --git a/paddle/phi/kernels/funcs/elementwise_functor.h b/paddle/phi/kernels/funcs/elementwise_functor.h
index c51417730850cfd65ce8c98fe39a2aca524dfa03..b7dbf252103c6f30ed3d9f985cdc86eef26b17dc 100644
--- a/paddle/phi/kernels/funcs/elementwise_functor.h
+++ b/paddle/phi/kernels/funcs/elementwise_functor.h
@@ -563,6 +563,20 @@ struct RemainderFunctor {
   }
 };
 
+template <>
+struct RemainderFunctor<dtype::bfloat16> {
+  inline HOSTDEVICE dtype::bfloat16 operator()(const dtype::bfloat16 a,
+                                               const dtype::bfloat16 b) const {
+    float b_float = static_cast<float>(b);
+    float res = fmod(static_cast<float>(a), b_float);
+
+    // According to PR #26732: in dividend % divisor, the
+    // remainder shall have the same sign as the divisor.
+    if ((res != 0.0f) && ((res < 0.0f) != (b_float < 0.0f))) res += b_float;
+    return static_cast<dtype::bfloat16>(res);
+  }
+};
+
 template <typename T>
 struct InverseRemainderFunctor {
   inline HOSTDEVICE T operator()(const T a, const T b) const {
diff --git a/paddle/phi/kernels/kps/elementwise_kernel.cu b/paddle/phi/kernels/kps/elementwise_kernel.cu
index e1eb5f85288394bb8b4d8661e3c71959fca9b96b..c6e693ddd3b90f860cf866003d66cc253728e9db 100644
--- a/paddle/phi/kernels/kps/elementwise_kernel.cu
+++ b/paddle/phi/kernels/kps/elementwise_kernel.cu
@@ -117,7 +117,8 @@ PD_REGISTER_KERNEL(remainder,
                    double,
                    int,
                    int64_t,
-                   phi::dtype::float16) {}
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
 PD_REGISTER_KERNEL(
     floor_divide, KPS, ALL_LAYOUT, phi::FloorDivideKernel, int, int64_t) {}
 PD_REGISTER_KERNEL(elementwise_pow,
diff --git a/paddle/phi/kernels/legacy/kps/elementwise_raw_kernel.cu b/paddle/phi/kernels/legacy/kps/elementwise_raw_kernel.cu
index 8fab237f0a5ee7440438d567990d91a4e54e75cb..23ea1264c63f420dca2e7e80bf45659447e619d7 100644
--- a/paddle/phi/kernels/legacy/kps/elementwise_raw_kernel.cu
+++ b/paddle/phi/kernels/legacy/kps/elementwise_raw_kernel.cu
@@ -157,7 +157,8 @@ PD_REGISTER_KERNEL(remainder_raw,
                    double,
                    int,
                    float16,
-                   int64_t) {}
+                   int64_t,
+                   bfloat16) {}
 PD_REGISTER_KERNEL(floor_divide_raw,
                    KPS,
                    ALL_LAYOUT,
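Note on the new `RemainderFunctor<dtype::bfloat16>` above: it follows Python's modulo convention rather than C's `fmod`. The raw `fmod` result carries the sign of the dividend, so whenever its sign disagrees with the divisor's, one divisor is added back. A minimal Python sketch of the same logic (the name `py_style_remainder` is mine, not part of the patch):

```python
import math

def py_style_remainder(a: float, b: float) -> float:
    # C-style fmod keeps the sign of the dividend a.
    res = math.fmod(a, b)
    # Shift by one divisor when the signs disagree, so the
    # remainder takes the sign of the divisor b (Python semantics).
    if res != 0.0 and (res < 0.0) != (b < 0.0):
        res += b
    return res

assert py_style_remainder(-7.0, 3.0) == 2.0   # math.fmod gives -1.0
assert py_style_remainder(7.0, -3.0) == -2.0  # math.fmod gives 1.0
```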
diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py
index 55ebc5d28cff9cd818b1e7505fa6e2b9eba81ddb..8f34328241fef62b9c56806c7ea56890da6ed365 100644
--- a/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py
+++ b/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py
@@ -16,10 +16,15 @@ import random
 import unittest
 
 import numpy as np
-from eager_op_test import OpTest
+from eager_op_test import (
+    OpTest,
+    convert_float_to_uint16,
+    convert_uint16_to_float,
+)
 
 import paddle
 from paddle import fluid
+from paddle.fluid import core
 
 
 class TestElementwiseModOp(OpTest):
@@ -106,14 +111,17 @@ class TestElementwiseModOpFloat(TestElementwiseModOp):
             self.check_output()
 
 
-class TestElementwiseModOpFp16(TestElementwiseModOp):
+@unittest.skipIf(
+    not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
+)
+class TestElementwiseModFP16Op(TestElementwiseModOp):
     def init_dtype(self):
         self.dtype = np.float16
 
     def init_input_output(self):
         self.x = np.random.uniform(-1000, 1000, [10, 10]).astype(self.dtype)
         self.y = np.random.uniform(-100, 100, [10, 10]).astype(self.dtype)
-        self.out = np.mod(self.x, self.y)
+        self.out = np.fmod(self.y + np.fmod(self.x, self.y), self.y)
 
     def test_check_output(self):
         if self.attrs['axis'] == -1:
@@ -122,6 +130,83 @@ class TestElementwiseModOpFp16(TestElementwiseModOp):
             self.check_output()
 
 
+class TestElementwiseModFP16Op_ZeroDim1(TestElementwiseModFP16Op):
+    def init_input_output(self):
+        self.x = np.random.uniform(0, 10000, []).astype(np.float16)
+        self.y = np.random.uniform(0, 1000, []).astype(np.float16)
+        self.out = np.fmod(self.y + np.fmod(self.x, self.y), self.y)
+
+
+class TestElementwiseModFP16Op_ZeroDim2(TestElementwiseModFP16Op):
+    def init_input_output(self):
+        self.x = np.random.uniform(0, 10000, [10, 10]).astype(np.float16)
+        self.y = np.random.uniform(0, 1000, []).astype(np.float16)
+        self.out = np.fmod(self.y + np.fmod(self.x, self.y), self.y)
+
+
+class TestElementwiseModFP16Op_ZeroDim3(TestElementwiseModFP16Op):
+    def init_input_output(self):
+        self.x = np.random.uniform(0, 10000, []).astype(np.float16)
+        self.y = np.random.uniform(0, 1000, [10, 10]).astype(np.float16)
+        self.out = np.fmod(self.y + np.fmod(self.x, self.y), self.y)
+
+
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not compiled with CUDA or does not support bfloat16",
+)
+class TestElementwiseModBF16Op(OpTest):
+    def init_kernel_type(self):
+        self.use_mkldnn = False
+
+    def init_input_output(self):
+        self.x = np.random.uniform(0, 10000, [10, 10]).astype(np.float32)
+        self.x = convert_uint16_to_float(convert_float_to_uint16(self.x))
+        self.y = np.random.uniform(0, 1000, [10, 10]).astype(np.float32)
+        self.y = convert_uint16_to_float(convert_float_to_uint16(self.y))
+        self.out = np.fmod(self.y + np.fmod(self.x, self.y), self.y)
+
+    def setUp(self):
+        self.op_type = "elementwise_mod"
+        self.python_api = paddle.remainder
+        self.public_python_api = paddle.remainder
+        self.axis = -1
+        self.init_dtype()
+        self.init_input_output()
+        self.init_kernel_type()
+        self.init_axis()
+        self.inputs = {
+            'X': convert_float_to_uint16(
+                OpTest.np_dtype_to_fluid_dtype(self.x)
+            ),
+            'Y': convert_float_to_uint16(
+                OpTest.np_dtype_to_fluid_dtype(self.y)
+            ),
+        }
+        self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn}
+        self.outputs = {'Out': convert_float_to_uint16(self.out)}
+
+    def test_check_output(self):
+        place = core.CUDAPlace(0)
+        self.check_output_with_place(place)
+
+    def init_dtype(self):
+        self.dtype = np.uint16
+
+    def init_axis(self):
+        pass
+
+
+class TestElementwiseModBF16Op_ZeroDim1(TestElementwiseModBF16Op):
+    def init_input_output(self):
+        self.x = np.random.uniform(0, 10000, []).astype("float32")
+        self.x = convert_uint16_to_float(convert_float_to_uint16(self.x))
+        self.y = np.random.uniform(0, 1000, []).astype("float32")
+        self.y = convert_uint16_to_float(convert_float_to_uint16(self.y))
+        self.out = np.fmod(self.y + np.fmod(self.x, self.y), self.y)
+
+
 class TestElementwiseModOpDouble(TestElementwiseModOpFloat):
     def init_dtype(self):
         self.dtype = np.float64
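The tests compute the expected output as `np.fmod(self.y + np.fmod(self.x, self.y), self.y)` rather than `np.mod(self.x, self.y)`. The two agree mathematically, but the nested-`fmod` form mirrors the fmod-plus-sign-fix arithmetic the kernel itself performs, which presumably keeps the reference closer to the kernel at reduced precision. A quick standalone check of the identity (not part of the patch):

```python
import numpy as np

x = np.array([-7.0, 7.0, 7.5, -7.5])
y = np.array([3.0, -3.0, 2.0, 2.0])

ref = np.fmod(y + np.fmod(x, y), y)  # form used in the tests
mod = np.mod(x, y)                   # Python-style modulo

print(ref)  # [ 2.  -2.   1.5  0.5]
print(mod)  # [ 2.  -2.   1.5  0.5]
```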
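The BF16 tests also round-trip their float32 inputs through `convert_float_to_uint16`/`convert_uint16_to_float` before computing the reference, so the float32 reference is evaluated on values exactly representable in bfloat16 and the comparison does not inherit input quantization error. A rough stand-in for those helpers, assuming bfloat16 is the top 16 bits of a float32 (the real helpers in `eager_op_test` may round rather than truncate):

```python
import numpy as np

def float_to_bf16_bits(x):
    # Truncate float32 to bfloat16 by keeping the high 16 bits.
    return (np.asarray(x, np.float32).view(np.uint32) >> 16).astype(np.uint16)

def bf16_bits_to_float(bits):
    # Widen bfloat16 bits back to float32 (low mantissa bits become zero).
    return (bits.astype(np.uint32) << 16).view(np.float32)

x = np.random.uniform(0, 10000, [10, 10]).astype(np.float32)
x_q = bf16_bits_to_float(float_to_bf16_bits(x))
# Quantization is idempotent: a second round-trip changes nothing.
assert np.all(x_q == bf16_bits_to_float(float_to_bf16_bits(x_q)))
```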