diff --git a/paddle/fluid/platform/float16.h b/paddle/fluid/platform/float16.h
index 52fb8c2531357ad7a2b2f8613e5c7fbcef52c6bb..a68dcc38aceeb4ce594bbf034681b3b075f69139 100644
--- a/paddle/fluid/platform/float16.h
+++ b/paddle/fluid/platform/float16.h
@@ -483,8 +483,77 @@ DEVICE inline bool operator>=(const half& a, const half& b) {
 
 #endif  // PADDLE_CUDA_FP16
 
-// Arithmetic operators on ARMv8.2-A CPU
-#if defined(PADDLE_WITH_NATIVE_FP16)
+// Arithmetic operators for float16 on GPU
+#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+DEVICE inline float16 operator+(const float16& a, const float16& b) {
+  return float16(__hadd(half(a), half(b)));
+}
+
+DEVICE inline float16 operator-(const float16& a, const float16& b) {
+  return float16(__hsub(half(a), half(b)));
+}
+
+DEVICE inline float16 operator*(const float16& a, const float16& b) {
+  return float16(__hmul(half(a), half(b)));
+}
+
+DEVICE inline float16 operator/(const float16& a, const float16& b) {
+  // TODO(kexinzhao): check the cuda version that starts to support __hdiv
+  float num = __half2float(half(a));
+  float denom = __half2float(half(b));
+  return float16(num / denom);
+}
+
+DEVICE inline float16 operator-(const float16& a) {
+  return float16(__hneg(half(a)));
+}
+
+DEVICE inline float16& operator+=(float16& a, const float16& b) {
+  a = a + b;
+  return a;
+}
+
+DEVICE inline float16& operator-=(float16& a, const float16& b) {
+  a = a - b;
+  return a;
+}
+
+DEVICE inline float16& operator*=(float16& a, const float16& b) {
+  a = a * b;
+  return a;
+}
+
+DEVICE inline float16& operator/=(float16& a, const float16& b) {
+  a = a / b;
+  return a;
+}
+
+DEVICE inline bool operator==(const float16& a, const float16& b) {
+  return __heq(half(a), half(b));
+}
+
+DEVICE inline bool operator!=(const float16& a, const float16& b) {
+  return __hne(half(a), half(b));
+}
+
+DEVICE inline bool operator<(const float16& a, const float16& b) {
+  return __hlt(half(a), half(b));
+}
+
+DEVICE inline bool operator<=(const float16& a, const float16& b) {
+  return __hle(half(a), half(b));
+}
+
+DEVICE inline bool operator>(const float16& a, const float16& b) {
+  return __hgt(half(a), half(b));
+}
+
+DEVICE inline bool operator>=(const float16& a, const float16& b) {
+  return __hge(half(a), half(b));
+}
+
+// Arithmetic operators for float16 on ARMv8.2-A CPU
+#elif defined(PADDLE_WITH_NATIVE_FP16)
 HOST inline float16 operator+(const float16& a, const float16& b) {
   float16 res;
   asm volatile(
@@ -668,7 +737,7 @@ HOST inline bool operator>=(const float16& a, const float16& b) {
   return (res & 0xffff) != 0;
 }
 
-// Arithmetic operators, software emulated on other CPU
+// Arithmetic operators for float16, software emulated on other CPU/GPU
 #else
 HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
   return float16(float(a) + float(b));
diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py
index 5e2c460c41e45b5edb75567aa57278714346edbd..2939895d79be8988cd97d61ae53f73ac3ed8a2fe 100644
--- a/python/paddle/fluid/tests/unittests/test_dropout_op.py
+++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py
@@ -86,10 +86,13 @@ class TestDropoutOp5(OpTest):
 class TestFP16DropoutOp1(OpTest):
     def setUp(self):
         x = np.random.random((32, 64)).astype("float16")
+        prob = 0.35
+        out = x * (1.0 - prob)
+
         self.op_type = "dropout"
         self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
-        self.attrs = {'dropout_prob': 0.35, 'fix_seed': True, 'is_test': True}
-        self.outputs = {'Out': x * (1.0 - self.attrs['dropout_prob'])}
+        self.attrs = {'dropout_prob': prob, 'fix_seed': True, 'is_test': True}
+        self.outputs = {'Out': out}
 
     def test_check_output(self):
         if core.is_compiled_with_cuda() and core.op_support_gpu("dropout"):
@@ -99,10 +102,13 @@ class TestFP16DropoutOp1(OpTest):
 class TestFP16DropoutOp2(OpTest):
     def setUp(self):
         x = np.random.random((32, 64, 3)).astype("float16")
+        prob = 0.75
+        out = x * (1.0 - prob)
+
         self.op_type = "dropout"
         self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
-        self.attrs = {'dropout_prob': 0.75, 'is_test': True}
-        self.outputs = {'Out': x * (1.0 - self.attrs['dropout_prob'])}
+        self.attrs = {'dropout_prob': prob, 'is_test': True}
+        self.outputs = {'Out': out}
 
     def test_check_output(self):
         if core.is_compiled_with_cuda() and core.op_support_gpu("dropout"):
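
Below is a minimal sketch, not part of the patch, of how the device-side float16 operators added above could be exercised from a CUDA kernel. The kernel name, the launch shape, and the assumption that float16 is paddle::platform::float16 and that the header can be included from a .cu file are illustrative only.

// Sketch only: assumes float16 == paddle::platform::float16 and that
// float16.h is includable from device code; names are hypothetical.
#include "paddle/fluid/platform/float16.h"

using paddle::platform::float16;

__global__ void ScaleAndShiftFP16(const float16* x, float16 scale,
                                  float16 shift, float16* out, int n) {
  int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx < n) {
    // On devices with compute capability >= 5.3 these operators lower to
    // __hmul/__hadd; on older devices the software-emulated HOSTDEVICE
    // fallback operators from the same header are used instead.
    out[idx] = x[idx] * scale + shift;
  }
}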