diff --git a/paddle/fluid/platform/float16.h b/paddle/fluid/platform/float16.h index a68dcc38aceeb4ce594bbf034681b3b075f69139..7c2c6add07afecf9b4bf171b00813c784c26709e 100644 --- a/paddle/fluid/platform/float16.h +++ b/paddle/fluid/platform/float16.h @@ -484,72 +484,107 @@ DEVICE inline bool operator>=(const half& a, const half& b) { #endif // PADDLE_CUDA_FP16 // Arithmetic operators for float16 on GPU -#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 -DEVICE inline float16 operator+(const float16& a, const float16& b) { +#if defined(PADDLE_CUDA_FP16) +HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return float16(__hadd(half(a), half(b))); +#else + return float16(float(a) + float(b)); } -DEVICE inline float16 operator-(const float16& a, const float16& b) { +HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return float16(__hsub(half(a), half(b))); +#else + return float16(float(a) - float(b)); } -DEVICE inline float16 operator*(const float16& a, const float16& b) { +HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return float16(__hmul(half(a), half(b))); +#else + return float16(float(a) * float(b)); } -DEVICE inline float16 operator/(const float16& a, const float16& b) { - // TODO(kexinzhao): check the cuda version that starts to support __hdiv +HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 + // TODO(kexinzhao): check which cuda version starts to support __hdiv float num = __half2float(half(a)); float denom = __half2float(half(b)); return float16(num / denom); +#else + return float16(float(a) / float(b)); } -DEVICE inline float16 operator-(const float16& a) { +HOSTDEVICE inline float16 operator-(const float16& a) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return float16(__hneg(half(a))); +#else + float16 res; + res.x = a.x ^ 0x8000; + return res; } -DEVICE inline float16& operator+=(float16& a, const float16& b) { +HOSTDEVICE inline float16& operator+=(float16& a, const float16& b) { a = a + b; return a; } -DEVICE inline float16& operator-=(float16& a, const float16& b) { +HOSTDEVICE inline float16& operator-=(float16& a, const float16& b) { a = a - b; return a; } -DEVICE inline float16& operator*=(float16& a, const float16& b) { +HOSTDEVICE inline float16& operator*=(float16& a, const float16& b) { a = a * b; return a; } -DEVICE inline float16& operator/=(float16& a, const float16& b) { +HOSTDEVICE inline float16& operator/=(float16& a, const float16& b) { a = a / b; return a; } -DEVICE inline bool operator==(const float16& a, const float16& b) { +HOSTDEVICE inline bool operator==(const float16& a, const float16& b) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return __heq(half(a), half(b)); +#else + return float(a) == float(b); } -DEVICE inline bool operator!=(const float16& a, const float16& b) { +HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return __hne(half(a), half(b)); +#else + return float(a) != float(b); } -DEVICE inline bool operator<(const float16& a, const float16& b) { +HOSTDEVICE inline bool operator<(const float16& a, const float16& b) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return __hlt(half(a), half(b)); +#else + return float(a) < float(b); } -DEVICE inline bool operator<=(const float16& a, const float16& b) { +HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return __hle(half(a), half(b)); +#else + return float(a) <= float(b); } -DEVICE inline bool operator>(const float16& a, const float16& b) { +HOSTDEVICE inline bool operator>(const float16& a, const float16& b) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return __hgt(half(a), half(b)); +#else + return float(a) > float(b); } -DEVICE inline bool operator>=(const float16& a, const float16& b) { +HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return __hge(half(a), half(b)); +#else + return float(a) >= float(b); } // Arithmetic operators for float16 on ARMv8.2-A CPU @@ -737,71 +772,71 @@ HOST inline bool operator>=(const float16& a, const float16& b) { return (res & 0xffff) != 0; } -// Arithmetic operators for float16, software emulated on other CPU/GPU +// Arithmetic operators for float16, software emulated on other CPU #else -HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) { +HOST inline float16 operator+(const float16& a, const float16& b) { return float16(float(a) + float(b)); } -HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) { +HOST inline float16 operator-(const float16& a, const float16& b) { return float16(float(a) - float(b)); } -HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) { +HOST inline float16 operator*(const float16& a, const float16& b) { return float16(float(a) * float(b)); } -HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) { +HOST inline float16 operator/(const float16& a, const float16& b) { return float16(float(a) / float(b)); } -HOSTDEVICE inline float16 operator-(const float16& a) { +HOST inline float16 operator-(const float16& a) { float16 res; res.x = a.x ^ 0x8000; return res; } -HOSTDEVICE inline float16& operator+=(float16& a, const float16& b) { +HOST inline float16& operator+=(float16& a, const float16& b) { a = float16(float(a) + float(b)); return a; } -HOSTDEVICE inline float16& operator-=(float16& a, const float16& b) { +HOST inline float16& operator-=(float16& a, const float16& b) { a = float16(float(a) - float(b)); return a; } -HOSTDEVICE inline float16& operator*=(float16& a, const float16& b) { +HOST inline float16& operator*=(float16& a, const float16& b) { a = float16(float(a) * float(b)); return a; } -HOSTDEVICE inline float16& operator/=(float16& a, const float16& b) { +HOST inline float16& operator/=(float16& a, const float16& b) { a = float16(float(a) / float(b)); return a; } -HOSTDEVICE inline bool operator==(const float16& a, const float16& b) { +HOST inline bool operator==(const float16& a, const float16& b) { return float(a) == float(b); } -HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) { +HOST inline bool operator!=(const float16& a, const float16& b) { return float(a) != float(b); } -HOSTDEVICE inline bool operator<(const float16& a, const float16& b) { +HOST inline bool operator<(const float16& a, const float16& b) { return float(a) < float(b); } -HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) { +HOST inline bool operator<=(const float16& a, const float16& b) { return float(a) <= float(b); } -HOSTDEVICE inline bool operator>(const float16& a, const float16& b) { +HOST inline bool operator>(const float16& a, const float16& b) { return float(a) > float(b); } -HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) { +HOST inline bool operator>=(const float16& a, const float16& b) { return float(a) >= float(b); } #endif