未验证 提交 78375990 编写于 作者: S sneaxiy 提交者: GitHub

refine float16 implementation (#38439)

上级 5f7e4a21
...@@ -95,7 +95,7 @@ class ClipKernel : public framework::OpKernel<T> { ...@@ -95,7 +95,7 @@ class ClipKernel : public framework::OpKernel<T> {
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"max should be greater than or equal to min. " "max should be greater than or equal to min. "
"But received min = %f, max = %f", "But received min = %f, max = %f",
min, max)); static_cast<float>(min), static_cast<float>(max)));
auto* x_var = context.InputVar("X"); auto* x_var = context.InputVar("X");
if (x_var->IsType<framework::LoDTensor>()) { if (x_var->IsType<framework::LoDTensor>()) {
......
...@@ -73,7 +73,7 @@ template <> ...@@ -73,7 +73,7 @@ template <>
__forceinline__ __device__ float16 CudaShuffleDownSync(unsigned mask, __forceinline__ __device__ float16 CudaShuffleDownSync(unsigned mask,
float16 val, int delta, float16 val, int delta,
int width) { int width) {
return float16(__shfl_down_sync(mask, static_cast<half>(val), return float16(__shfl_down_sync(mask, val.to_half(),
static_cast<unsigned>(delta), width)); static_cast<unsigned>(delta), width));
} }
...@@ -103,7 +103,7 @@ CudaShuffleDownSync(unsigned mask, paddle::platform::complex<double> val, ...@@ -103,7 +103,7 @@ CudaShuffleDownSync(unsigned mask, paddle::platform::complex<double> val,
template <> template <>
__forceinline__ __device__ float16 CudaShuffleXorSync(unsigned mask, __forceinline__ __device__ float16 CudaShuffleXorSync(unsigned mask,
float16 val, int width) { float16 val, int width) {
return float16(__shfl_xor_sync(mask, static_cast<half>(val), width)); return float16(__shfl_xor_sync(mask, val.to_half(), width));
} }
template <> template <>
......
...@@ -214,7 +214,7 @@ struct PADDLE_ALIGN(2) float16 { ...@@ -214,7 +214,7 @@ struct PADDLE_ALIGN(2) float16 {
// Conversion opertors // Conversion opertors
#ifdef PADDLE_CUDA_FP16 #ifdef PADDLE_CUDA_FP16
HOSTDEVICE inline explicit operator half() const { HOSTDEVICE inline half to_half() const {
#if defined(PADDLE_WITH_HIP) || CUDA_VERSION >= 9000 #if defined(PADDLE_WITH_HIP) || CUDA_VERSION >= 9000
__half_raw h; __half_raw h;
h.x = x; h.x = x;
...@@ -233,7 +233,7 @@ struct PADDLE_ALIGN(2) float16 { ...@@ -233,7 +233,7 @@ struct PADDLE_ALIGN(2) float16 {
} }
#endif #endif
HOSTDEVICE inline explicit operator float() const { HOSTDEVICE inline operator float() const {
#if defined(PADDLE_CUDA_FP16) && \ #if defined(PADDLE_CUDA_FP16) && \
(defined(__HIPCC__) || (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300)) (defined(__HIPCC__) || (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300))
half tmp = *reinterpret_cast<const half*>(this); half tmp = *reinterpret_cast<const half*>(this);
...@@ -302,7 +302,7 @@ struct PADDLE_ALIGN(2) float16 { ...@@ -302,7 +302,7 @@ struct PADDLE_ALIGN(2) float16 {
return static_cast<uint64_t>(static_cast<float>(*this)); return static_cast<uint64_t>(static_cast<float>(*this));
} }
HOSTDEVICE inline explicit operator double() const { HOSTDEVICE inline operator double() const {
return static_cast<double>(static_cast<float>(*this)); return static_cast<double>(static_cast<float>(*this));
} }
...@@ -350,7 +350,7 @@ DEVICE inline half operator+(const half& a, const half& b) { ...@@ -350,7 +350,7 @@ DEVICE inline half operator+(const half& a, const half& b) {
return __hadd(a, b); return __hadd(a, b);
#else #else
float res = static_cast<float>(float16(a)) + static_cast<float>(float16(b)); float res = static_cast<float>(float16(a)) + static_cast<float>(float16(b));
return half(float16(res)); return float16(res).to_half();
#endif #endif
} }
...@@ -359,7 +359,7 @@ DEVICE inline half operator-(const half& a, const half& b) { ...@@ -359,7 +359,7 @@ DEVICE inline half operator-(const half& a, const half& b) {
return __hsub(a, b); return __hsub(a, b);
#else #else
float res = static_cast<float>(float16(a)) - static_cast<float>(float16(b)); float res = static_cast<float>(float16(a)) - static_cast<float>(float16(b));
return half(float16(res)); return float16(res).to_half();
#endif #endif
} }
...@@ -368,7 +368,7 @@ DEVICE inline half operator*(const half& a, const half& b) { ...@@ -368,7 +368,7 @@ DEVICE inline half operator*(const half& a, const half& b) {
return __hmul(a, b); return __hmul(a, b);
#else #else
float res = static_cast<float>(float16(a)) * static_cast<float>(float16(b)); float res = static_cast<float>(float16(a)) * static_cast<float>(float16(b));
return half(float16(res)); return float16(res).to_half();
#endif #endif
} }
...@@ -379,7 +379,7 @@ DEVICE inline half operator/(const half& a, const half& b) { ...@@ -379,7 +379,7 @@ DEVICE inline half operator/(const half& a, const half& b) {
return __float2half(num / denom); return __float2half(num / denom);
#else #else
float res = static_cast<float>(float16(a)) / static_cast<float>(float16(b)); float res = static_cast<float>(float16(a)) / static_cast<float>(float16(b));
return half(float16(res)); return float16(res).to_half();
#endif #endif
} }
...@@ -388,7 +388,7 @@ DEVICE inline half operator-(const half& a) { ...@@ -388,7 +388,7 @@ DEVICE inline half operator-(const half& a) {
return __hneg(a); return __hneg(a);
#else #else
float res = -static_cast<float>(float16(a)); float res = -static_cast<float>(float16(a));
return half(float16(res)); return float16(res).to_half();
#endif #endif
} }
...@@ -470,7 +470,7 @@ DEVICE inline bool operator>=(const half& a, const half& b) { ...@@ -470,7 +470,7 @@ DEVICE inline bool operator>=(const half& a, const half& b) {
// in __host__ __device__ function // in __host__ __device__ function
#if defined(__HIPCC__) #if defined(__HIPCC__)
DEVICE inline float16 operator+(const float16& a, const float16& b) { DEVICE inline float16 operator+(const float16& a, const float16& b) {
return float16(__hadd(half(a), half(b))); return float16(__hadd(a.to_half(), b.to_half()));
} }
HOST inline float16 operator+(const float16& a, const float16& b) { HOST inline float16 operator+(const float16& a, const float16& b) {
return float16(static_cast<float>(a) + static_cast<float>(b)); return float16(static_cast<float>(a) + static_cast<float>(b));
...@@ -478,7 +478,7 @@ HOST inline float16 operator+(const float16& a, const float16& b) { ...@@ -478,7 +478,7 @@ HOST inline float16 operator+(const float16& a, const float16& b) {
#else #else
HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) { HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return float16(__hadd(half(a), half(b))); return float16(__hadd(a.to_half(), b.to_half()));
#else #else
return float16(static_cast<float>(a) + static_cast<float>(b)); return float16(static_cast<float>(a) + static_cast<float>(b));
#endif #endif
...@@ -487,7 +487,7 @@ HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) { ...@@ -487,7 +487,7 @@ HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
#if defined(__HIPCC__) #if defined(__HIPCC__)
DEVICE inline float16 operator-(const float16& a, const float16& b) { DEVICE inline float16 operator-(const float16& a, const float16& b) {
return float16(__hsub(half(a), half(b))); return float16(__hsub(a.to_half(), b.to_half()));
} }
HOST inline float16 operator-(const float16& a, const float16& b) { HOST inline float16 operator-(const float16& a, const float16& b) {
return float16(static_cast<float>(a) - static_cast<float>(b)); return float16(static_cast<float>(a) - static_cast<float>(b));
...@@ -495,7 +495,7 @@ HOST inline float16 operator-(const float16& a, const float16& b) { ...@@ -495,7 +495,7 @@ HOST inline float16 operator-(const float16& a, const float16& b) {
#else #else
HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) { HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return float16(__hsub(half(a), half(b))); return float16(__hsub(a.to_half(), b.to_half()));
#else #else
return float16(static_cast<float>(a) - static_cast<float>(b)); return float16(static_cast<float>(a) - static_cast<float>(b));
#endif #endif
...@@ -504,7 +504,7 @@ HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) { ...@@ -504,7 +504,7 @@ HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
#if defined(__HIPCC__) #if defined(__HIPCC__)
DEVICE inline float16 operator*(const float16& a, const float16& b) { DEVICE inline float16 operator*(const float16& a, const float16& b) {
return float16(__hmul(half(a), half(b))); return float16(__hmul(a.to_half(), b.to_half()));
} }
HOST inline float16 operator*(const float16& a, const float16& b) { HOST inline float16 operator*(const float16& a, const float16& b) {
return float16(static_cast<float>(a) * static_cast<float>(b)); return float16(static_cast<float>(a) * static_cast<float>(b));
...@@ -512,7 +512,7 @@ HOST inline float16 operator*(const float16& a, const float16& b) { ...@@ -512,7 +512,7 @@ HOST inline float16 operator*(const float16& a, const float16& b) {
#else #else
HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) { HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return float16(__hmul(half(a), half(b))); return float16(__hmul(a.to_half(), b.to_half()));
#else #else
return float16(static_cast<float>(a) * static_cast<float>(b)); return float16(static_cast<float>(a) * static_cast<float>(b));
#endif #endif
...@@ -521,7 +521,7 @@ HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) { ...@@ -521,7 +521,7 @@ HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
#if defined(__HIPCC__) #if defined(__HIPCC__)
DEVICE inline float16 operator/(const float16& a, const float16& b) { DEVICE inline float16 operator/(const float16& a, const float16& b) {
return float16(__hdiv(half(a), half(b))); return float16(__hdiv(a.to_half(), b.to_half()));
} }
HOST inline float16 operator/(const float16& a, const float16& b) { HOST inline float16 operator/(const float16& a, const float16& b) {
return float16(static_cast<float>(a) / static_cast<float>(b)); return float16(static_cast<float>(a) / static_cast<float>(b));
...@@ -530,8 +530,8 @@ HOST inline float16 operator/(const float16& a, const float16& b) { ...@@ -530,8 +530,8 @@ HOST inline float16 operator/(const float16& a, const float16& b) {
HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) { HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
// TODO(kexinzhao): check which cuda version starts to support __hdiv // TODO(kexinzhao): check which cuda version starts to support __hdiv
float num = __half2float(half(a)); float num = __half2float(a.to_half());
float denom = __half2float(half(b)); float denom = __half2float(b.to_half());
return float16(num / denom); return float16(num / denom);
#else #else
return float16(static_cast<float>(a) / static_cast<float>(b)); return float16(static_cast<float>(a) / static_cast<float>(b));
...@@ -541,7 +541,7 @@ HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) { ...@@ -541,7 +541,7 @@ HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
#if defined(__HIPCC__) #if defined(__HIPCC__)
DEVICE inline float16 operator-(const float16& a) { DEVICE inline float16 operator-(const float16& a) {
return float16(__hneg(half(a))); return float16(__hneg(a.to_half()));
} }
HOST inline float16 operator-(const float16& a) { HOST inline float16 operator-(const float16& a) {
float16 res; float16 res;
...@@ -551,7 +551,7 @@ HOST inline float16 operator-(const float16& a) { ...@@ -551,7 +551,7 @@ HOST inline float16 operator-(const float16& a) {
#else #else
HOSTDEVICE inline float16 operator-(const float16& a) { HOSTDEVICE inline float16 operator-(const float16& a) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return float16(__hneg(half(a))); return float16(__hneg(a.to_half()));
#else #else
float16 res; float16 res;
res.x = a.x ^ 0x8000; res.x = a.x ^ 0x8000;
...@@ -584,7 +584,7 @@ HOSTDEVICE inline float16& operator/=(float16& a, const float16& b) { // NOLINT ...@@ -584,7 +584,7 @@ HOSTDEVICE inline float16& operator/=(float16& a, const float16& b) { // NOLINT
// in __host__ __device__ function // in __host__ __device__ function
#if defined(__HIPCC__) #if defined(__HIPCC__)
DEVICE inline bool operator==(const float16& a, const float16& b) { DEVICE inline bool operator==(const float16& a, const float16& b) {
return __heq(half(a), half(b)); return __heq(a.to_half(), b.to_half());
} }
HOST inline bool operator==(const float16& a, const float16& b) { HOST inline bool operator==(const float16& a, const float16& b) {
return static_cast<float>(a) == static_cast<float>(b); return static_cast<float>(a) == static_cast<float>(b);
...@@ -592,7 +592,7 @@ HOST inline bool operator==(const float16& a, const float16& b) { ...@@ -592,7 +592,7 @@ HOST inline bool operator==(const float16& a, const float16& b) {
#else // __HIPCC__ #else // __HIPCC__
HOSTDEVICE inline bool operator==(const float16& a, const float16& b) { HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return __heq(half(a), half(b)); return __heq(a.to_half(), b.to_half());
#else #else
return static_cast<float>(a) == static_cast<float>(b); return static_cast<float>(a) == static_cast<float>(b);
#endif #endif
...@@ -601,7 +601,7 @@ HOSTDEVICE inline bool operator==(const float16& a, const float16& b) { ...@@ -601,7 +601,7 @@ HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
#if defined(__HIPCC__) #if defined(__HIPCC__)
DEVICE inline bool operator!=(const float16& a, const float16& b) { DEVICE inline bool operator!=(const float16& a, const float16& b) {
return __hne(half(a), half(b)); return __hne(a.to_half(), b.to_half());
} }
HOST inline bool operator!=(const float16& a, const float16& b) { HOST inline bool operator!=(const float16& a, const float16& b) {
return static_cast<float>(a) != static_cast<float>(b); return static_cast<float>(a) != static_cast<float>(b);
...@@ -609,7 +609,7 @@ HOST inline bool operator!=(const float16& a, const float16& b) { ...@@ -609,7 +609,7 @@ HOST inline bool operator!=(const float16& a, const float16& b) {
#else // __HIPCC__ #else // __HIPCC__
HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) { HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return __hne(half(a), half(b)); return __hne(a.to_half(), b.to_half());
#else #else
return static_cast<float>(a) != static_cast<float>(b); return static_cast<float>(a) != static_cast<float>(b);
#endif #endif
...@@ -618,7 +618,7 @@ HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) { ...@@ -618,7 +618,7 @@ HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
#if defined(__HIPCC__) #if defined(__HIPCC__)
DEVICE inline bool operator<(const float16& a, const float16& b) { DEVICE inline bool operator<(const float16& a, const float16& b) {
return __hlt(half(a), half(b)); return __hlt(a.to_half(), b.to_half());
} }
HOST inline bool operator<(const float16& a, const float16& b) { HOST inline bool operator<(const float16& a, const float16& b) {
return static_cast<float>(a) < static_cast<float>(b); return static_cast<float>(a) < static_cast<float>(b);
...@@ -626,7 +626,7 @@ HOST inline bool operator<(const float16& a, const float16& b) { ...@@ -626,7 +626,7 @@ HOST inline bool operator<(const float16& a, const float16& b) {
#else // __HIPCC__ #else // __HIPCC__
HOSTDEVICE inline bool operator<(const float16& a, const float16& b) { HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return __hlt(half(a), half(b)); return __hlt(a.to_half(), b.to_half());
#else #else
return static_cast<float>(a) < static_cast<float>(b); return static_cast<float>(a) < static_cast<float>(b);
#endif #endif
...@@ -635,7 +635,7 @@ HOSTDEVICE inline bool operator<(const float16& a, const float16& b) { ...@@ -635,7 +635,7 @@ HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
#if defined(__HIPCC__) #if defined(__HIPCC__)
DEVICE inline bool operator<=(const float16& a, const float16& b) { DEVICE inline bool operator<=(const float16& a, const float16& b) {
return __hle(half(a), half(b)); return __hle(a.to_half(), b.to_half());
} }
HOST inline bool operator<=(const float16& a, const float16& b) { HOST inline bool operator<=(const float16& a, const float16& b) {
return static_cast<float>(a) <= static_cast<float>(b); return static_cast<float>(a) <= static_cast<float>(b);
...@@ -643,7 +643,7 @@ HOST inline bool operator<=(const float16& a, const float16& b) { ...@@ -643,7 +643,7 @@ HOST inline bool operator<=(const float16& a, const float16& b) {
#else // __HIPCC__ #else // __HIPCC__
HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) { HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return __hle(half(a), half(b)); return __hle(a.to_half(), b.to_half());
#else #else
return static_cast<float>(a) <= static_cast<float>(b); return static_cast<float>(a) <= static_cast<float>(b);
#endif #endif
...@@ -652,7 +652,7 @@ HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) { ...@@ -652,7 +652,7 @@ HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
#if defined(__HIPCC__) #if defined(__HIPCC__)
DEVICE inline bool operator>(const float16& a, const float16& b) { DEVICE inline bool operator>(const float16& a, const float16& b) {
return __hgt(half(a), half(b)); return __hgt(a.to_half(), b.to_half());
} }
HOST inline bool operator>(const float16& a, const float16& b) { HOST inline bool operator>(const float16& a, const float16& b) {
return static_cast<float>(a) > static_cast<float>(b); return static_cast<float>(a) > static_cast<float>(b);
...@@ -660,7 +660,7 @@ HOST inline bool operator>(const float16& a, const float16& b) { ...@@ -660,7 +660,7 @@ HOST inline bool operator>(const float16& a, const float16& b) {
#else // __HIPCC__ #else // __HIPCC__
HOSTDEVICE inline bool operator>(const float16& a, const float16& b) { HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return __hgt(half(a), half(b)); return __hgt(a.to_half(), b.to_half());
#else #else
return static_cast<float>(a) > static_cast<float>(b); return static_cast<float>(a) > static_cast<float>(b);
#endif #endif
...@@ -669,7 +669,7 @@ HOSTDEVICE inline bool operator>(const float16& a, const float16& b) { ...@@ -669,7 +669,7 @@ HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
#if defined(__HIPCC__) #if defined(__HIPCC__)
DEVICE inline bool operator>=(const float16& a, const float16& b) { DEVICE inline bool operator>=(const float16& a, const float16& b) {
return __hge(half(a), half(b)); return __hge(a.to_half(), b.to_half());
} }
HOST inline bool operator>=(const float16& a, const float16& b) { HOST inline bool operator>=(const float16& a, const float16& b) {
return static_cast<float>(a) >= static_cast<float>(b); return static_cast<float>(a) >= static_cast<float>(b);
...@@ -677,7 +677,7 @@ HOST inline bool operator>=(const float16& a, const float16& b) { ...@@ -677,7 +677,7 @@ HOST inline bool operator>=(const float16& a, const float16& b) {
#else // __HIPCC__ #else // __HIPCC__
HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) { HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return __hge(half(a), half(b)); return __hge(a.to_half(), b.to_half());
#else #else
return static_cast<float>(a) >= static_cast<float>(b); return static_cast<float>(a) >= static_cast<float>(b);
#endif #endif
...@@ -945,12 +945,12 @@ HOSTDEVICE inline float16 raw_uint16_to_float16(uint16_t a) { ...@@ -945,12 +945,12 @@ HOSTDEVICE inline float16 raw_uint16_to_float16(uint16_t a) {
// HIPCC has compile error if call __device__ function __hisnan in __host__ // HIPCC has compile error if call __device__ function __hisnan in __host__
// __device__ function // __device__ function
#if defined(PADDLE_CUDA_FP16) && defined(__HIPCC__) #if defined(PADDLE_CUDA_FP16) && defined(__HIPCC__)
DEVICE inline bool(isnan)(const float16& a) { return __hisnan(half(a)); } DEVICE inline bool(isnan)(const float16& a) { return __hisnan(a.to_half()); }
HOST inline bool(isnan)(const float16& a) { return (a.x & 0x7fff) > 0x7c00; } HOST inline bool(isnan)(const float16& a) { return (a.x & 0x7fff) > 0x7c00; }
#else #else
HOSTDEVICE inline bool(isnan)(const float16& a) { HOSTDEVICE inline bool(isnan)(const float16& a) {
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return __hisnan(half(a)); return __hisnan(a.to_half());
#else #else
return (a.x & 0x7fff) > 0x7c00; return (a.x & 0x7fff) > 0x7c00;
#endif #endif
......
...@@ -48,8 +48,8 @@ limitations under the License. */ ...@@ -48,8 +48,8 @@ limitations under the License. */
in1 = reinterpret_cast<half *>(malloc(size)); \ in1 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \ in2 = reinterpret_cast<half *>(malloc(size)); \
out = reinterpret_cast<half *>(malloc(size)); \ out = reinterpret_cast<half *>(malloc(size)); \
in1[0] = half(float16(v_in1)); \ in1[0] = float16(v_in1).to_half(); \
in2[0] = half(float16(v_in2)); \ in2[0] = float16(v_in2).to_half(); \
hipMemcpy(d_in1, in1, size, hipMemcpyHostToDevice); \ hipMemcpy(d_in1, in1, size, hipMemcpyHostToDevice); \
hipMemcpy(d_in2, in2, size, hipMemcpyHostToDevice); \ hipMemcpy(d_in2, in2, size, hipMemcpyHostToDevice); \
hipLaunchKernelGGL(op_type, dim3(1), dim3(1), 0, 0, d_in1, d_in2, d_out); \ hipLaunchKernelGGL(op_type, dim3(1), dim3(1), 0, 0, d_in1, d_in2, d_out); \
...@@ -73,8 +73,8 @@ limitations under the License. */ ...@@ -73,8 +73,8 @@ limitations under the License. */
hipMalloc(reinterpret_cast<void **>(&d_in2), size); \ hipMalloc(reinterpret_cast<void **>(&d_in2), size); \
in1 = reinterpret_cast<half *>(malloc(size)); \ in1 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \ in2 = reinterpret_cast<half *>(malloc(size)); \
in1[0] = half(float16(v_in1)); \ in1[0] = float16(v_in1).to_half(); \
in2[0] = half(float16(v_in2)); \ in2[0] = float16(v_in2).to_half(); \
hipMemcpy(d_in1, in1, size, hipMemcpyHostToDevice); \ hipMemcpy(d_in1, in1, size, hipMemcpyHostToDevice); \
hipMemcpy(d_in2, in2, size, hipMemcpyHostToDevice); \ hipMemcpy(d_in2, in2, size, hipMemcpyHostToDevice); \
hipLaunchKernelGGL(op_type, dim3(1), dim3(1), 0, 0, d_in1, d_in2); \ hipLaunchKernelGGL(op_type, dim3(1), dim3(1), 0, 0, d_in1, d_in2); \
...@@ -99,8 +99,8 @@ limitations under the License. */ ...@@ -99,8 +99,8 @@ limitations under the License. */
in1 = reinterpret_cast<half *>(malloc(size)); \ in1 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \ in2 = reinterpret_cast<half *>(malloc(size)); \
out = reinterpret_cast<bool *>(malloc(1)); \ out = reinterpret_cast<bool *>(malloc(1)); \
in1[0] = half(float16(v_in1)); \ in1[0] = float16(v_in1).to_half(); \
in2[0] = half(float16(v_in2)); \ in2[0] = float16(v_in2).to_half(); \
hipMemcpy(d_in1, in1, size, hipMemcpyHostToDevice); \ hipMemcpy(d_in1, in1, size, hipMemcpyHostToDevice); \
hipMemcpy(d_in2, in2, size, hipMemcpyHostToDevice); \ hipMemcpy(d_in2, in2, size, hipMemcpyHostToDevice); \
hipLaunchKernelGGL(op_type, dim3(1), dim3(1), 0, 0, d_in1, d_in2, d_out); \ hipLaunchKernelGGL(op_type, dim3(1), dim3(1), 0, 0, d_in1, d_in2, d_out); \
...@@ -126,8 +126,8 @@ limitations under the License. */ ...@@ -126,8 +126,8 @@ limitations under the License. */
in1 = reinterpret_cast<half *>(malloc(size)); \ in1 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \ in2 = reinterpret_cast<half *>(malloc(size)); \
out = reinterpret_cast<half *>(malloc(size)); \ out = reinterpret_cast<half *>(malloc(size)); \
in1[0] = half(float16(v_in1)); \ in1[0] = float16(v_in1).to_half(); \
in2[0] = half(float16(v_in2)); \ in2[0] = float16(v_in2).to_half(); \
cudaMemcpy(d_in1, in1, size, cudaMemcpyHostToDevice); \ cudaMemcpy(d_in1, in1, size, cudaMemcpyHostToDevice); \
cudaMemcpy(d_in2, in2, size, cudaMemcpyHostToDevice); \ cudaMemcpy(d_in2, in2, size, cudaMemcpyHostToDevice); \
op_type<<<1, 1>>>(d_in1, d_in2, d_out); \ op_type<<<1, 1>>>(d_in1, d_in2, d_out); \
...@@ -151,8 +151,8 @@ limitations under the License. */ ...@@ -151,8 +151,8 @@ limitations under the License. */
cudaMalloc(reinterpret_cast<void **>(&d_in2), size); \ cudaMalloc(reinterpret_cast<void **>(&d_in2), size); \
in1 = reinterpret_cast<half *>(malloc(size)); \ in1 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \ in2 = reinterpret_cast<half *>(malloc(size)); \
in1[0] = half(float16(v_in1)); \ in1[0] = float16(v_in1).to_half(); \
in2[0] = half(float16(v_in2)); \ in2[0] = float16(v_in2).to_half(); \
cudaMemcpy(d_in1, in1, size, cudaMemcpyHostToDevice); \ cudaMemcpy(d_in1, in1, size, cudaMemcpyHostToDevice); \
cudaMemcpy(d_in2, in2, size, cudaMemcpyHostToDevice); \ cudaMemcpy(d_in2, in2, size, cudaMemcpyHostToDevice); \
op_type<<<1, 1>>>(d_in1, d_in2); \ op_type<<<1, 1>>>(d_in1, d_in2); \
...@@ -177,8 +177,8 @@ limitations under the License. */ ...@@ -177,8 +177,8 @@ limitations under the License. */
in1 = reinterpret_cast<half *>(malloc(size)); \ in1 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \ in2 = reinterpret_cast<half *>(malloc(size)); \
out = reinterpret_cast<bool *>(malloc(1)); \ out = reinterpret_cast<bool *>(malloc(1)); \
in1[0] = half(float16(v_in1)); \ in1[0] = float16(v_in1).to_half(); \
in2[0] = half(float16(v_in2)); \ in2[0] = float16(v_in2).to_half(); \
cudaMemcpy(d_in1, in1, size, cudaMemcpyHostToDevice); \ cudaMemcpy(d_in1, in1, size, cudaMemcpyHostToDevice); \
cudaMemcpy(d_in2, in2, size, cudaMemcpyHostToDevice); \ cudaMemcpy(d_in2, in2, size, cudaMemcpyHostToDevice); \
op_type<<<1, 1>>>(d_in1, d_in2, d_out); \ op_type<<<1, 1>>>(d_in1, d_in2, d_out); \
...@@ -221,7 +221,7 @@ void TestNeg(float v_in, float v_out) { ...@@ -221,7 +221,7 @@ void TestNeg(float v_in, float v_out) {
cudaMalloc(reinterpret_cast<void **>(&d_in), size); cudaMalloc(reinterpret_cast<void **>(&d_in), size);
#endif #endif
in = reinterpret_cast<half *>(malloc(size)); in = reinterpret_cast<half *>(malloc(size));
in[0] = half(float16(v_in)); in[0] = float16(v_in).to_half();
#ifdef PADDLE_WITH_HIP #ifdef PADDLE_WITH_HIP
hipMemcpy(d_in, in, size, hipMemcpyHostToDevice); hipMemcpy(d_in, in, size, hipMemcpyHostToDevice);
#else #else
...@@ -299,17 +299,17 @@ TEST(float16, comparision_on_gpu) { ...@@ -299,17 +299,17 @@ TEST(float16, comparision_on_gpu) {
TEST(float16, conversion_on_gpu) { TEST(float16, conversion_on_gpu) {
// Explicit conversion to and from cuda half // Explicit conversion to and from cuda half
EXPECT_EQ(float16(half(float16(1.0f))).x, 0x3c00); EXPECT_EQ(float16(float16(1.0f).to_half()).x, 0x3c00);
EXPECT_EQ(float16(half(float16(0.5f))).x, 0x3800); EXPECT_EQ(float16(float16(0.5f).to_half()).x, 0x3800);
EXPECT_EQ(float16(half(float16(0.33333f))).x, 0x3555); EXPECT_EQ(float16(float16(0.33333f).to_half()).x, 0x3555);
EXPECT_EQ(float16(half(float16(0.0f))).x, 0x0000); EXPECT_EQ(float16(float16(0.0f).to_half()).x, 0x0000);
EXPECT_EQ(float16(half(float16(-0.0f))).x, 0x8000); EXPECT_EQ(float16(float16(-0.0f).to_half()).x, 0x8000);
EXPECT_EQ(float16(half(float16(65504.0f))).x, 0x7bff); EXPECT_EQ(float16(float16(65504.0f).to_half()).x, 0x7bff);
EXPECT_EQ(float16(half(float16(65536.0f))).x, 0x7c00); EXPECT_EQ(float16(float16(65536.0f).to_half()).x, 0x7c00);
// Assignment operator // Assignment operator
float16 v_assign; float16 v_assign;
v_assign = half(float16(1.0f)); v_assign = float16(1.0f).to_half();
EXPECT_EQ(v_assign.x, 0x3c00); EXPECT_EQ(v_assign.x, 0x3c00);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册