From a5feb771592d1bd7340ff7132518d6c52829b8e7 Mon Sep 17 00:00:00 2001
From: Kexin Zhao
Date: Mon, 27 Nov 2017 17:12:21 -0800
Subject: [PATCH] address pr comment

---
 paddle/math/float16.h              | 839 +++++++++++++++++------------
 paddle/math/tests/test_float16.cpp |   2 +
 2 files changed, 482 insertions(+), 359 deletions(-)

diff --git a/paddle/math/float16.h b/paddle/math/float16.h
index 3b2217414..65c0489e1 100644
--- a/paddle/math/float16.h
+++ b/paddle/math/float16.h
@@ -16,9 +16,14 @@ limitations under the License. */
 
 #include <stdint.h>
 
+#ifdef PADDLE_WITH_CUDA
 #include <cuda.h>
+#endif  // PADDLE_WITH_CUDA
+
 #include "unsupported/Eigen/CXX11/Tensor"
 
+#include "paddle/platform/hostdevice.h"
+
 #ifdef __GNUC__
 #define PADDLE_GNUC_VER (__GNUC__ * 10 + __GNUC_MINOR__)
 #else
@@ -31,25 +36,12 @@ limitations under the License. */
 #define PADDLE_CLANG_VER 0
 #endif  // __clang__
 
-#ifdef __CUDACC__
-#define PADDLE_HOSTDEVICE __host__ __device__
-#if CUDA_VERSION >= 7050
+#if defined(__CUDACC__) && CUDA_VERSION >= 7050
 #define PADDLE_CUDA_FP16
 #include <cuda_fp16.h>
-#endif  // CUDA_VERSION >= 7050
-#else
-#define PADDLE_HOSTDEVICE
-#endif  // __CUDACC__
-
-#ifdef __arm__
-#define PADDLE_ARM_32
 #endif
 
-#ifdef __aarch64__
-#define PADDLE_ARM_64
-#endif
-
-#if defined(PADDLE_ARM_32) || defined(PADDLE_ARM_64)
+#if defined(__arm__) || defined(__aarch64__)
 #define PADDLE_ARM
 #endif
 
@@ -58,19 +50,12 @@ limitations under the License. */
 #include <arm_neon.h>
 #endif
 
-#if defined(PADDLE_NEON) && defined(PADDLE_ARM_32)
-#define PADDLE_NEON_32
-#endif
-
-#if defined(PADDLE_NEON) && defined(PADDLE_ARM_64)
-#define PADDLE_NEON_64
+#if defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \
+    (PADDLE_GNUC_VER >= 62 || PADDLE_CLANG_VER >= 37)
+#define PADDLE_WITH_NATIVE_FP16
 #endif
 
-#ifdef PADDLE_ARM
-#ifdef __F16C__
-#undef __F16C__
-#endif  // __F16C__
-#else
+#ifndef PADDLE_ARM
 #include <immintrin.h>
 #endif  // PADDLE_ARM
 
@@ -78,27 +63,20 @@ limitations under the License. */
 
 namespace paddle {
 
-struct float16;
-
-namespace fp16_impl {
-// Convert from float to half precision in round-to-nearest-even mode
-PADDLE_HOSTDEVICE inline float16 float_to_half_rn(float f);
-PADDLE_HOSTDEVICE inline float half_to_float(float16 h);
-}  // namespace fp16_impl
-
 // Use PADDLE_ALIGN(2) to ensure that each float16 will be allocated
 // and aligned at least on a 2-byte boundary, which leads to efficient
 // memory access of float16 struct and also makes float16 compatible
 // with CUDA half, ARM float16_t, and Eigen::half data types.
 struct PADDLE_ALIGN(2) float16 {
+public:
   uint16_t x;
 
-  PADDLE_HOSTDEVICE inline float16() : x(0) {}
+  HOSTDEVICE inline float16() : x(0) {}
 
-  PADDLE_HOSTDEVICE inline float16(const float16& h) : x(h.x) {}
+  HOSTDEVICE inline float16(const float16& h) : x(h.x) {}
 
 #ifdef PADDLE_CUDA_FP16
-  PADDLE_HOSTDEVICE inline float16(const half& h) {
+  HOSTDEVICE inline explicit float16(const half& h) {
 #if CUDA_VERSION >= 9000
     x = reinterpret_cast<__half_raw*>(&h)->x;
 #else
@@ -107,78 +85,64 @@ struct PADDLE_ALIGN(2) float16 {
   }
 #endif  // PADDLE_CUDA_FP16
 
-  PADDLE_HOSTDEVICE inline float16(const Eigen::half& h) : x(h.x) {}
+  HOSTDEVICE inline explicit float16(const Eigen::half& h) : x(h.x) {}
 
-#if defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \
-    (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34)
+#ifdef PADDLE_WITH_NATIVE_FP16
   // __fp16 is a native half precision data type for arm cpu,
   // float16_t is an alias for __fp16 in arm_fp16.h,
   // which is included in arm_neon.h.
-  PADDLE_HOSTDEVICE inline float16(const float16_t& h) {
-    float16_t tmp = h;
-    x = *reinterpret_cast<uint16_t*>(&tmp);
+  HOSTDEVICE inline explicit float16(const float16_t& h) {
+    x = *reinterpret_cast<const uint16_t*>(&h);
   }
 #endif
 
-  PADDLE_HOSTDEVICE inline explicit float16(bool b) : x(b ? 0x3c00 : 0) {}
-
-  PADDLE_HOSTDEVICE inline explicit float16(int8_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
-
-  PADDLE_HOSTDEVICE inline explicit float16(uint8_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
-
-  PADDLE_HOSTDEVICE inline explicit float16(int16_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
-
-  PADDLE_HOSTDEVICE inline explicit float16(uint16_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
+  HOSTDEVICE inline explicit float16(float val) {
+#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+    half tmp = __float2half(val);
+    x = *reinterpret_cast<uint16_t*>(&tmp);
 
-  PADDLE_HOSTDEVICE inline explicit float16(int32_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
+#elif defined(PADDLE_NEON)
+    float32x4_t tmp = vld1q_dup_f32(&val);
+    float16_t res = vget_lane_f16(vcvt_f16_f32(tmp), 0);
+    x = *reinterpret_cast<uint16_t*>(&res);
 
-  PADDLE_HOSTDEVICE inline explicit float16(uint32_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
+#elif defined(__F16C__)
+    x = _cvtss_sh(val, 0);
 
-  PADDLE_HOSTDEVICE inline explicit float16(int64_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
+#else
+    // Conversion routine adapted from
+    // http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
+    Bits v, s;
+    v.f = val;
+    uint32_t sign = v.si & sigN;
+    v.si ^= sign;
+    sign >>= shiftSign;  // logical shift
+    s.si = mulN;
+    s.si = s.f * v.f;  // correct subnormals
+    v.si ^= (s.si ^ v.si) & -(minN > v.si);
+    v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN));
+    v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN));
+    v.ui >>= shift;  // logical shift
+    v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC);
+    v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC);
+    x = v.ui | sign;
 
-  PADDLE_HOSTDEVICE inline explicit float16(uint64_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+#endif
   }
 
-  PADDLE_HOSTDEVICE inline explicit float16(float val) {
-    float16 res = fp16_impl::float_to_half_rn(val);
-    x = res.x;
-  }
+  HOSTDEVICE inline explicit float16(bool b) : x(b ? 0x3c00 : 0) {}
 
+  template <class T>
+  HOSTDEVICE inline explicit float16(const T& val)
+      : x(float16(static_cast<float>(val)).x) {}
 
-  PADDLE_HOSTDEVICE inline float16& operator=(const float16& rhs) {
+  HOSTDEVICE inline float16& operator=(const float16& rhs) {
     x = rhs.x;
     return *this;
   }
 
 #ifdef PADDLE_CUDA_FP16
-  PADDLE_HOSTDEVICE inline float16& operator=(const half& rhs) {
+  HOSTDEVICE inline float16& operator=(const half& rhs) {
 #if CUDA_VERSION >= 9000
     x = reinterpret_cast<__half_raw*>(&rhs)->x;
 #else
@@ -188,87 +152,75 @@ struct PADDLE_ALIGN(2) float16 {
   }
 #endif
 
-  PADDLE_HOSTDEVICE inline float16& operator=(const Eigen::half& rhs) {
+  HOSTDEVICE inline float16& operator=(const Eigen::half& rhs) {
     x = rhs.x;
     return *this;
   }
 
-#if defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \
-    (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34)
-  PADDLE_HOSTDEVICE inline float16& operator=(const float16_t& rhs) {
-    float16_t tmp = rhs;
-    x = *reinterpret_cast<uint16_t*>(&tmp);
+#ifdef PADDLE_WITH_NATIVE_FP16
+  HOSTDEVICE inline float16& operator=(const float16_t& rhs) {
+    x = *reinterpret_cast<const uint16_t*>(&rhs);
     return *this;
   }
 #endif
 
-  PADDLE_HOSTDEVICE inline float16& operator=(bool b) {
+  HOSTDEVICE inline float16& operator=(bool b) {
     x = b ? 0x3c00 : 0;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(int8_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(int8_t val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(uint8_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(uint8_t val) {
+    x = float16(val).x;
    return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(int16_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(int16_t val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(uint16_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(uint16_t val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(int32_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(int32_t val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(uint32_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(uint32_t val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(int64_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(int64_t val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(uint64_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(uint64_t val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(float val) {
-    float16 res = fp16_impl::float_to_half_rn(val);
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(float val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(double val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(double val) {
+    x = float16(val).x;
     return *this;
   }
 
 #ifdef PADDLE_CUDA_FP16
-  PADDLE_HOSTDEVICE inline operator half() const {
+  HOSTDEVICE inline explicit operator half() const {
 #if CUDA_VERSION >= 9000
     __half_raw h;
     h.x = x;
@@ -281,186 +233,504 @@ struct PADDLE_ALIGN(2) float16 {
   }
 #endif  // PADDLE_CUDA_FP16
 
-  PADDLE_HOSTDEVICE inline operator Eigen::half() const {
+  HOSTDEVICE inline explicit operator Eigen::half() const {
     Eigen::half h;
     h.x = x;
     return h;
   }
 
-#if defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \
-    (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34)
-  PADDLE_HOSTDEVICE inline operator float16_t() const {
-    float16 h = *this;
-    return *reinterpret_cast<float16_t*>(&h);
+#ifdef PADDLE_WITH_NATIVE_FP16
+  HOSTDEVICE inline explicit operator float16_t() const {
+    return *reinterpret_cast<const float16_t*>(this);
   }
 #endif
 
-  PADDLE_HOSTDEVICE inline explicit operator bool() const {
-    return (x & 0x7fff) != 0;
-  }
+  HOSTDEVICE inline explicit operator float() const {
+#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+    half tmp = *reinterpret_cast<const half*>(this);
+    return __half2float(tmp);
+
+#elif defined(PADDLE_NEON)
+    float16x4_t res = vld1_dup_f16(reinterpret_cast<const float16_t*>(this));
+    return vgetq_lane_f32(vcvt_f32_f16(res), 0);
 
-  PADDLE_HOSTDEVICE inline explicit operator int8_t() const {
-    return static_cast<int8_t>(fp16_impl::half_to_float(*this));
+#elif defined(__F16C__)
+    return _cvtsh_ss(this->x);
+
+#else
+    // Conversion routine adapted from
+    // http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
+    Bits v;
+    v.ui = this->x;
+    int32_t sign = v.si & sigC;
+    v.si ^= sign;
+    sign <<= shiftSign;
+    v.si ^= ((v.si + minD) ^ v.si) & -(v.si > subC);
+    v.si ^= ((v.si + maxD) ^ v.si) & -(v.si > maxC);
+    Bits s;
+    s.si = mulC;
+    s.f *= v.si;
+    int32_t mask = -(norC > v.si);
+    v.si <<= shift;
+    v.si ^= (s.si ^ v.si) & mask;
+    v.si |= sign;
+    return v.f;
+
+#endif
   }
 
-  PADDLE_HOSTDEVICE inline explicit operator uint8_t() const {
-    return static_cast<uint8_t>(fp16_impl::half_to_float(*this));
+  HOSTDEVICE inline explicit operator bool() const { return (x & 0x7fff) != 0; }
+
+  HOSTDEVICE inline explicit operator int8_t() const {
+    return static_cast<int8_t>(float(*this));
   }
 
-  PADDLE_HOSTDEVICE inline explicit operator int16_t() const {
-    return static_cast<int16_t>(fp16_impl::half_to_float(*this));
+  HOSTDEVICE inline explicit operator uint8_t() const {
+    return static_cast<uint8_t>(float(*this));
   }
 
-  PADDLE_HOSTDEVICE inline explicit operator uint16_t() const {
-    return static_cast<uint16_t>(fp16_impl::half_to_float(*this));
+  HOSTDEVICE inline explicit operator int16_t() const {
+    return static_cast<int16_t>(float(*this));
  }
 
-  PADDLE_HOSTDEVICE inline explicit operator int32_t() const {
-    return static_cast<int32_t>(fp16_impl::half_to_float(*this));
+  HOSTDEVICE inline explicit operator uint16_t() const {
+    return static_cast<uint16_t>(float(*this));
   }
 
-  PADDLE_HOSTDEVICE inline explicit operator uint32_t() const {
-    return static_cast<uint32_t>(fp16_impl::half_to_float(*this));
+  HOSTDEVICE inline explicit operator int32_t() const {
+    return static_cast<int32_t>(float(*this));
   }
 
-  PADDLE_HOSTDEVICE inline explicit operator int64_t() const {
-    return static_cast<int64_t>(fp16_impl::half_to_float(*this));
+  HOSTDEVICE inline explicit operator uint32_t() const {
+    return static_cast<uint32_t>(float(*this));
   }
 
-  PADDLE_HOSTDEVICE inline explicit operator uint64_t() const {
-    return static_cast<uint64_t>(fp16_impl::half_to_float(*this));
+  HOSTDEVICE inline explicit operator int64_t() const {
+    return static_cast<int64_t>(float(*this));
   }
 
-  PADDLE_HOSTDEVICE inline explicit operator float() const {
-    return fp16_impl::half_to_float(*this);
+  HOSTDEVICE inline explicit operator uint64_t() const {
+    return static_cast<uint64_t>(float(*this));
   }
 
-  PADDLE_HOSTDEVICE inline explicit operator double() const {
-    return static_cast<double>(fp16_impl::half_to_float(*this));
+  HOSTDEVICE inline explicit operator double() const {
+    return static_cast<double>(float(*this));
   }
+
+private:
+  union Bits {
+    float f;
+    int32_t si;
+    uint32_t ui;
+  };
+
+  static const int shift = 13;
+  static const int shiftSign = 16;
+
+  static const int32_t infN = 0x7F800000;
+  static const int32_t maxN = 0x477FE000;  // max flt16 as flt32
+  static const int32_t minN = 0x38800000;  // min flt16 normal as flt32
+  static const int32_t sigN = 0x80000000;  // sign bit
+
+  static constexpr int32_t infC = infN >> shift;
+  static constexpr int32_t nanN = (infC + 1)
+                                  << shift;  // minimum flt16 nan as float32
+  static constexpr int32_t maxC = maxN >> shift;
+  static constexpr int32_t minC = minN >> shift;
+  static constexpr int32_t sigC = sigN >> shiftSign;
+
+  static const int32_t mulN = 0x52000000;  // (1 << 23) / minN
+  static const int32_t mulC = 0x33800000;  // minN / (1 << (23 - shift))
+  static const int32_t subC = 0x003FF;     // max flt32 subnormal downshifted
+  static const int32_t norC = 0x00400;     // min flt32 normal downshifted
+
+  static constexpr int32_t maxD = infC - maxC - 1;
+  static constexpr int32_t minD = minC - subC - 1;
 };
 
-// Arithmetic operators
-#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
-__device__ inline float16 operator+(const float16& a, const float16& b) {
+// Arithmetic operators on GPU
+// CUDA 9.0 provides built-in arithmetic operators for half, while
+// CUDA 7.5 and 8.0 do not. The arithmetic operators defined here let
+// users write CUDA code involving the half data type that compiles
+// the same way under CUDA 7.5 and 8.0 as it does under CUDA 9.0.
+#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && \ + __CUDA_ARCH__ >= 530 && CUDA_VERSION < 9000 +DEVICE inline half operator+(const half& a, const half& b) { + return __hadd(a, b); +} + +DEVICE inline half operator-(const half& a, const half& b) { + return __hsub(a, b); +} + +DEVICE inline half operator*(const half& a, const half& b) { + return __hmul(a, b); +} + +DEVICE inline half operator/(const half& a, const half& b) { + float num = __half2float(a); + float denom = __half2float(b); + return __float2half(num / denom); +} + +DEVICE inline half operator-(const half& a) { return __hneg(a); } + +DEVICE inline half& operator+=(half& a, const half& b) { + a = a + b; + return a; +} + +DEVICE inline half& operator-=(half& a, const half& b) { + a = a - b; + return a; +} + +DEVICE inline half& operator*=(half& a, const half& b) { + a = a * b; + return a; +} + +DEVICE inline half& operator/=(half& a, const half& b) { + a = a / b; + return a; +} + +DEVICE inline bool operator==(const half& a, const half& b) { + return __heq(a, b); +} + +DEVICE inline bool operator!=(const half& a, const half& b) { + return __hne(a, b); +} + +DEVICE inline bool operator<(const half& a, const half& b) { + return __hlt(a, b); +} + +DEVICE inline bool operator<=(const half& a, const half& b) { + return __hle(a, b); +} + +DEVICE inline bool operator>(const half& a, const half& b) { + return __hgt(a, b); +} + +DEVICE inline bool operator>=(const half& a, const half& b) { + return __hge(a, b); +} + +/* +DEVICE inline float16 operator+(const float16& a, const float16& b) { return float16(__hadd(half(a), half(b))); } -__device__ inline float16 operator-(const float16& a, const float16& b) { +DEVICE inline float16 operator-(const float16& a, const float16& b) { return float16(__hsub(half(a), half(b))); } -__device__ inline float16 operator*(const float16& a, const float16& b) { +DEVICE inline float16 operator*(const float16& a, const float16& b) { return float16(__hmul(half(a), half(b))); } -__device__ inline float16 operator/(const float16& a, const float16& b) { - // TODO(kexinzhao): check the cuda version that starts to support __hdiv +DEVICE inline float16 operator/(const float16& a, const float16& b) { float num = __half2float(half(a)); float denom = __half2float(half(b)); return float16(num / denom); } -__device__ inline float16 operator-(const float16& a) { +DEVICE inline float16 operator-(const float16& a) { return float16(__hneg(half(a))); } -__device__ inline float16& operator+=(float16& a, const float16& b) { +DEVICE inline float16& operator+=(float16& a, const float16& b) { a = a + b; return a; } -__device__ inline float16& operator-=(float16& a, const float16& b) { +DEVICE inline float16& operator-=(float16& a, const float16& b) { a = a - b; return a; } -__device__ inline float16& operator*=(float16& a, const float16& b) { +DEVICE inline float16& operator*=(float16& a, const float16& b) { a = a * b; return a; } -__device__ inline float16& operator/=(float16& a, const float16& b) { +DEVICE inline float16& operator/=(float16& a, const float16& b) { a = a / b; return a; } -__device__ inline bool operator==(const float16& a, const float16& b) { +DEVICE inline bool operator==(const float16& a, const float16& b) { return __heq(half(a), half(b)); } -__device__ inline bool operator!=(const float16& a, const float16& b) { +DEVICE inline bool operator!=(const float16& a, const float16& b) { return __hne(half(a), half(b)); } -__device__ inline bool operator<(const float16& a, const float16& b) { +DEVICE inline 
bool operator<(const float16& a, const float16& b) { return __hlt(half(a), half(b)); } -__device__ inline bool operator<=(const float16& a, const float16& b) { +DEVICE inline bool operator<=(const float16& a, const float16& b) { return __hle(half(a), half(b)); } -__device__ inline bool operator>(const float16& a, const float16& b) { +DEVICE inline bool operator>(const float16& a, const float16& b) { return __hgt(half(a), half(b)); } -__device__ inline bool operator>=(const float16& a, const float16& b) { +DEVICE inline bool operator>=(const float16& a, const float16& b) { return __hge(half(a), half(b)); } +*/ + +// Arithmetic operators on ARMv8.2-A CPU +#elif defined(PADDLE_WITH_NATIVE_FP16) +HOST inline float16 operator+(const float16& a, const float16& b) { + float16 res; + asm volatile( + "ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fadd h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), + [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&(res.x)) + : // clobbers + "memory", "v0", "v1"); + return res; +} + +HOST inline float16 operator-(const float16& a, const float16& b) { + float16 res; + asm volatile( + "ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fsub h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), + [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&(res.x)) + : // clobbers + "memory", "v0", "v1"); + return res; +} + +HOST inline float16 operator*(const float16& a, const float16& b) { + float16 res; + asm volatile( + "ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fmul h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), + [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&(res.x)) + : // clobbers + "memory", "v0", "v1"); + return res; +} + +HOST inline float16 operator/(const float16& a, const float16& b) { + float16 res; + asm volatile( + "ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fdiv h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), + [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&(res.x)) + : // clobbers + "memory", "v0", "v1"); + return res; +} -// On ARMv8.2-A CPU -#elif defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \ - (PADDLE_GNUC_VER >= 71 || PADDLE_CLANG_VER >= 39) -__host__ inline float16 operator+(const float16& a, const float16& b) { +HOST inline float16 operator-(const float16& a) { + float16 res; + asm volatile( + "ld1 {v0.h}[0], [%[a_ptr]]\n" + "fneg h0, h0\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), + [res_ptr] "r"(&(res.x)) + : // clobbers + "memory", "v0"); + return res; +} + +HOST inline float16& operator+=(float16& a, const float16& b) { + a = a + b; + return a; +} + +HOST inline float16& operator-=(float16& a, const float16& b) { + a = a - b; + return a; +} + +HOST inline float16& operator*=(float16& a, const float16& b) { + a = a * b; + return a; +} + +HOST inline float16& operator/=(float16& a, const float16& b) { + a = a / b; + return a; +} + +HOST inline bool operator==(const float16& a, const float16& b) { + uint16_t res; + asm volatile( + "ld1 {v0.h}[0], [%[a_ptr]]\n" + "ld1 {v1.h}[0], [%[b_ptr]]\n" + "fcmeq h0, h0, h1\n" + "st1 {v0.h}[0], [%[res_ptr]]\n" + : // outputs + : // inputs + [a_ptr] "r"(&(a.x)), + [b_ptr] "r"(&(b.x)), + [res_ptr] "r"(&res) + : // clobbers + "memory", "v0", "v1"); + return (res & 0xffff) != 0; +} + +HOST inline bool operator!=(const float16& a, const 
float16& b) {
+  return !(a == b);
+}
+
+HOST inline bool operator<(const float16& a, const float16& b) {
+  uint16_t res;
+  asm volatile(
+      "ld1 {v1.h}[0], [%[a_ptr]]\n"
+      "ld1 {v0.h}[0], [%[b_ptr]]\n"
+      "fcmgt h0, h0, h1\n"
+      "st1 {v0.h}[0], [%[res_ptr]]\n"
+      :  // outputs
+      :  // inputs
+      [a_ptr] "r"(&(a.x)),
+      [b_ptr] "r"(&(b.x)),
+      [res_ptr] "r"(&res)
+      :  // clobbers
+      "memory", "v0", "v1");
+  return (res & 0xffff) != 0;
+}
+
+HOST inline bool operator<=(const float16& a, const float16& b) {
+  uint16_t res;
+  asm volatile(
+      "ld1 {v1.h}[0], [%[a_ptr]]\n"
+      "ld1 {v0.h}[0], [%[b_ptr]]\n"
+      "fcmge h0, h0, h1\n"
+      "st1 {v0.h}[0], [%[res_ptr]]\n"
+      :  // outputs
+      :  // inputs
+      [a_ptr] "r"(&(a.x)),
+      [b_ptr] "r"(&(b.x)),
+      [res_ptr] "r"(&res)
+      :  // clobbers
+      "memory", "v0", "v1");
+  return (res & 0xffff) != 0;
+}
+
+HOST inline bool operator>(const float16& a, const float16& b) {
+  uint16_t res;
+  asm volatile(
+      "ld1 {v0.h}[0], [%[a_ptr]]\n"
+      "ld1 {v1.h}[0], [%[b_ptr]]\n"
+      "fcmgt h0, h0, h1\n"
+      "st1 {v0.h}[0], [%[res_ptr]]\n"
+      :  // outputs
+      :  // inputs
+      [a_ptr] "r"(&(a.x)),
+      [b_ptr] "r"(&(b.x)),
+      [res_ptr] "r"(&res)
+      :  // clobbers
+      "memory", "v0", "v1");
+  return (res & 0xffff) != 0;
+}
+
+HOST inline bool operator>=(const float16& a, const float16& b) {
+  uint16_t res;
+  asm volatile(
+      "ld1 {v0.h}[0], [%[a_ptr]]\n"
+      "ld1 {v1.h}[0], [%[b_ptr]]\n"
+      "fcmge h0, h0, h1\n"
+      "st1 {v0.h}[0], [%[res_ptr]]\n"
+      :  // outputs
+      :  // inputs
+      [a_ptr] "r"(&(a.x)),
+      [b_ptr] "r"(&(b.x)),
+      [res_ptr] "r"(&res)
+      :  // clobbers
+      "memory", "v0", "v1");
+  return (res & 0xffff) != 0;
+}
+
+/*
+HOST inline float16 operator+(const float16& a, const float16& b) {
+  return float16(vaddh_f16(float16_t(a), float16_t(b)));
+}
+
+HOST inline float16 operator-(const float16& a, const float16& b) {
+  return float16(vsubh_f16(float16_t(a), float16_t(b)));
+}
+
+HOST inline float16 operator*(const float16& a, const float16& b) {
+  return float16(vmulh_f16(float16_t(a), float16_t(b)));
+}
+
+HOST inline float16 operator/(const float16& a, const float16& b) {
+  return float16(vdivh_f16(float16_t(a), float16_t(b)));
+}
+
+HOST inline float16 operator-(const float16& a) {
+  return float16(vnegh_f16(float16_t(a)));
+}
+
+HOST inline float16& operator+=(float16& a, const float16& b) {
+  a = a + b;
+  return a;
+}
+
+HOST inline float16& operator-=(float16& a, const float16& b) {
+  a = a - b;
+  return a;
+}
+
+HOST inline float16& operator*=(float16& a, const float16& b) {
+  a = a * b;
+  return a;
+}
+
+HOST inline float16& operator/=(float16& a, const float16& b) {
+  a = a / b;
+  return a;
+}
+
+HOST inline bool operator==(const float16& a, const float16& b) {
+  return static_cast<bool>(vceqh_f16(float16_t(a), float16_t(b)));
+}
+
+HOST inline bool operator!=(const float16& a, const float16& b) {
+  return !(a == b);
+}
+
+HOST inline bool operator<(const float16& a, const float16& b) {
 #ifdef PADDLE_NEON_64
   return static_cast<bool>(vclth_f16(float16_t(a), float16_t(b)));
 #else
@@ -468,7 +738,7 @@ __host__ inline bool operator<(const float16& a, const float16& b) {
 #endif  // PADDLE_NEON_64
 }
 
-__host__ inline bool operator<=(const float16& a, const float16& b) {
+HOST inline bool operator<=(const float16& a, const float16& b) {
 #ifdef PADDLE_NEON_64
   return static_cast<bool>(vcleh_f16(float16_t(a), float16_t(b)));
 #else
@@ -476,7 +746,7 @@ __host__ inline bool operator<=(const float16& a, const float16& b) {
 #endif  // PADDLE_NEON_64
 }
 
-__host__ inline bool operator>(const float16& a, const float16& b) {
+HOST inline bool operator>(const float16& a, const float16& b) {
 #ifdef PADDLE_NEON_64
   return static_cast<bool>(vcgth_f16(float16_t(a), float16_t(b)));
 #else
@@ -484,231 +754,82 @@ __host__ inline bool operator>(const float16& a, const float16& b) {
 #endif  // PADDLE_NEON_64
 }
 
-__host__ inline bool operator>=(const float16& a, const float16& b) {
+HOST inline bool operator>=(const float16& a, const float16& b) {
 #ifdef PADDLE_NEON_64
   return static_cast<bool>(vcgeh_f16(float16_t(a), float16_t(b)));
 #else
   return float(a) >= float(b);
 #endif  // PADDLE_NEON_64
 }
+*/
 
-#else  // Software emulation on other cpu
-PADDLE_HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
+// Arithmetic operators, software emulated on other CPU
+#else
+HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
   return float16(float(a) + float(b));
 }
 
-PADDLE_HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
+HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
   return float16(float(a) - float(b));
 }
 
-PADDLE_HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
+HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
   return float16(float(a) * float(b));
 }
 
-PADDLE_HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
+HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
   return float16(float(a) / float(b));
 }
 
-PADDLE_HOSTDEVICE inline float16 operator-(const float16& a) {
+HOSTDEVICE inline float16 operator-(const float16& a) {
   float16 res;
   res.x = a.x ^ 0x8000;
   return res;
 }
 
-PADDLE_HOSTDEVICE inline float16& operator+=(float16& a, const float16& b) {
+HOSTDEVICE inline float16& operator+=(float16& a, const float16& b) {
   a = float16(float(a) + float(b));
   return a;
 }
 
-PADDLE_HOSTDEVICE inline float16& operator-=(float16& a, const float16& b) {
+HOSTDEVICE inline float16& operator-=(float16& a, const float16& b) {
   a = float16(float(a) - float(b));
   return a;
 }
 
-PADDLE_HOSTDEVICE inline float16& operator*=(float16& a, const float16& b) {
+HOSTDEVICE inline float16& operator*=(float16& a, const float16& b) {
   a = float16(float(a) * float(b));
   return a;
 }
 
-PADDLE_HOSTDEVICE inline float16& operator/=(float16& a, const float16& b) {
+HOSTDEVICE inline float16& operator/=(float16& a, const float16& b) {
   a = float16(float(a) / float(b));
   return a;
 }
 
-PADDLE_HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
   return float(a) == float(b);
 }
 
-PADDLE_HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
   return float(a) != float(b);
 }
 
-PADDLE_HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
   return float(a) < float(b);
 }
 
-PADDLE_HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
   return float(a) <= float(b);
 }
 
-PADDLE_HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
   return float(a) > float(b);
 }
 
-PADDLE_HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
   return float(a) >= float(b);
 }
 #endif
-
-namespace fp16_impl {
-
-union Bits {
-  float f;
-  int32_t si;
-  uint32_t ui;
-};
-
-const int shift = 13;
-const int shiftSign = 16;
-
-const int32_t infN = 0x7F800000;
-const int32_t maxN = 0x477FE000;  // max flt16 as flt32
-const int32_t minN = 0x38800000;  // min flt16 normal as flt32
-const int32_t sigN = 0x80000000;  // sign bit
-
-constexpr int32_t infC = infN >> shift;
-constexpr int32_t nanN = (infC + 1) << shift;  // minimum flt16 nan as float32
-constexpr int32_t maxC = maxN >> shift;
-constexpr int32_t minC = minN >> shift;
-constexpr int32_t sigC = sigN >> shiftSign;
-
-const int32_t mulN = 0x52000000;  // (1 << 23) / minN
-const int32_t mulC = 0x33800000;  // minN / (1 << (23 - shift))
-const int32_t subC = 0x003FF;  // max flt32 subnormal downshifted
-const int32_t norC = 0x00400;  // min flt32 normal downshifted
-
-constexpr int32_t maxD = infC - maxC - 1;
-constexpr int32_t minD = minC - subC - 1;
-
-PADDLE_HOSTDEVICE inline float16 float_to_half_rn(float f) {
-#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
-  half tmp = __float2half(f);
-  return *reinterpret_cast<float16*>(&tmp);
-
-#elif defined(PADDLE_NEON_64)
-  float16 res;
-  asm volatile(
-      "ld1 {v0.s}[0], [%[float_ptr]]\n"
-      "fcvt h0, s0\n"
-      "st1 {v0.h}[0], [%[half_ptr]]\n"
-      :  // outputs
-      :  // inputs
-      [float_ptr] "r"(&f),
-      [half_ptr] "r"(&(res.x))
-      :  // clobbers
-      "memory", "v0");
-  return res;
-
-#elif defined(PADDLE_NEON_32)
-  float16 res;
-  asm volatile(
-      "vld1.32 {d0[0]}, [%[float_ptr]]\n"
-      "vcvt.f16.f32 d0, q0\n"
-      "vst1.16 {d0[0]}, [%[half_ptr]]\n"
-      :  // outputs
-      :  // inputs
-      [float_ptr] "r"(&f),
-      [half_ptr] "r"(&(res.x))
-      :  // clobbers
-      "memory", "d0");
-  return res;
-
-#elif defined(__F16C__)
-  float16 res;
-  res.x = _cvtss_sh(f, 0);
-  return res;
-
-#else
-  // Conversion routine adapted from
-  // http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
-  Bits v, s;
-  v.f = f;
-  uint32_t sign = v.si & sigN;
-  v.si ^= sign;
-  sign >>= shiftSign;  // logical shift
-  s.si = mulN;
-  s.si = s.f * v.f;  // correct subnormals
-  v.si ^= (s.si ^ v.si) & -(minN > v.si);
-  v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN));
-  v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN));
-  v.ui >>= shift;  // logical shift
-  v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC);
-  v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC);
-  float16 res;
-  res.x = v.ui | sign;
-  return res;
-
-#endif
-}
-
-PADDLE_HOSTDEVICE inline float half_to_float(float16 h) {
-#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
-  half tmp = *reinterpret_cast<half*>(&h);
-  return __half2float(tmp);
-
-#elif defined(PADDLE_NEON_64)
-  float res;
-  asm volatile(
-      "ld1 {v0.h}[0], [%[half_ptr]]\n"
-      "fcvt s0, h0\n"
-      "st1 {v0.s}[0], [%[float_ptr]]\n"
-      :  // outputs
-      :  // inputs
-      [half_ptr] "r"(&(h.x)),
-      [float_ptr] "r"(&res)
-      :  // clobbers
-      "memory", "v0");
-  return res;
-
-#elif defined(PADDLE_NEON_32)
-  float res;
-  asm volatile(
-      "vld1.16 {d0[0]}, [%[half_ptr]]\n"
-      "vcvt.f32.f16 q0, d0\n"
-      "vst1.32 {d0[0]}, [%[float_ptr]]\n"
-      :  // outputs
-      :  // inputs
-      [half_ptr] "r"(&(h.x)),
-      [float_ptr] "r"(&res)
-      :  // clobbers
-      "memory", "v0");
-  return res;
-
-#elif defined(__F16C__)
-  return _cvtsh_ss(h.x);
-
-#else
-  // Conversion routine adapted from
-  // http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
-  Bits v;
-  v.ui = h.x;
-  int32_t sign = v.si & sigC;
-  v.si ^= sign;
-  sign <<= shiftSign;
-  v.si ^= ((v.si + minD) ^ v.si) & -(v.si > subC);
-  v.si ^= ((v.si + maxD) ^ v.si) & -(v.si > maxC);
-  Bits s;
-  s.si = mulC;
-  s.f *= v.si;
-  int32_t mask = -(norC > v.si);
-  v.si <<= shift;
-  v.si ^= (s.si ^ v.si) & mask;
-  v.si |= sign;
-  return v.f;
-
-#endif
-}
-
-}  // namespace fp16_impl
 }  // namespace paddle
diff --git a/paddle/math/tests/test_float16.cpp b/paddle/math/tests/test_float16.cpp
index 8c74bcc03..f5541d8f0 100644
--- a/paddle/math/tests/test_float16.cpp
+++ b/paddle/math/tests/test_float16.cpp
@@ -55,10 +55,12 @@ TEST(float16, conversion_cpu) {
   EXPECT_EQ(float16(false).x, 0x0000);
 
   // Implicit conversion to and from Eigen::half
+  /*
   Eigen::half tmp = float16(1.0f);
   float16 v_conv = tmp;
   EXPECT_EQ(tmp.x, 0x3c00);
   EXPECT_EQ(v_conv.x, 0x3c00);
+  */
 
   // Default constructor
   float16 v_def;
-- 
GitLab
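
Editorial illustration, not part of the patch: the "Arithmetic operators on GPU" comment in the diff above means that, with the half overloads guarded by CUDA_VERSION < 9000, kernel code written against CUDA 9.0's built-in half operators also compiles under CUDA 7.5/8.0 on devices of compute capability 5.3 or higher. A minimal sketch; the kernel name and setup are hypothetical:

    // haxpy.cu -- hypothetical kernel; assumes paddle/math/float16.h is
    // included and the file is compiled with nvcc for sm_53 or newer.
    __global__ void haxpy(int n, half alpha, const half* x, half* y) {
      int i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i < n) {
        // operator* and operator+ resolve to the __hmul/__hadd wrappers on
        // CUDA < 9.0 and to the built-in half operators on CUDA >= 9.0.
        y[i] = alpha * x[i] + y[i];
      }
    }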
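
Likewise, a host-side sketch of the conversion and software-emulated arithmetic paths introduced by this patch, assuming a CPU without native fp16 support (1.5 and 3.5 are exactly representable in fp16, and 3.5 encodes as 0x4300):

    #include <cstdio>
    #include "paddle/math/float16.h"

    int main() {
      using paddle::float16;
      float16 a(1.5f);                // float -> half via the explicit constructor
      float16 b = a + float16(2.0f);  // falls back to float16(float(a) + float(b))
      // Conversions are explicit after this patch, hence the static_cast.
      printf("%.1f (bits 0x%04x)\n", static_cast<float>(b), b.x);  // 3.5 (bits 0x4300)
      return 0;
    }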