/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <stdint.h>

#include <cmath>
#include <iostream>
#include <limits>

#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#endif  // PADDLE_WITH_CUDA

#ifdef PADDLE_WITH_HIP
#include <hip/hip_runtime.h>
#endif

#if defined(__CUDACC__) && CUDA_VERSION >= 7050
#define PADDLE_CUDA_FP16
#include <cuda_fp16.h>
#endif

#ifdef __HIPCC__
#define PADDLE_CUDA_FP16
#include <hip/hip_fp16.h>
#endif

#if !defined(_WIN32)
#define PADDLE_ALIGN(x) __attribute__((aligned(x)))
#else
#define PADDLE_ALIGN(x) __declspec(align(x))
#endif

#define CUDA_ARCH_FP16_SUPPORTED(CUDA_ARCH) (CUDA_ARCH >= 600)

#if (defined(__CUDACC__) || defined(__HIPCC__))
#define HOSTDEVICE __host__ __device__
#define DEVICE __device__
#define HOST __host__
#else
#define HOSTDEVICE
#define DEVICE
#define HOST
#endif

namespace paddle {
namespace platform {

// Use PADDLE_ALIGN(2) to ensure that each float16 will be allocated
// and aligned at least on a 2-byte boundary, which leads to efficient
// memory access of float16 struct and also makes float16 compatible
// with CUDA half, ARM float16_t data types.
struct PADDLE_ALIGN(2) float16 {
 public:
  uint16_t x;

  // The following defaulted special class member functions
  // are added to make float16 pass the std::is_trivial test
  float16() = default;
  float16(const float16& o) = default;
  float16& operator=(const float16& o) = default;
  float16(float16&& o) = default;
  float16& operator=(float16&& o) = default;
  ~float16() = default;

// Constructors
#ifdef PADDLE_CUDA_FP16
  HOSTDEVICE inline explicit float16(const half& h) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#if defined(PADDLE_WITH_HIP) || CUDA_VERSION >= 9000
    x = reinterpret_cast<__half_raw*>(const_cast<half*>(&h))->x;
#else
    x = h.x;
#endif  // CUDA_VERSION >= 9000
#endif
  }
#endif  // PADDLE_CUDA_FP16

#ifdef PADDLE_WITH_NATIVE_FP16
  // __fp16 is a native half precision data type for arm cpu,
  // float16_t is an alias for __fp16
  HOSTDEVICE inline explicit float16(const float16_t& h) {
    x = *reinterpret_cast<const uint16_t*>(&h);
  }
#endif

  HOSTDEVICE inline explicit float16(float val) {
#if defined(PADDLE_CUDA_FP16) && \
    (defined(__HIPCC__) || (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300))
    half tmp = __float2half(val);
    x = *reinterpret_cast<uint16_t*>(&tmp);

#elif defined(PADDLE_WITH_NATIVE_FP16)
    float32x4_t tmp = vld1q_dup_f32(&val);
    float16_t res = vget_lane_f16(vcvt_f16_f32(tmp), 0);
    x = *reinterpret_cast<uint16_t*>(&res);

#elif defined(__F16C__)
    x = _cvtss_sh(val, 0);

#else
    // Conversion routine adapted from
    // http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
    Bits v, s;
    v.f = val;
    uint32_t sign = v.si & sigN;
    v.si ^= sign;
    sign >>= shiftSign;  // logical shift
    s.si = mulN;
    s.si = s.f * v.f;  // correct subnormals
    v.si ^= (s.si ^ v.si) & -(minN > v.si);
    v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN));
    v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN));
    v.ui >>= shift;  // logical shift
    v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC);
    v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC);
    x = v.ui | sign;

#endif
  }

  HOSTDEVICE inline explicit float16(bool b) : x(b ? 0x3c00 : 0) {}

  template <class T>
  HOSTDEVICE inline explicit float16(const T& val)
      : x(float16(static_cast<float>(val)).x) {}

// Assignment operators
#ifdef PADDLE_CUDA_FP16
  HOSTDEVICE inline float16& operator=(const half& rhs) {
#if defined(PADDLE_WITH_HIP) || CUDA_VERSION >= 9000
    x = reinterpret_cast<__half_raw*>(const_cast<half*>(&rhs))->x;
#else
    x = rhs.x;
#endif
    return *this;
  }
#endif

#ifdef PADDLE_WITH_NATIVE_FP16
  HOSTDEVICE inline float16& operator=(const float16_t& rhs) {
    x = *reinterpret_cast<const uint16_t*>(&rhs);
    return *this;
  }
#endif

  HOSTDEVICE inline float16& operator=(bool b) {
    x = b ? 0x3c00 : 0;
    return *this;
  }

  HOSTDEVICE inline float16& operator=(int8_t val) {
    x = float16(val).x;
    return *this;
  }

  HOSTDEVICE inline float16& operator=(uint8_t val) {
    x = float16(val).x;
    return *this;
  }

  HOSTDEVICE inline float16& operator=(int16_t val) {
    x = float16(val).x;
    return *this;
  }

  HOSTDEVICE inline float16& operator=(uint16_t val) {
    x = float16(val).x;
    return *this;
  }

  HOSTDEVICE inline float16& operator=(int32_t val) {
    x = float16(val).x;
    return *this;
  }

  HOSTDEVICE inline float16& operator=(uint32_t val) {
    x = float16(val).x;
    return *this;
  }

  HOSTDEVICE inline float16& operator=(int64_t val) {
    x = float16(val).x;
    return *this;
  }

  HOSTDEVICE inline float16& operator=(uint64_t val) {
    x = float16(val).x;
    return *this;
  }

  HOSTDEVICE inline float16& operator=(float val) {
    x = float16(val).x;
    return *this;
  }

  HOSTDEVICE inline float16& operator=(double val) {
    x = float16(val).x;
    return *this;
  }

// Conversion operators
#ifdef PADDLE_CUDA_FP16
  HOSTDEVICE inline half to_half() const {
#if defined(PADDLE_WITH_HIP) || CUDA_VERSION >= 9000
    __half_raw h;
    h.x = x;
    return half(h);
#else
    half h;
    h.x = x;
    return h;
#endif  // CUDA_VERSION >= 9000
  }
#endif  // PADDLE_CUDA_FP16

#ifdef PADDLE_WITH_NATIVE_FP16
  HOSTDEVICE inline explicit operator float16_t() const {
    return *reinterpret_cast<const float16_t*>(this);
  }
#endif

  HOSTDEVICE inline operator float() const {
#if defined(PADDLE_CUDA_FP16) && \
    (defined(__HIPCC__) || (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300))
    half tmp = *reinterpret_cast<const half*>(this);
    return __half2float(tmp);

#elif defined(PADDLE_WITH_NATIVE_FP16)
    float16x4_t res = vld1_dup_f16(reinterpret_cast<const float16_t*>(this));
    return vgetq_lane_f32(vcvt_f32_f16(res), 0);

#elif defined(__F16C__)
    return _cvtsh_ss(this->x);

#else
    // Conversion routine adapted from
    // http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
    Bits v;
    v.ui = this->x;
    int32_t sign = v.si & sigC;
    v.si ^= sign;
    sign <<= shiftSign;
    v.si ^= ((v.si + minD) ^ v.si) & -(v.si > subC);
    v.si ^= ((v.si + maxD) ^ v.si) & -(v.si > maxC);
    Bits s;
    s.si = mulC;
    s.f *= v.si;
    int32_t mask = -(norC > v.si);
    v.si <<= shift;
    v.si ^= (s.si ^ v.si) & mask;
    v.si |= sign;
    return v.f;

#endif
  }

  HOSTDEVICE inline explicit operator bool() const { return (x & 0x7fff) != 0; }

  HOSTDEVICE inline explicit operator int8_t() const {
    return static_cast<int8_t>(static_cast<float>(*this));
  }

  HOSTDEVICE inline explicit operator uint8_t() const {
    return static_cast<uint8_t>(static_cast<float>(*this));
  }

  HOSTDEVICE inline explicit operator int16_t() const {
    return static_cast<int16_t>(static_cast<float>(*this));
  }

  HOSTDEVICE inline explicit operator uint16_t() const {
    return static_cast<uint16_t>(static_cast<float>(*this));
  }

  HOSTDEVICE inline explicit operator int32_t() const {
    return static_cast<int32_t>(static_cast<float>(*this));
  }

  HOSTDEVICE inline explicit operator uint32_t() const {
    return static_cast<uint32_t>(static_cast<float>(*this));
  }

  HOSTDEVICE inline explicit operator int64_t() const {
    return static_cast<int64_t>(static_cast<float>(*this));
  }

  HOSTDEVICE inline explicit operator uint64_t() const {
    return static_cast<uint64_t>(static_cast<float>(*this));
  }

  HOSTDEVICE inline operator double() const {
    return static_cast<double>(static_cast<float>(*this));
  }

 private:
  union Bits {
    float f;
    int32_t si;
    uint32_t ui;
  };

  static const int shift = 13;
  static const int shiftSign = 16;

  static const int32_t infN = 0x7F800000;
  static const int32_t maxN = 0x477FE000;  // max flt16 as flt32
  static const int32_t minN = 0x38800000;  // min flt16 normal as flt32
  static const int32_t sigN = 0x80000000;  // sign bit

  static constexpr int32_t infC = infN >> shift;
  static constexpr int32_t nanN = (infC + 1)
                                  << shift;  // minimum flt16 nan as float32
  static constexpr int32_t maxC = maxN >> shift;
  static constexpr int32_t minC = minN >> shift;
  static constexpr int32_t sigC = sigN >> shiftSign;

  static const int32_t mulN = 0x52000000;  // (1 << 23) / minN
  static const int32_t mulC = 0x33800000;  // minN / (1 << (23 - shift))
  static const int32_t subC = 0x003FF;     // max flt32 subnormal downshifted
  static const int32_t norC = 0x00400;     // min flt32 normal downshifted

  static constexpr int32_t maxD = infC - maxC - 1;
  static constexpr int32_t minD = minC - subC - 1;
};
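// Illustrative usage sketch (assumes only the interface above): float16 stores
// raw IEEE fp16 bits in `x` and converts through float, e.g.
//   paddle::platform::float16 a(1.5f);
//   float f = static_cast<float>(a);  // exact, 1.5f is representable in fp16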

// Arithmetic operators on GPU
// CUDA 9.0 provides built-in arithmetic operators for half while
// CUDA 7.5 and 8.0 do not. The arithmetic operators defined here are
// for users to write similar CUDA code in CUDA 7.5 and 8.0 as in
// CUDA 9.0 regarding the half data type.
// ROCm provides built-in arithmetic operators for half because
// __HIP_NO_HALF_OPERATORS__ is not defined.
#if defined(PADDLE_CUDA_FP16) && !defined(__HIPCC__) && CUDA_VERSION < 9000
DEVICE inline half operator+(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hadd(a, b);
#else
  float res = static_cast<float>(float16(a)) + static_cast<float>(float16(b));
  return float16(res).to_half();
#endif
}

DEVICE inline half operator-(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hsub(a, b);
#else
  float res = static_cast<float>(float16(a)) - static_cast<float>(float16(b));
  return float16(res).to_half();
#endif
}

DEVICE inline half operator*(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hmul(a, b);
#else
  float res = static_cast<float>(float16(a)) * static_cast<float>(float16(b));
  return float16(res).to_half();
#endif
}

DEVICE inline half operator/(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  float num = __half2float(a);
  float denom = __half2float(b);
  return __float2half(num / denom);
#else
  float res = static_cast<float>(float16(a)) / static_cast<float>(float16(b));
  return float16(res).to_half();
#endif
}

DEVICE inline half operator-(const half& a) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hneg(a);
#else
  float res = -static_cast<float>(float16(a));
  return float16(res).to_half();
#endif
}

#ifndef PADDLE_WITH_HIP  // not defined __HIP_NO_HALF_OPERATORS__
DEVICE inline half& operator+=(half& a, const half& b) {  // NOLINT
  a = a + b;
  return a;
}

DEVICE inline half& operator-=(half& a, const half& b) {  // NOLINT
  a = a - b;
  return a;
}

DEVICE inline half& operator*=(half& a, const half& b) {  // NOLINT
  a = a * b;
  return a;
}

DEVICE inline half& operator/=(half& a, const half& b) {  // NOLINT
  a = a / b;
  return a;
}
#endif

DEVICE inline bool operator==(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __heq(a, b);
#else
  return static_cast<float>(float16(a)) == static_cast<float>(float16(b));
#endif
}

DEVICE inline bool operator!=(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hne(a, b);
#else
  return static_cast<float>(float16(a)) != static_cast<float>(float16(b));
#endif
}

DEVICE inline bool operator<(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hlt(a, b);
#else
  return static_cast<float>(float16(a)) < static_cast<float>(float16(b));
#endif
}

DEVICE inline bool operator<=(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hle(a, b);
#else
  return static_cast<float>(float16(a)) <= static_cast<float>(float16(b));
#endif
}

DEVICE inline bool operator>(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hgt(a, b);
#else
  return static_cast<float>(float16(a)) > static_cast<float>(float16(b));
#endif
}

DEVICE inline bool operator>=(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hge(a, b);
#else
  return static_cast<float>(float16(a)) >= static_cast<float>(float16(b));
#endif
}

#endif  // PADDLE_CUDA_FP16
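// Illustrative device-side sketch (hypothetical kernel, for illustration only):
// with the overloads above, CUDA 7.5/8.0 kernels can use half directly, e.g.
//   __global__ void axpy_fp16(half a, const half* x, half* y, int n) {
//     int i = blockIdx.x * blockDim.x + threadIdx.x;
//     if (i < n) y[i] = a * x[i] + y[i];
//   }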

// Arithmetic operators for float16 on GPU
#if defined(PADDLE_CUDA_FP16)
// HIPCC reports a compile error if __device__ functions such as __hadd and
// __hsub are called inside a __host__ __device__ function
#if defined(__HIPCC__)
DEVICE inline float16 operator+(const float16& a, const float16& b) {
  return float16(__hadd(a.to_half(), b.to_half()));
}
HOST inline float16 operator+(const float16& a, const float16& b) {
  return float16(static_cast<float>(a) + static_cast<float>(b));
}
#else
HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return float16(__hadd(a.to_half(), b.to_half()));
#else
  return float16(static_cast<float>(a) + static_cast<float>(b));
#endif
}
#endif

#if defined(__HIPCC__)
DEVICE inline float16 operator-(const float16& a, const float16& b) {
  return float16(__hsub(a.to_half(), b.to_half()));
}
HOST inline float16 operator-(const float16& a, const float16& b) {
  return float16(static_cast<float>(a) - static_cast<float>(b));
}
#else
HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return float16(__hsub(a.to_half(), b.to_half()));
#else
  return float16(static_cast<float>(a) - static_cast<float>(b));
#endif
}
#endif

#if defined(__HIPCC__)
DEVICE inline float16 operator*(const float16& a, const float16& b) {
  return float16(__hmul(a.to_half(), b.to_half()));
}
HOST inline float16 operator*(const float16& a, const float16& b) {
  return float16(static_cast<float>(a) * static_cast<float>(b));
}
#else
HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return float16(__hmul(a.to_half(), b.to_half()));
#else
  return float16(static_cast<float>(a) * static_cast<float>(b));
#endif
}
#endif

#if defined(__HIPCC__)
DEVICE inline float16 operator/(const float16& a, const float16& b) {
  return float16(__hdiv(a.to_half(), b.to_half()));
}
HOST inline float16 operator/(const float16& a, const float16& b) {
  return float16(static_cast<float>(a) / static_cast<float>(b));
}
#else
HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  // TODO(kexinzhao): check which cuda version starts to support __hdiv
  float num = __half2float(a.to_half());
  float denom = __half2float(b.to_half());
  return float16(num / denom);
#else
  return float16(static_cast<float>(a) / static_cast<float>(b));
#endif
}
#endif

#if defined(__HIPCC__)
DEVICE inline float16 operator-(const float16& a) {
  return float16(__hneg(a.to_half()));
}
HOST inline float16 operator-(const float16& a) {
  float16 res;
  res.x = a.x ^ 0x8000;
  return res;
}
#else
HOSTDEVICE inline float16 operator-(const float16& a) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return float16(__hneg(a.to_half()));
#else
  float16 res;
  res.x = a.x ^ 0x8000;
  return res;
#endif
}
#endif

HOSTDEVICE inline float16& operator+=(float16& a, const float16& b) {  // NOLINT
  a = a + b;
  return a;
}

HOSTDEVICE inline float16& operator-=(float16& a, const float16& b) {  // NOLINT
  a = a - b;
  return a;
}

HOSTDEVICE inline float16& operator*=(float16& a, const float16& b) {  // NOLINT
  a = a * b;
  return a;
}

HOSTDEVICE inline float16& operator/=(float16& a, const float16& b) {  // NOLINT
  a = a / b;
  return a;
}

// HIPCC reports a compile error if __device__ functions such as __heq and
// __hne are called inside a __host__ __device__ function
#if defined(__HIPCC__)
DEVICE inline bool operator==(const float16& a, const float16& b) {
  return __heq(a.to_half(), b.to_half());
}
HOST inline bool operator==(const float16& a, const float16& b) {
  return static_cast<float>(a) == static_cast<float>(b);
}
#else  // __HIPCC__
HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __heq(a.to_half(), b.to_half());
#else
  return static_cast<float>(a) == static_cast<float>(b);
#endif
}
#endif  // __HIPCC__

#if defined(__HIPCC__)
DEVICE inline bool operator!=(const float16& a, const float16& b) {
  return __hne(a.to_half(), b.to_half());
}
HOST inline bool operator!=(const float16& a, const float16& b) {
  return static_cast<float>(a) != static_cast<float>(b);
}
#else  // __HIPCC__
HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hne(a.to_half(), b.to_half());
#else
  return static_cast<float>(a) != static_cast<float>(b);
#endif
}
#endif  // __HIPCC__

#if defined(__HIPCC__)
DEVICE inline bool operator<(const float16& a, const float16& b) {
  return __hlt(a.to_half(), b.to_half());
}
HOST inline bool operator<(const float16& a, const float16& b) {
  return static_cast<float>(a) < static_cast<float>(b);
}
#else  // __HIPCC__
HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hlt(a.to_half(), b.to_half());
#else
  return static_cast<float>(a) < static_cast<float>(b);
#endif
}
#endif  // __HIPCC__

#if defined(__HIPCC__)
DEVICE inline bool operator<=(const float16& a, const float16& b) {
  return __hle(a.to_half(), b.to_half());
}
HOST inline bool operator<=(const float16& a, const float16& b) {
  return static_cast<float>(a) <= static_cast<float>(b);
}
#else  // __HIPCC__
HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hle(a.to_half(), b.to_half());
#else
  return static_cast<float>(a) <= static_cast<float>(b);
#endif
}
#endif  // __HIPCC__

#if defined(__HIPCC__)
DEVICE inline bool operator>(const float16& a, const float16& b) {
  return __hgt(a.to_half(), b.to_half());
}
HOST inline bool operator>(const float16& a, const float16& b) {
  return static_cast<float>(a) > static_cast<float>(b);
}
#else  // __HIPCC__
HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hgt(a.to_half(), b.to_half());
#else
  return static_cast<float>(a) > static_cast<float>(b);
#endif
}
#endif  // __HIPCC__

#if defined(__HIPCC__)
DEVICE inline bool operator>=(const float16& a, const float16& b) {
  return __hge(a.to_half(), b.to_half());
}
HOST inline bool operator>=(const float16& a, const float16& b) {
  return static_cast<float>(a) >= static_cast<float>(b);
}
#else  // __HIPCC__
HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hge(a.to_half(), b.to_half());
#else
  return static_cast<float>(a) >= static_cast<float>(b);
#endif
}
#endif  // __HIPCC__

// Arithmetic operators for float16 on ARMv8.2-A CPU
#elif defined(PADDLE_WITH_NATIVE_FP16)
inline float16 operator+(const float16& a, const float16& b) {
  float16 res;
  asm volatile(
      "ld1 {v0.h}[0], [%[a_ptr]]\n"
      "ld1 {v1.h}[0], [%[b_ptr]]\n"
      "fadd h0, h0, h1\n"
      "st1 {v0.h}[0], [%[res_ptr]]\n"
      :  // outputs
      :  // inputs
      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
      [res_ptr] "r"(&(res.x))
      :  // clobbers
      "memory", "v0", "v1");
  return res;
}

inline float16 operator-(const float16& a, const float16& b) {
  float16 res;
  asm volatile(
      "ld1 {v0.h}[0], [%[a_ptr]]\n"
      "ld1 {v1.h}[0], [%[b_ptr]]\n"
      "fsub h0, h0, h1\n"
      "st1 {v0.h}[0], [%[res_ptr]]\n"
      :  // outputs
      :  // inputs
      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
      [res_ptr] "r"(&(res.x))
      :  // clobbers
      "memory", "v0", "v1");
  return res;
}

inline float16 operator*(const float16& a, const float16& b) {
  float16 res;
  asm volatile(
      "ld1 {v0.h}[0], [%[a_ptr]]\n"
      "ld1 {v1.h}[0], [%[b_ptr]]\n"
      "fmul h0, h0, h1\n"
      "st1 {v0.h}[0], [%[res_ptr]]\n"
      :  // outputs
      :  // inputs
      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
      [res_ptr] "r"(&(res.x))
      :  // clobbers
      "memory", "v0", "v1");
  return res;
}

inline float16 operator/(const float16& a, const float16& b) {
  float16 res;
  asm volatile(
      "ld1 {v0.h}[0], [%[a_ptr]]\n"
      "ld1 {v1.h}[0], [%[b_ptr]]\n"
      "fdiv h0, h0, h1\n"
      "st1 {v0.h}[0], [%[res_ptr]]\n"
      :  // outputs
      :  // inputs
      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
      [res_ptr] "r"(&(res.x))
      :  // clobbers
      "memory", "v0", "v1");
  return res;
}

inline float16 operator-(const float16& a) {
  float16 res;
  asm volatile(
      "ld1 {v0.h}[0], [%[a_ptr]]\n"
      "fneg h0, h0\n"
      "st1 {v0.h}[0], [%[res_ptr]]\n"
      :  // outputs
      :  // inputs
      [a_ptr] "r"(&(a.x)),
      [res_ptr] "r"(&(res.x))
      :  // clobbers
      "memory", "v0");
  return res;
}

inline float16& operator+=(float16& a, const float16& b) {  // NOLINT
  a = a + b;
  return a;
}

inline float16& operator-=(float16& a, const float16& b) {  // NOLINT
  a = a - b;
  return a;
}

inline float16& operator*=(float16& a, const float16& b) {  // NOLINT
  a = a * b;
  return a;
}

inline float16& operator/=(float16& a, const float16& b) {  // NOLINT
  a = a / b;
  return a;
}

inline bool operator==(const float16& a, const float16& b) {
  uint16_t res;
  asm volatile(
      "ld1 {v0.h}[0], [%[a_ptr]]\n"
      "ld1 {v1.h}[0], [%[b_ptr]]\n"
      "fcmeq h0, h0, h1\n"
      "st1 {v0.h}[0], [%[res_ptr]]\n"
      :  // outputs
      :  // inputs
      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
      [res_ptr] "r"(&res)
      :  // clobbers
      "memory", "v0", "v1");
  return (res & 0xffff) != 0;
}

inline bool operator!=(const float16& a, const float16& b) { return !(a == b); }

inline bool operator<(const float16& a, const float16& b) {
  uint16_t res;
  asm volatile(
      "ld1 {v1.h}[0], [%[a_ptr]]\n"
      "ld1 {v0.h}[0], [%[b_ptr]]\n"
      "fcmgt h0, h0, h1\n"
      "st1 {v0.h}[0], [%[res_ptr]]\n"
      :  // outputs
      :  // inputs
      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
      [res_ptr] "r"(&res)
      :  // clobbers
      "memory", "v0", "v1");
  return (res & 0xffff) != 0;
}

inline bool operator<=(const float16& a, const float16& b) {
  uint16_t res;
  asm volatile(
      "ld1 {v1.h}[0], [%[a_ptr]]\n"
      "ld1 {v0.h}[0], [%[b_ptr]]\n"
      "fcmge h0, h0, h1\n"
      "st1 {v0.h}[0], [%[res_ptr]]\n"
      :  // outputs
      :  // inputs
      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
      [res_ptr] "r"(&res)
      :  // clobbers
      "memory", "v0", "v1");
  return (res & 0xffff) != 0;
}

inline bool operator>(const float16& a, const float16& b) {
  uint16_t res;
  asm volatile(
      "ld1 {v0.h}[0], [%[a_ptr]]\n"
      "ld1 {v1.h}[0], [%[b_ptr]]\n"
      "fcmgt h0, h0, h1\n"
      "st1 {v0.h}[0], [%[res_ptr]]\n"
      :  // outputs
      :  // inputs
      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
      [res_ptr] "r"(&res)
      :  // clobbers
      "memory", "v0", "v1");
  return (res & 0xffff) != 0;
}

inline bool operator>=(const float16& a, const float16& b) {
  uint16_t res;
  asm volatile(
      "ld1 {v0.h}[0], [%[a_ptr]]\n"
      "ld1 {v1.h}[0], [%[b_ptr]]\n"
      "fcmge h0, h0, h1\n"
      "st1 {v0.h}[0], [%[res_ptr]]\n"
      :  // outputs
      :  // inputs
      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
      [res_ptr] "r"(&res)
      :  // clobbers
      "memory", "v0", "v1");
  return (res & 0xffff) != 0;
}

// Arithmetic operators for float16, software emulated on other CPU
#else
inline float16 operator+(const float16& a, const float16& b) {
  return float16(static_cast<float>(a) + static_cast<float>(b));
}

inline float16 operator-(const float16& a, const float16& b) {
  return float16(static_cast<float>(a) - static_cast<float>(b));
}

inline float16 operator*(const float16& a, const float16& b) {
  return float16(static_cast<float>(a) * static_cast<float>(b));
}

inline float16 operator/(const float16& a, const float16& b) {
  return float16(static_cast<float>(a) / static_cast<float>(b));
}

inline float16 operator-(const float16& a) {
  float16 res;
  res.x = a.x ^ 0x8000;
  return res;
}

inline float16& operator+=(float16& a, const float16& b) {  // NOLINT
  a = float16(static_cast<float>(a) + static_cast<float>(b));
  return a;
}

inline float16& operator-=(float16& a, const float16& b) {  // NOLINT
  a = float16(static_cast<float>(a) - static_cast<float>(b));
  return a;
}

inline float16& operator*=(float16& a, const float16& b) {  // NOLINT
  a = float16(static_cast<float>(a) * static_cast<float>(b));
  return a;
}

inline float16& operator/=(float16& a, const float16& b) {  // NOLINT
  a = float16(static_cast<float>(a) / static_cast<float>(b));
  return a;
}

inline bool operator==(const float16& a, const float16& b) {
  return static_cast<float>(a) == static_cast<float>(b);
}

inline bool operator!=(const float16& a, const float16& b) {
  return static_cast<float>(a) != static_cast<float>(b);
}

inline bool operator<(const float16& a, const float16& b) {
  return static_cast<float>(a) < static_cast<float>(b);
}

inline bool operator<=(const float16& a, const float16& b) {
  return static_cast<float>(a) <= static_cast<float>(b);
}

inline bool operator>(const float16& a, const float16& b) {
  return static_cast<float>(a) > static_cast<float>(b);
}

inline bool operator>=(const float16& a, const float16& b) {
  return static_cast<float>(a) >= static_cast<float>(b);
}
#endif

HOSTDEVICE inline float16 raw_uint16_to_float16(uint16_t a) {
  float16 res;
  res.x = a;
  return res;
}
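// Example (illustrative): raw_uint16_to_float16(0x3c00) reinterprets the IEEE
// fp16 bit pattern 0x3c00 as the value 1.0; no float-to-half rounding occurs.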

// HIPCC reports a compile error if the __device__ function __hisnan is called
// inside a __host__ __device__ function
#if defined(PADDLE_CUDA_FP16) && defined(__HIPCC__)
DEVICE inline bool(isnan)(const float16& a) { return __hisnan(a.to_half()); }
HOST inline bool(isnan)(const float16& a) { return (a.x & 0x7fff) > 0x7c00; }
#else
HOSTDEVICE inline bool(isnan)(const float16& a) {
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hisnan(a.to_half());
#else
  return (a.x & 0x7fff) > 0x7c00;
#endif
}
#endif

HOSTDEVICE inline bool(isinf)(const float16& a) {
  return (a.x & 0x7fff) == 0x7c00;
}

HOSTDEVICE inline bool(isfinite)(const float16& a) {
  return !((isnan)(a)) && !((isinf)(a));
}
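// Bit-pattern note (illustrative): with the sign bit masked off (x & 0x7fff),
// 0x7c00 encodes infinity and any larger value is a NaN payload, which is what
// the isnan/isinf checks above rely on.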

HOSTDEVICE inline float16(abs)(const float16& a) {
#if defined(PADDLE_CUDA_FP16) && \
    (defined(__HIPCC__) || (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530))
  return float16(::fabs(static_cast<float>(a)));
#else
  return float16(std::abs(static_cast<float>(a)));
#endif
}

inline std::ostream& operator<<(std::ostream& os, const float16& a) {
  os << static_cast<float>(a);
  return os;
}

}  // namespace platform
}  // namespace paddle

namespace std {

// Override the std::is_pod::value for float16
// The reason is that different compilers implemented std::is_pod based on
// different C++ standards. float16 class is a plain old data in C++11 given
// that it is both trivial and standard_layout.
// However, std::is_pod in nvcc 8.0 host c++ compiler follows C++0x and is
// more restricted in that you cannot provide any customized
// constructor in float16. Hence, we override is_pod here following C++11
// so that .cu files can be successfully compiled by nvcc.
template <>
struct is_pod<paddle::platform::float16> {
  static const bool value =
      is_trivial<paddle::platform::float16>::value &&
      is_standard_layout<paddle::platform::float16>::value;
};
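// Illustrative compile-time check (sketch, not compiled here):
//   static_assert(std::is_trivial<paddle::platform::float16>::value &&
//                     std::is_standard_layout<paddle::platform::float16>::value,
//                 "float16 must stay trivial and standard-layout");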

template <>
struct is_floating_point<paddle::platform::float16>
    : std::integral_constant<
          bool, std::is_same<paddle::platform::float16,
                             typename std::remove_cv<
                                 paddle::platform::float16>::type>::value> {};
template <>
struct is_signed<paddle::platform::float16> {
  static const bool value = true;
};

template <>
struct is_unsigned<paddle::platform::float16> {
  static const bool value = false;
};

inline bool isnan(const paddle::platform::float16& a) {
  return paddle::platform::isnan(a);
}

inline bool isinf(const paddle::platform::float16& a) {
  return paddle::platform::isinf(a);
}

template <>
struct numeric_limits<paddle::platform::float16> {
  static const bool is_specialized = true;
  static const bool is_signed = true;
  static const bool is_integer = false;
  static const bool is_exact = false;
  static const bool has_infinity = true;
  static const bool has_quiet_NaN = true;
  static const bool has_signaling_NaN = true;
  static const float_denorm_style has_denorm = denorm_present;
  static const bool has_denorm_loss = false;
  static const std::float_round_style round_style = std::round_to_nearest;
  static const bool is_iec559 = false;
  static const bool is_bounded = false;
  static const bool is_modulo = false;
  static const int digits = 11;
  static const int digits10 = 3;
  static const int max_digits10 = 5;
  static const int radix = 2;
  static const int min_exponent = -13;
  static const int min_exponent10 = -4;
  static const int max_exponent = 16;
  static const int max_exponent10 = 4;
  static const bool traps = true;
  static const bool tinyness_before = false;

  HOSTDEVICE static paddle::platform::float16(min)() {
    return paddle::platform::raw_uint16_to_float16(0x400);
  }
  HOSTDEVICE static paddle::platform::float16 lowest() {
    return paddle::platform::raw_uint16_to_float16(0xfbff);
  }
  HOSTDEVICE static paddle::platform::float16(max)() {
    return paddle::platform::raw_uint16_to_float16(0x7bff);
  }
  HOSTDEVICE static paddle::platform::float16 epsilon() {
    return paddle::platform::raw_uint16_to_float16(0x0800);
  }
  HOSTDEVICE static paddle::platform::float16 round_error() {
    return paddle::platform::float16(0.5);
  }
  HOSTDEVICE static paddle::platform::float16 infinity() {
    return paddle::platform::raw_uint16_to_float16(0x7c00);
  }
  HOSTDEVICE static paddle::platform::float16 quiet_NaN() {
    return paddle::platform::raw_uint16_to_float16(0x7e00);
  }
  HOSTDEVICE static paddle::platform::float16 signaling_NaN() {
    return paddle::platform::raw_uint16_to_float16(0x7e00);
  }
  HOSTDEVICE static paddle::platform::float16 denorm_min() {
    return paddle::platform::raw_uint16_to_float16(0x1);
  }
};

HOSTDEVICE inline paddle::platform::float16 abs(
    const paddle::platform::float16& a) {
  return paddle::platform::abs(a);
}

}  // namespace std