BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit a5feb771
Authored on Nov 27, 2017 by Kexin Zhao

address pr comment

Parent: 19e5c24f
2 changed files with 482 additions and 359 deletions (+482, -359):

    paddle/math/float16.h                 +480  -359
    paddle/math/tests/test_float16.cpp      +2    -0

paddle/math/float16.h
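The main change in this commit: float16's conversion constructors and conversion operators become explicit, and the hand-rolled PADDLE_HOSTDEVICE macro is replaced by HOSTDEVICE / DEVICE / HOST from paddle/platform/hostdevice.h. A minimal sketch of what the explicit conversions mean for callers (editor's illustration, not part of the commit):

    #include "paddle/math/float16.h"

    void demo() {
      paddle::float16 a(1.0f);          // still fine: explicit construction
      float f = static_cast<float>(a);  // still fine: explicit cast back
      // paddle::float16 b = 1.0f;      // no longer compiles: constructor is explicit
      // float g = a;                   // no longer compiles: operator float() is explicit
    }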
@@ -16,9 +16,14 @@ limitations under the License. */
 #include <cstdint>
 
+#ifdef PADDLE_WITH_CUDA
 #include <cuda.h>
+#endif  // PADDLE_WITH_CUDA
+
 #include "unsupported/Eigen/CXX11/Tensor"
 
+#include "paddle/platform/hostdevice.h"
+
 #ifdef __GNUC__
 #define PADDLE_GNUC_VER (__GNUC__ * 10 + __GNUC_MINOR__)
 #else
@@ -31,25 +36,12 @@ limitations under the License. */
 #define PADDLE_CLANG_VER 0
 #endif  // __clang__
 
-#ifdef __CUDACC__
-#define PADDLE_HOSTDEVICE __host__ __device__
-#if CUDA_VERSION >= 7050
+#if defined(__CUDACC__) && CUDA_VERSION >= 7050
 #define PADDLE_CUDA_FP16
 #include <cuda_fp16.h>
 #endif  // CUDA_VERSION >= 7050
-#else
-#define PADDLE_HOSTDEVICE
-#endif  // __CUDACC__
 
-#ifdef __arm__
-#define PADDLE_ARM_32
-#endif
-
-#ifdef __aarch64__
-#define PADDLE_ARM_64
-#endif
-
-#if defined(PADDLE_ARM_32) || defined(PADDLE_ARM_64)
+#if defined(__arm__) || defined(__aarch64__)
 #define PADDLE_ARM
 #endif
@@ -58,19 +50,12 @@ limitations under the License. */
 #include <arm_neon.h>
 #endif
 
-#if defined(PADDLE_NEON) && defined(PADDLE_ARM_32)
-#define PADDLE_NEON_32
-#endif
-
-#if defined(PADDLE_NEON) && defined(PADDLE_ARM_64)
-#define PADDLE_NEON_64
-#endif
+#if defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \
+    (PADDLE_GNUC_VER >= 62 || PADDLE_CLANG_VER >= 37)
+#define PADDLE_WITH_NATIVE_FP16
+#endif
 
-#ifdef PADDLE_ARM
-#ifdef __F16C__
-#undef __F16C__
-#endif  // __F16C__
-#else
+#ifndef PADDLE_ARM
 #include <immintrin.h>
 #endif  // PADDLE_ARM
@@ -78,27 +63,20 @@ limitations under the License. */
 namespace paddle {
 
-struct float16;
-
-namespace fp16_impl {
-// Convert from float to half precision in round-to-nearest-even mode
-PADDLE_HOSTDEVICE inline float16 float_to_half_rn(float f);
-PADDLE_HOSTDEVICE inline float half_to_float(float16 h);
-}  // namespace fp16_impl
-
 // Use PADDLE_ALIGNED(2) to ensure that each float16 will be allocated
 // and aligned at least on a 2-byte boundary, which leads to efficient
 // memory access of float16 struct and also makes float16 compatible
 // with CUDA half, ARM float16_t, and Eigen::half data types.
 struct PADDLE_ALIGN(2) float16 {
  public:
   uint16_t x;
 
-  PADDLE_HOSTDEVICE inline float16() : x(0) {}
+  HOSTDEVICE inline float16() : x(0) {}
 
-  PADDLE_HOSTDEVICE inline float16(const float16& h) : x(h.x) {}
+  HOSTDEVICE inline float16(const float16& h) : x(h.x) {}
 
 #ifdef PADDLE_CUDA_FP16
-  PADDLE_HOSTDEVICE inline float16(const half& h) {
+  HOSTDEVICE inline explicit float16(const half& h) {
 #if CUDA_VERSION >= 9000
     x = reinterpret_cast<__half_raw*>(&h)->x;
 #else
@@ -107,78 +85,64 @@ struct PADDLE_ALIGN(2) float16 {
   }
 #endif  // PADDLE_CUDA_FP16
 
-  PADDLE_HOSTDEVICE inline float16(const Eigen::half& h) : x(h.x) {}
+  HOSTDEVICE inline explicit float16(const Eigen::half& h) : x(h.x) {}
 
-#if defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \
-    (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34)
+#ifdef PADDLE_WITH_NATIVE_FP16
   // __fp16 is a native half precision data type for arm cpu,
   // float16_t is an alias for __fp16 in arm_fp16.h,
   // which is included in arm_neon.h.
-  PADDLE_HOSTDEVICE inline float16(const float16_t& h) {
-    float16_t tmp = h;
-    x = *reinterpret_cast<uint16_t*>(&tmp);
+  HOSTDEVICE inline explicit float16(const float16_t& h) {
+    x = *reinterpret_cast<const uint16_t*>(&h);
   }
 #endif
 
-  PADDLE_HOSTDEVICE inline explicit float16(bool b) : x(b ? 0x3c00 : 0) {}
-
-  PADDLE_HOSTDEVICE inline explicit float16(int8_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
-
-  PADDLE_HOSTDEVICE inline explicit float16(uint8_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
-
-  PADDLE_HOSTDEVICE inline explicit float16(int16_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
-
-  PADDLE_HOSTDEVICE inline explicit float16(uint16_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
-
-  PADDLE_HOSTDEVICE inline explicit float16(int32_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
-
-  PADDLE_HOSTDEVICE inline explicit float16(uint32_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
-
-  PADDLE_HOSTDEVICE inline explicit float16(int64_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
-
-  PADDLE_HOSTDEVICE inline explicit float16(uint64_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
-
-  PADDLE_HOSTDEVICE inline explicit float16(float val) {
-    float16 res = fp16_impl::float_to_half_rn(val);
-    x = res.x;
-  }
-
-  PADDLE_HOSTDEVICE inline explicit float16(double val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
-  }
+  HOSTDEVICE inline explicit float16(float val) {
+#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+    half tmp = __float2half(val);
+    x = *reinterpret_cast<uint16_t*>(&tmp);
+
+#elif defined(PADDLE_NEON)
+    float32x4_t tmp = vld1q_dup_f32(&val);
+    float16_t res = vget_lane_f16(vcvt_f16_f32(tmp), 0);
+    x = *reinterpret_cast<uint16_t*>(&res);
+
+#elif defined(__F16C__)
+    x = _cvtss_sh(val, 0);
+
+#else
+    // Conversion routine adapted from
+    // http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
+    Bits v, s;
+    v.f = val;
+    uint32_t sign = v.si & sigN;
+    v.si ^= sign;
+    sign >>= shiftSign;  // logical shift
+    s.si = mulN;
+    s.si = s.f * v.f;  // correct subnormals
+    v.si ^= (s.si ^ v.si) & -(minN > v.si);
+    v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN));
+    v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN));
+    v.ui >>= shift;  // logical shift
+    v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC);
+    v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC);
+    x = v.ui | sign;
+#endif
+  }
+
+  HOSTDEVICE inline explicit float16(bool b) : x(b ? 0x3c00 : 0) {}
+
+  template <class T>
+  HOSTDEVICE inline explicit float16(const T& val)
+      : x(float16(static_cast<float>(val)).x) {}
 
-  PADDLE_HOSTDEVICE inline float16& operator=(const float16& rhs) {
+  HOSTDEVICE inline float16& operator=(const float16& rhs) {
     x = rhs.x;
     return *this;
   }
 
 #ifdef PADDLE_CUDA_FP16
-  PADDLE_HOSTDEVICE inline float16& operator=(const half& rhs) {
+  HOSTDEVICE inline float16& operator=(const half& rhs) {
 #if CUDA_VERSION >= 9000
     x = reinterpret_cast<__half_raw*>(&rhs)->x;
 #else
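The branchless float-to-half path above can be traced by hand. A worked example for val = 1.0f, using the constants defined later in the struct (editor's sketch, not part of the commit):

    #include <cassert>
    #include "paddle/math/float16.h"

    void trace_one() {
      // val = 1.0f  ->  v.ui = 0x3F800000; no subnormal/inf/nan fixup fires
      // v.ui >>= 13 (shift)  ->  0x0001FC00
      // v.si > subC, so the rebias step subtracts minD = 0x0001C000
      //             ->  0x00003C00
      // x = v.ui | sign  ->  0x3C00, the fp16 encoding of 1.0
      assert(paddle::float16(1.0f).x == 0x3c00);
    }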
@@ -188,87 +152,75 @@ struct PADDLE_ALIGN(2) float16 {
   }
 #endif
 
-  PADDLE_HOSTDEVICE inline float16& operator=(const Eigen::half& rhs) {
+  HOSTDEVICE inline float16& operator=(const Eigen::half& rhs) {
     x = rhs.x;
     return *this;
   }
 
-#if defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \
-    (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34)
-  PADDLE_HOSTDEVICE inline float16& operator=(const float16_t& rhs) {
-    float16_t tmp = rhs;
-    x = *reinterpret_cast<uint16_t*>(&tmp);
+#ifdef PADDLE_WITH_NATIVE_FP16
+  HOSTDEVICE inline float16& operator=(const float16_t& rhs) {
+    x = *reinterpret_cast<const uint16_t*>(&rhs);
     return *this;
   }
 #endif
 
-  PADDLE_HOSTDEVICE inline float16& operator=(bool b) {
+  HOSTDEVICE inline float16& operator=(bool b) {
     x = b ? 0x3c00 : 0;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(int8_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(int8_t val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(uint8_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(uint8_t val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(int16_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(int16_t val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(uint16_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(uint16_t val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(int32_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(int32_t val) {
+    x = float16(val).x;
    return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(uint32_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(uint32_t val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(int64_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(int64_t val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(uint64_t val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(uint64_t val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(float val) {
-    float16 res = fp16_impl::float_to_half_rn(val);
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(float val) {
+    x = float16(val).x;
     return *this;
   }
 
-  PADDLE_HOSTDEVICE inline float16& operator=(double val) {
-    float16 res = fp16_impl::float_to_half_rn(static_cast<float>(val));
-    x = res.x;
+  HOSTDEVICE inline float16& operator=(double val) {
+    x = float16(val).x;
     return *this;
   }
 
 #ifdef PADDLE_CUDA_FP16
-  PADDLE_HOSTDEVICE inline operator half() const {
+  HOSTDEVICE inline explicit operator half() const {
 #if CUDA_VERSION >= 9000
     __half_raw h;
     h.x = x;
@@ -281,186 +233,504 @@ struct PADDLE_ALIGN(2) float16 {
   }
 #endif  // PADDLE_CUDA_FP16
 
-  PADDLE_HOSTDEVICE inline operator Eigen::half() const {
+  HOSTDEVICE inline explicit operator Eigen::half() const {
     Eigen::half h;
     h.x = x;
     return h;
   }
 
-#if defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \
-    (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34)
-  PADDLE_HOSTDEVICE inline operator float16_t() const {
-    float16 h = *this;
-    return *reinterpret_cast<float16_t*>(&h);
+#ifdef PADDLE_WITH_NATIVE_FP16
+  HOSTDEVICE inline explicit operator float16_t() const {
+    return *reinterpret_cast<const float16_t*>(this);
   }
 #endif
 
-  PADDLE_HOSTDEVICE inline explicit operator bool() const {
-    return (x & 0x7fff) != 0;
-  }
-
-  PADDLE_HOSTDEVICE inline explicit operator int8_t() const {
-    return static_cast<int8_t>(fp16_impl::half_to_float(*this));
-  }
-
-  PADDLE_HOSTDEVICE inline explicit operator uint8_t() const {
-    return static_cast<uint8_t>(fp16_impl::half_to_float(*this));
-  }
-
-  PADDLE_HOSTDEVICE inline explicit operator int16_t() const {
-    return static_cast<int16_t>(fp16_impl::half_to_float(*this));
-  }
-
-  PADDLE_HOSTDEVICE inline explicit operator uint16_t() const {
-    return static_cast<uint16_t>(fp16_impl::half_to_float(*this));
-  }
-
-  PADDLE_HOSTDEVICE inline explicit operator int32_t() const {
-    return static_cast<int32_t>(fp16_impl::half_to_float(*this));
-  }
-
-  PADDLE_HOSTDEVICE inline explicit operator uint32_t() const {
-    return static_cast<uint32_t>(fp16_impl::half_to_float(*this));
-  }
-
-  PADDLE_HOSTDEVICE inline explicit operator int64_t() const {
-    return static_cast<int64_t>(fp16_impl::half_to_float(*this));
-  }
-
-  PADDLE_HOSTDEVICE inline explicit operator uint64_t() const {
-    return static_cast<uint64_t>(fp16_impl::half_to_float(*this));
-  }
-
-  PADDLE_HOSTDEVICE inline explicit operator float() const {
-    return fp16_impl::half_to_float(*this);
-  }
-
-  PADDLE_HOSTDEVICE inline explicit operator double() const {
-    return static_cast<double>(fp16_impl::half_to_float(*this));
-  }
+  HOSTDEVICE inline explicit operator float() const {
+#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+    half tmp = *reinterpret_cast<const half*>(this);
+    return __half2float(tmp);
+
+#elif defined(PADDLE_NEON)
+    float16x4_t res = vld1_dup_f16(reinterpret_cast<const float16_t*>(this));
+    return vgetq_lane_f32(vcvt_f32_f16(res), 0);
+
+#elif defined(__F16C__)
+    return _cvtsh_ss(this->x);
+
+#else
+    // Conversion routine adapted from
+    // http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
+    Bits v;
+    v.ui = this->x;
+    int32_t sign = v.si & sigC;
+    v.si ^= sign;
+    sign <<= shiftSign;
+    v.si ^= ((v.si + minD) ^ v.si) & -(v.si > subC);
+    v.si ^= ((v.si + maxD) ^ v.si) & -(v.si > maxC);
+    Bits s;
+    s.si = mulC;
+    s.f *= v.si;
+    int32_t mask = -(norC > v.si);
+    v.si <<= shift;
+    v.si ^= (s.si ^ v.si) & mask;
+    v.si |= sign;
+    return v.f;
+#endif
+  }
+
+  HOSTDEVICE inline explicit operator bool() const { return (x & 0x7fff) != 0; }
+
+  HOSTDEVICE inline explicit operator int8_t() const {
+    return static_cast<int8_t>(float(*this));
+  }
+
+  HOSTDEVICE inline explicit operator uint8_t() const {
+    return static_cast<uint8_t>(float(*this));
+  }
+
+  HOSTDEVICE inline explicit operator int16_t() const {
+    return static_cast<int16_t>(float(*this));
+  }
+
+  HOSTDEVICE inline explicit operator uint16_t() const {
+    return static_cast<uint16_t>(float(*this));
+  }
+
+  HOSTDEVICE inline explicit operator int32_t() const {
+    return static_cast<int32_t>(float(*this));
+  }
+
+  HOSTDEVICE inline explicit operator uint32_t() const {
+    return static_cast<uint32_t>(float(*this));
+  }
+
+  HOSTDEVICE inline explicit operator int64_t() const {
+    return static_cast<int64_t>(float(*this));
+  }
+
+  HOSTDEVICE inline explicit operator uint64_t() const {
+    return static_cast<uint64_t>(float(*this));
+  }
+
+  HOSTDEVICE inline explicit operator double() const {
+    return static_cast<double>(float(*this));
+  }
+
+ private:
+  union Bits {
+    float f;
+    int32_t si;
+    uint32_t ui;
+  };
+
+  static const int shift = 13;
+  static const int shiftSign = 16;
+
+  static const int32_t infN = 0x7F800000;
+  static const int32_t maxN = 0x477FE000;  // max flt16 as flt32
+  static const int32_t minN = 0x38800000;  // min flt16 normal as flt32
+  static const int32_t sigN = 0x80000000;  // sign bit
+
+  static constexpr int32_t infC = infN >> shift;
+  static constexpr int32_t nanN = (infC + 1) << shift;  // minimum flt16 nan as float32
+  static constexpr int32_t maxC = maxN >> shift;
+  static constexpr int32_t minC = minN >> shift;
+  static constexpr int32_t sigC = sigN >> shiftSign;
+
+  static const int32_t mulN = 0x52000000;  // (1 << 23) / minN
+  static const int32_t mulC = 0x33800000;  // minN / (1 << (23 - shift))
+  static const int32_t subC = 0x003FF;     // max flt32 subnormal downshifted
+  static const int32_t norC = 0x00400;     // min flt32 normal downshifted
+
+  static constexpr int32_t maxD = infC - maxC - 1;
+  static constexpr int32_t minD = minC - subC - 1;
 };
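The new private constants mirror the fp16_impl ones removed at the end of this diff; they are float32 bit patterns of half-precision limits. A quick check of the two anchor values (editor's sketch, not part of the commit):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    void check_limits() {
      const uint32_t maxN = 0x477FE000, minN = 0x38800000;
      float max_half, min_normal_half;
      std::memcpy(&max_half, &maxN, sizeof(float));        // bit pattern -> float
      std::memcpy(&min_normal_half, &minN, sizeof(float));
      assert(max_half == 65504.0f);                        // largest finite fp16
      assert(min_normal_half == 6.103515625e-05f);         // 2^-14, smallest normal fp16
    }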
-// Arithmetic operators
-#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
-__device__ inline float16 operator+(const float16& a, const float16& b) {
+// Arithmetic operators on GPU
+// CUDA 9.0 provides built-in arithmetic operators for half while
+// CUDA 7.5 and 8.0 do not. The arithmetic operators defined here are
+// for users to write similar CUDA code in CUDA 7.5 and 8.0 as in
+// CUDA 9.0 regarding the half data type.
+#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && \
+    __CUDA_ARCH__ >= 530 && CUDA_VERSION < 9000
+DEVICE inline half operator+(const half& a, const half& b) {
+  return __hadd(a, b);
+}
+
+DEVICE inline half operator-(const half& a, const half& b) {
+  return __hsub(a, b);
+}
+
+DEVICE inline half operator*(const half& a, const half& b) {
+  return __hmul(a, b);
+}
+
+DEVICE inline half operator/(const half& a, const half& b) {
+  float num = __half2float(a);
+  float denom = __half2float(b);
+  return __float2half(num / denom);
+}
+
+DEVICE inline half operator-(const half& a) { return __hneg(a); }
+
+DEVICE inline half& operator+=(half& a, const half& b) {
+  a = a + b;
+  return a;
+}
+
+DEVICE inline half& operator-=(half& a, const half& b) {
+  a = a - b;
+  return a;
+}
+
+DEVICE inline half& operator*=(half& a, const half& b) {
+  a = a * b;
+  return a;
+}
+
+DEVICE inline half& operator/=(half& a, const half& b) {
+  a = a / b;
+  return a;
+}
+
+DEVICE inline bool operator==(const half& a, const half& b) { return __heq(a, b); }
+
+DEVICE inline bool operator!=(const half& a, const half& b) { return __hne(a, b); }
+
+DEVICE inline bool operator<(const half& a, const half& b) { return __hlt(a, b); }
+
+DEVICE inline bool operator<=(const half& a, const half& b) { return __hle(a, b); }
+
+DEVICE inline bool operator>(const half& a, const half& b) { return __hgt(a, b); }
+
+DEVICE inline bool operator>=(const half& a, const half& b) { return __hge(a, b); }
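These overloads let CUDA 7.5/8.0 device code use half with ordinary operators, as CUDA 9.0 allows natively. A minimal kernel sketch (editor's illustration, not part of the commit; assumes compute capability >= 5.3):

    __global__ void axpy_fp16(int n, half alpha, const half* x, half* y) {
      int i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i < n) {
        // Resolves to __hmul/__hadd through the overloads above on CUDA 7.5/8.0,
        // and to the built-in half operators on CUDA 9.0.
        y[i] = alpha * x[i] + y[i];
      }
    }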
+/*
 DEVICE inline float16 operator+(const float16& a, const float16& b) {
   return float16(__hadd(half(a), half(b)));
 }
 
-__device__ inline float16 operator-(const float16& a, const float16& b) {
+DEVICE inline float16 operator-(const float16& a, const float16& b) {
   return float16(__hsub(half(a), half(b)));
 }
 
-__device__ inline float16 operator*(const float16& a, const float16& b) {
+DEVICE inline float16 operator*(const float16& a, const float16& b) {
   return float16(__hmul(half(a), half(b)));
 }
 
-__device__ inline float16 operator/(const float16& a, const float16& b) {
+// TODO(kexinzhao): check the cuda version that starts to support __hdiv
+DEVICE inline float16 operator/(const float16& a, const float16& b) {
   float num = __half2float(half(a));
   float denom = __half2float(half(b));
   return float16(num / denom);
 }
 
-__device__ inline float16 operator-(const float16& a) {
+DEVICE inline float16 operator-(const float16& a) {
   return float16(__hneg(half(a)));
 }
 
-__device__ inline float16& operator+=(float16& a, const float16& b) {
+DEVICE inline float16& operator+=(float16& a, const float16& b) {
   a = a + b;
   return a;
 }
 
-__device__ inline float16& operator-=(float16& a, const float16& b) {
+DEVICE inline float16& operator-=(float16& a, const float16& b) {
   a = a - b;
   return a;
 }
 
-__device__ inline float16& operator*=(float16& a, const float16& b) {
+DEVICE inline float16& operator*=(float16& a, const float16& b) {
   a = a * b;
   return a;
 }
 
-__device__ inline float16& operator/=(float16& a, const float16& b) {
+DEVICE inline float16& operator/=(float16& a, const float16& b) {
   a = a / b;
   return a;
 }
 
-__device__ inline bool operator==(const float16& a, const float16& b) {
+DEVICE inline bool operator==(const float16& a, const float16& b) {
   return __heq(half(a), half(b));
 }
 
-__device__ inline bool operator!=(const float16& a, const float16& b) {
+DEVICE inline bool operator!=(const float16& a, const float16& b) {
   return __hne(half(a), half(b));
 }
 
-__device__ inline bool operator<(const float16& a, const float16& b) {
+DEVICE inline bool operator<(const float16& a, const float16& b) {
   return __hlt(half(a), half(b));
 }
 
-__device__ inline bool operator<=(const float16& a, const float16& b) {
+DEVICE inline bool operator<=(const float16& a, const float16& b) {
   return __hle(half(a), half(b));
 }
 
-__device__ inline bool operator>(const float16& a, const float16& b) {
+DEVICE inline bool operator>(const float16& a, const float16& b) {
   return __hgt(half(a), half(b));
 }
 
-__device__ inline bool operator>=(const float16& a, const float16& b) {
+DEVICE inline bool operator>=(const float16& a, const float16& b) {
   return __hge(half(a), half(b));
 }
+*/
+// Arithmetic operators on ARMv8.2-A CPU
+#elif defined(PADDLE_WITH_NATIVE_FP16)
+HOST inline float16 operator+(const float16& a, const float16& b) {
+  float16 res;
+  asm volatile(
+      "ld1 {v0.h}[0], [%[a_ptr]]\n"
+      "ld1 {v1.h}[0], [%[b_ptr]]\n"
+      "fadd h0, h0, h1\n"
+      "st1 {v0.h}[0], [%[res_ptr]]\n"
+      :  // outputs
+      :  // inputs
+      [a_ptr] "r"(&(a.x)),
+      [b_ptr] "r"(&(b.x)),
+      [res_ptr] "r"(&(res.x))
+      :  // clobbers
+      "memory", "v0", "v1");
+  return res;
+}
+
+HOST inline float16 operator-(const float16& a, const float16& b) {
+  float16 res;
+  asm volatile(
+      "ld1 {v0.h}[0], [%[a_ptr]]\n"
+      "ld1 {v1.h}[0], [%[b_ptr]]\n"
+      "fsub h0, h0, h1\n"
+      "st1 {v0.h}[0], [%[res_ptr]]\n"
+      :  // outputs
+      :  // inputs
+      [a_ptr] "r"(&(a.x)),
+      [b_ptr] "r"(&(b.x)),
+      [res_ptr] "r"(&(res.x))
+      :  // clobbers
+      "memory", "v0", "v1");
+  return res;
+}
+
+HOST inline float16 operator*(const float16& a, const float16& b) {
+  float16 res;
+  asm volatile(
+      "ld1 {v0.h}[0], [%[a_ptr]]\n"
+      "ld1 {v1.h}[0], [%[b_ptr]]\n"
+      "fmul h0, h0, h1\n"
+      "st1 {v0.h}[0], [%[res_ptr]]\n"
+      :  // outputs
+      :  // inputs
+      [a_ptr] "r"(&(a.x)),
+      [b_ptr] "r"(&(b.x)),
+      [res_ptr] "r"(&(res.x))
+      :  // clobbers
+      "memory", "v0", "v1");
+  return res;
+}
+
+HOST inline float16 operator/(const float16& a, const float16& b) {
+  float16 res;
+  asm volatile(
+      "ld1 {v0.h}[0], [%[a_ptr]]\n"
+      "ld1 {v1.h}[0], [%[b_ptr]]\n"
+      "fdiv h0, h0, h1\n"
+      "st1 {v0.h}[0], [%[res_ptr]]\n"
+      :  // outputs
+      :  // inputs
+      [a_ptr] "r"(&(a.x)),
+      [b_ptr] "r"(&(b.x)),
+      [res_ptr] "r"(&(res.x))
+      :  // clobbers
+      "memory", "v0", "v1");
+  return res;
+}
-// On ARMv8.2-A CPU
-#elif defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \
-    (PADDLE_GNUC_VER >= 71 || PADDLE_CLANG_VER >= 39)
-__host__ inline float16 operator+(const float16& a, const float16& b) {
+HOST inline float16 operator-(const float16& a) {
+  float16 res;
+  asm volatile(
+      "ld1 {v0.h}[0], [%[a_ptr]]\n"
+      "fneg h0, h0\n"
+      "st1 {v0.h}[0], [%[res_ptr]]\n"
+      :  // outputs
+      :  // inputs
+      [a_ptr] "r"(&(a.x)),
+      [res_ptr] "r"(&(res.x))
+      :  // clobbers
+      "memory", "v0");
+  return res;
+}
+
+HOST inline float16& operator+=(float16& a, const float16& b) {
+  a = a + b;
+  return a;
+}
+
+HOST inline float16& operator-=(float16& a, const float16& b) {
+  a = a - b;
+  return a;
+}
+
+HOST inline float16& operator*=(float16& a, const float16& b) {
+  a = a * b;
+  return a;
+}
+
+HOST inline float16& operator/=(float16& a, const float16& b) {
+  a = a / b;
+  return a;
+}
+
+HOST inline bool operator==(const float16& a, const float16& b) {
+  uint16_t res;
+  asm volatile(
+      "ld1 {v0.h}[0], [%[a_ptr]]\n"
+      "ld1 {v1.h}[0], [%[b_ptr]]\n"
+      "fcmeq h0, h0, h1\n"
+      "st1 {v0.h}[0], [%[res_ptr]]\n"
+      :  // outputs
+      :  // inputs
+      [a_ptr] "r"(&(a.x)),
+      [b_ptr] "r"(&(b.x)),
+      [res_ptr] "r"(&res)
+      :  // clobbers
+      "memory", "v0", "v1");
+  return (res & 0xffff) != 0;
+}
+
+HOST inline bool operator!=(const float16& a, const float16& b) {
+  return !(a == b);
+}
+
+HOST inline bool operator<(const float16& a, const float16& b) {
+  uint16_t res;
+  asm volatile(
+      "ld1 {v1.h}[0], [%[a_ptr]]\n"
+      "ld1 {v0.h}[0], [%[b_ptr]]\n"
+      "fcmgt h0, h0, h1\n"
+      "st1 {v0.h}[0], [%[res_ptr]]\n"
+      :  // outputs
+      :  // inputs
+      [a_ptr] "r"(&(a.x)),
+      [b_ptr] "r"(&(b.x)),
+      [res_ptr] "r"(&res)
+      :  // clobbers
+      "memory", "v0", "v1");
+  return (res & 0xffff) != 0;
+}
+
+HOST inline bool operator<=(const float16& a, const float16& b) {
+  uint16_t res;
+  asm volatile(
+      "ld1 {v1.h}[0], [%[a_ptr]]\n"
+      "ld1 {v0.h}[0], [%[b_ptr]]\n"
+      "fcmge h0, h0, h1\n"
+      "st1 {v0.h}[0], [%[res_ptr]]\n"
+      :  // outputs
+      :  // inputs
+      [a_ptr] "r"(&(a.x)),
+      [b_ptr] "r"(&(b.x)),
+      [res_ptr] "r"(&res)
+      :  // clobbers
+      "memory", "v0", "v1");
+  return (res & 0xffff) != 0;
+}
+
+HOST inline bool operator>(const float16& a, const float16& b) {
+  uint16_t res;
+  asm volatile(
+      "ld1 {v0.h}[0], [%[a_ptr]]\n"
+      "ld1 {v1.h}[0], [%[b_ptr]]\n"
+      "fcmgt h0, h0, h1\n"
+      "st1 {v0.h}[0], [%[res_ptr]]\n"
+      :  // outputs
+      :  // inputs
+      [a_ptr] "r"(&(a.x)),
+      [b_ptr] "r"(&(b.x)),
+      [res_ptr] "r"(&res)
+      :  // clobbers
+      "memory", "v0", "v1");
+  return (res & 0xffff) != 0;
+}
+
+HOST inline bool operator>=(const float16& a, const float16& b) {
+  uint16_t res;
+  asm volatile(
+      "ld1 {v0.h}[0], [%[a_ptr]]\n"
+      "ld1 {v1.h}[0], [%[b_ptr]]\n"
+      "fcmge h0, h0, h1\n"
+      "st1 {v0.h}[0], [%[res_ptr]]\n"
+      :  // outputs
+      :  // inputs
+      [a_ptr] "r"(&(a.x)),
+      [b_ptr] "r"(&(b.x)),
+      [res_ptr] "r"(&res)
+      :  // clobbers
+      "memory", "v0", "v1");
+  return (res & 0xffff) != 0;
+}
+/*
 HOST inline float16 operator+(const float16& a, const float16& b) {
   return float16(vaddh_f16(float16_t(a), float16_t(b)));
 }
 
-__host__ inline float16 operator-(const float16& a, const float16& b) {
+HOST inline float16 operator-(const float16& a, const float16& b) {
   return float16(vsubh_f16(float16_t(a), float16_t(b)));
 }
 
-__host__ inline float16 operator*(const float16& a, const float16& b) {
+HOST inline float16 operator*(const float16& a, const float16& b) {
   return float16(vmulh_f16(float16_t(a), float16_t(b)));
 }
 
-__host__ inline float16 operator/(const float16& a, const float16& b) {
+HOST inline float16 operator/(const float16& a, const float16& b) {
   return float16(vdivh_f16(float16_t(a), float16_t(b)));
 }
 
-__host__ inline float16 operator-(const float16& a) {
+HOST inline float16 operator-(const float16& a) {
   return float16(vnegh_f16(float16_t(a)));
 }
 
-__host__ inline float16& operator+=(float16& a, const float16& b) {
+HOST inline float16& operator+=(float16& a, const float16& b) {
   a = a + b;
   return a;
 }
 
-__host__ inline float16& operator-=(float16& a, const float16& b) {
+HOST inline float16& operator-=(float16& a, const float16& b) {
   a = a - b;
   return a;
 }
 
-__host__ inline float16& operator*=(float16& a, const float16& b) {
+HOST inline float16& operator*=(float16& a, const float16& b) {
   a = a * b;
   return a;
 }
 
-__host__ inline float16& operator/=(float16& a, const float16& b) {
+HOST inline float16& operator/=(float16& a, const float16& b) {
   a = a / b;
   return a;
 }
 
-__host__ inline bool operator==(const float16& a, const float16& b) {
+HOST inline bool operator==(const float16& a, const float16& b) {
   return static_cast<bool>(vceqh_f16(float16_t(a), float16_t(b)));
 }
 
-__host__ inline bool operator!=(const float16& a, const float16& b) {
+HOST inline bool operator!=(const float16& a, const float16& b) {
   return !(a == b);
 }
 
-__host__ inline bool operator<(const float16& a, const float16& b) {
+HOST inline bool operator<(const float16& a, const float16& b) {
 #ifdef PADDLE_NEON_64
   return static_cast<bool>(vclth_f16(float16_t(a), float16_t(b)));
 #else
@@ -468,7 +738,7 @@ __host__ inline bool operator<(const float16& a, const float16& b) {
 #endif  // PADDLE_NEON_64
 }
 
-__host__ inline bool operator<=(const float16& a, const float16& b) {
+HOST inline bool operator<=(const float16& a, const float16& b) {
 #ifdef PADDLE_NEON_64
   return static_cast<bool>(vcleh_f16(float16_t(a), float16_t(b)));
 #else
@@ -476,7 +746,7 @@ __host__ inline bool operator<=(const float16& a, const float16& b) {
 #endif  // PADDLE_NEON_64
 }
 
-__host__ inline bool operator>(const float16& a, const float16& b) {
+HOST inline bool operator>(const float16& a, const float16& b) {
 #ifdef PADDLE_NEON_64
   return static_cast<bool>(vcgth_f16(float16_t(a), float16_t(b)));
 #else
@@ -484,231 +754,82 @@ __host__ inline bool operator>(const float16& a, const float16& b) {
 #endif  // PADDLE_NEON_64
 }
 
-__host__ inline bool operator>=(const float16& a, const float16& b) {
+HOST inline bool operator>=(const float16& a, const float16& b) {
 #ifdef PADDLE_NEON_64
   return static_cast<bool>(vcgeh_f16(float16_t(a), float16_t(b)));
 #else
   return float(a) >= float(b);
 #endif  // PADDLE_NEON_64
 }
+*/
 
-#else  // Software emulation on other cpu
-PADDLE_HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
+// Arithmetic operators, software emulated on other CPU
+#else
+HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
   return float16(float(a) + float(b));
 }
 
-PADDLE_HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
+HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
   return float16(float(a) - float(b));
 }
 
-PADDLE_HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
+HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
   return float16(float(a) * float(b));
 }
 
-PADDLE_HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
+HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
   return float16(float(a) / float(b));
 }
 
-PADDLE_HOSTDEVICE inline float16 operator-(const float16& a) {
+HOSTDEVICE inline float16 operator-(const float16& a) {
   float16 res;
   res.x = a.x ^ 0x8000;
   return res;
 }
 
-PADDLE_HOSTDEVICE inline float16& operator+=(float16& a, const float16& b) {
+HOSTDEVICE inline float16& operator+=(float16& a, const float16& b) {
   a = float16(float(a) + float(b));
   return a;
 }
 
-PADDLE_HOSTDEVICE inline float16& operator-=(float16& a, const float16& b) {
+HOSTDEVICE inline float16& operator-=(float16& a, const float16& b) {
   a = float16(float(a) - float(b));
   return a;
 }
 
-PADDLE_HOSTDEVICE inline float16& operator*=(float16& a, const float16& b) {
+HOSTDEVICE inline float16& operator*=(float16& a, const float16& b) {
   a = float16(float(a) * float(b));
   return a;
 }
 
-PADDLE_HOSTDEVICE inline float16& operator/=(float16& a, const float16& b) {
+HOSTDEVICE inline float16& operator/=(float16& a, const float16& b) {
   a = float16(float(a) / float(b));
   return a;
 }
 
-PADDLE_HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
   return float(a) == float(b);
 }
 
-PADDLE_HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
   return float(a) != float(b);
 }
 
-PADDLE_HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
   return float(a) < float(b);
 }
 
-PADDLE_HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
   return float(a) <= float(b);
 }
 
-PADDLE_HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
   return float(a) > float(b);
 }
 
-PADDLE_HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
   return float(a) >= float(b);
 }
 #endif
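Whichever branch is compiled, float16 ends up usable like a builtin arithmetic type. A small host-side usage sketch (editor's illustration, not part of the commit):

    #include <cassert>
    #include "paddle/math/float16.h"

    void usage() {
      using paddle::float16;
      float16 a(2.0f), b(0.5f);
      float16 c = a * b + b;              // 1.5, exactly representable in fp16
      assert(static_cast<float>(c) == 1.5f);
      assert(b < a && a >= b && a != b);  // comparisons are overloaded too
    }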
-namespace fp16_impl {
-
-union Bits {
-  float f;
-  int32_t si;
-  uint32_t ui;
-};
-
-const int shift = 13;
-const int shiftSign = 16;
-
-const int32_t infN = 0x7F800000;
-const int32_t maxN = 0x477FE000;  // max flt16 as flt32
-const int32_t minN = 0x38800000;  // min flt16 normal as flt32
-const int32_t sigN = 0x80000000;  // sign bit
-
-constexpr int32_t infC = infN >> shift;
-constexpr int32_t nanN = (infC + 1) << shift;  // minimum flt16 nan as float32
-constexpr int32_t maxC = maxN >> shift;
-constexpr int32_t minC = minN >> shift;
-constexpr int32_t sigC = sigN >> shiftSign;
-
-const int32_t mulN = 0x52000000;  // (1 << 23) / minN
-const int32_t mulC = 0x33800000;  // minN / (1 << (23 - shift))
-const int32_t subC = 0x003FF;     // max flt32 subnormal downshifted
-const int32_t norC = 0x00400;     // min flt32 normal downshifted
-
-constexpr int32_t maxD = infC - maxC - 1;
-constexpr int32_t minD = minC - subC - 1;
-
-PADDLE_HOSTDEVICE inline float16 float_to_half_rn(float f) {
-#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
-  half tmp = __float2half(f);
-  return *reinterpret_cast<float16*>(&tmp);
-
-#elif defined(PADDLE_NEON_64)
-  float16 res;
-  asm volatile(
-      "ld1 {v0.s}[0], [%[float_ptr]]\n"
-      "fcvt h0, s0\n"
-      "st1 {v0.h}[0], [%[half_ptr]]\n"
-      :  // outputs
-      :  // inputs
-      [float_ptr] "r"(&f),
-      [half_ptr] "r"(&(res.x))
-      :  // clobbers
-      "memory", "v0");
-  return res;
-
-#elif defined(PADDLE_NEON_32)
-  float16 res;
-  asm volatile(
-      "vld1.32 {d0[0]}, [%[float_ptr]]\n"
-      "vcvt.f16.f32 d0, q0\n"
-      "vst1.16 {d0[0]}, [%[half_ptr]]\n"
-      :  // outputs
-      :  // inputs
-      [float_ptr] "r"(&f),
-      [half_ptr] "r"(&(res.x))
-      :  // clobbers
-      "memory", "d0");
-  return res;
-
-#elif defined(__F16C__)
-  float16 res;
-  res.x = _cvtss_sh(f, 0);
-  return res;
-
-#else
-  // Conversion routine adapted from
-  // http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
-  Bits v, s;
-  v.f = f;
-  uint32_t sign = v.si & sigN;
-  v.si ^= sign;
-  sign >>= shiftSign;  // logical shift
-  s.si = mulN;
-  s.si = s.f * v.f;  // correct subnormals
-  v.si ^= (s.si ^ v.si) & -(minN > v.si);
-  v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN));
-  v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN));
-  v.ui >>= shift;  // logical shift
-  v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC);
-  v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC);
-  float16 res;
-  res.x = v.ui | sign;
-  return res;
-
-#endif
-}
-
-PADDLE_HOSTDEVICE inline float half_to_float(float16 h) {
-#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
-  half tmp = *reinterpret_cast<half*>(&h);
-  return __half2float(tmp);
-
-#elif defined(PADDLE_NEON_64)
-  float res;
-  asm volatile(
-      "ld1 {v0.h}[0], [%[half_ptr]]\n"
-      "fcvt s0, h0\n"
-      "st1 {v0.s}[0], [%[float_ptr]]\n"
-      :  // outputs
-      :  // inputs
-      [half_ptr] "r"(&(h.x)),
-      [float_ptr] "r"(&res)
-      :  // clobbers
-      "memory", "v0");
-  return res;
-
-#elif defined(PADDLE_NEON_32)
-  float res;
-  asm volatile(
-      "vld1.16 {d0[0]}, [%[half_ptr]]\n"
-      "vcvt.f32.f16 q0, d0\n"
-      "vst1.32 {d0[0]}, [%[float_ptr]]\n"
-      :  // outputs
-      :  // inputs
-      [half_ptr] "r"(&(h.x)),
-      [float_ptr] "r"(&res)
-      :  // clobbers
-      "memory", "v0");
-  return res;
-
-#elif defined(__F16C__)
-  return _cvtsh_ss(h.x);
-
-#else
-  // Conversion routine adapted from
-  // http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
-  Bits v;
-  v.ui = h.x;
-  int32_t sign = v.si & sigC;
-  v.si ^= sign;
-  sign <<= shiftSign;
-  v.si ^= ((v.si + minD) ^ v.si) & -(v.si > subC);
-  v.si ^= ((v.si + maxD) ^ v.si) & -(v.si > maxC);
-  Bits s;
-  s.si = mulC;
-  s.f *= v.si;
-  int32_t mask = -(norC > v.si);
-  v.si <<= shift;
-  v.si ^= (s.si ^ v.si) & mask;
-  v.si |= sign;
-  return v.f;
-
-#endif
-}
-
-}  // namespace fp16_impl
 }  // namespace paddle
paddle/math/tests/test_float16.cpp
@@ -55,10 +55,12 @@ TEST(float16, conversion_cpu) {
   EXPECT_EQ(float16(false).x, 0x0000);
 
   // Implicit conversion to and from Eigen::half
+  /*
   Eigen::half tmp = float16(1.0f);
   float16 v_conv = tmp;
   EXPECT_EQ(tmp.x, 0x3c00);
   EXPECT_EQ(v_conv.x, 0x3c00);
+  */
 
   // Default constructor
   float16 v_def;
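Since the conversions are now explicit, the commented-out checks would be written with casts; a sketch in the style of this test file (editor's illustration, not part of the commit):

    // Explicit conversion to and from Eigen::half
    Eigen::half tmp = static_cast<Eigen::half>(float16(1.0f));
    float16 v_conv = static_cast<float16>(tmp);
    EXPECT_EQ(tmp.x, 0x3c00);
    EXPECT_EQ(v_conv.x, 0x3c00);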