Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
78375990
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
78375990
编写于
12月 27, 2021
作者:
S
sneaxiy
提交者:
GitHub
12月 27, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine float16 implementation (#38439)
上级
5f7e4a21
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
57 addition
and
57 deletion
+57
-57
paddle/fluid/operators/clip_op.h
paddle/fluid/operators/clip_op.h
+1
-1
paddle/fluid/platform/device/gpu/cuda/cuda_device_function.h
paddle/fluid/platform/device/gpu/cuda/cuda_device_function.h
+2
-2
paddle/fluid/platform/float16.h
paddle/fluid/platform/float16.h
+33
-33
paddle/fluid/platform/float16_test.cu
paddle/fluid/platform/float16_test.cu
+21
-21
未找到文件。
paddle/fluid/operators/clip_op.h
浏览文件 @
78375990
...
@@ -95,7 +95,7 @@ class ClipKernel : public framework::OpKernel<T> {
...
@@ -95,7 +95,7 @@ class ClipKernel : public framework::OpKernel<T> {
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"max should be greater than or equal to min. "
"max should be greater than or equal to min. "
"But received min = %f, max = %f"
,
"But received min = %f, max = %f"
,
min
,
max
));
static_cast
<
float
>
(
min
),
static_cast
<
float
>
(
max
)
));
auto
*
x_var
=
context
.
InputVar
(
"X"
);
auto
*
x_var
=
context
.
InputVar
(
"X"
);
if
(
x_var
->
IsType
<
framework
::
LoDTensor
>
())
{
if
(
x_var
->
IsType
<
framework
::
LoDTensor
>
())
{
...
...
paddle/fluid/platform/device/gpu/cuda/cuda_device_function.h
浏览文件 @
78375990
...
@@ -73,7 +73,7 @@ template <>
...
@@ -73,7 +73,7 @@ template <>
__forceinline__
__device__
float16
CudaShuffleDownSync
(
unsigned
mask
,
__forceinline__
__device__
float16
CudaShuffleDownSync
(
unsigned
mask
,
float16
val
,
int
delta
,
float16
val
,
int
delta
,
int
width
)
{
int
width
)
{
return
float16
(
__shfl_down_sync
(
mask
,
static_cast
<
half
>
(
val
),
return
float16
(
__shfl_down_sync
(
mask
,
val
.
to_half
(
),
static_cast
<
unsigned
>
(
delta
),
width
));
static_cast
<
unsigned
>
(
delta
),
width
));
}
}
...
@@ -103,7 +103,7 @@ CudaShuffleDownSync(unsigned mask, paddle::platform::complex<double> val,
...
@@ -103,7 +103,7 @@ CudaShuffleDownSync(unsigned mask, paddle::platform::complex<double> val,
template
<
>
template
<
>
__forceinline__
__device__
float16
CudaShuffleXorSync
(
unsigned
mask
,
__forceinline__
__device__
float16
CudaShuffleXorSync
(
unsigned
mask
,
float16
val
,
int
width
)
{
float16
val
,
int
width
)
{
return
float16
(
__shfl_xor_sync
(
mask
,
static_cast
<
half
>
(
val
),
width
));
return
float16
(
__shfl_xor_sync
(
mask
,
val
.
to_half
(
),
width
));
}
}
template
<
>
template
<
>
...
...
paddle/fluid/platform/float16.h
浏览文件 @
78375990
...
@@ -214,7 +214,7 @@ struct PADDLE_ALIGN(2) float16 {
...
@@ -214,7 +214,7 @@ struct PADDLE_ALIGN(2) float16 {
// Conversion opertors
// Conversion opertors
#ifdef PADDLE_CUDA_FP16
#ifdef PADDLE_CUDA_FP16
HOSTDEVICE
inline
explicit
operator
half
()
const
{
HOSTDEVICE
inline
half
to_
half
()
const
{
#if defined(PADDLE_WITH_HIP) || CUDA_VERSION >= 9000
#if defined(PADDLE_WITH_HIP) || CUDA_VERSION >= 9000
__half_raw
h
;
__half_raw
h
;
h
.
x
=
x
;
h
.
x
=
x
;
...
@@ -233,7 +233,7 @@ struct PADDLE_ALIGN(2) float16 {
...
@@ -233,7 +233,7 @@ struct PADDLE_ALIGN(2) float16 {
}
}
#endif
#endif
HOSTDEVICE
inline
explicit
operator
float
()
const
{
HOSTDEVICE
inline
operator
float
()
const
{
#if defined(PADDLE_CUDA_FP16) && \
#if defined(PADDLE_CUDA_FP16) && \
(defined(__HIPCC__) || (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300))
(defined(__HIPCC__) || (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300))
half
tmp
=
*
reinterpret_cast
<
const
half
*>
(
this
);
half
tmp
=
*
reinterpret_cast
<
const
half
*>
(
this
);
...
@@ -302,7 +302,7 @@ struct PADDLE_ALIGN(2) float16 {
...
@@ -302,7 +302,7 @@ struct PADDLE_ALIGN(2) float16 {
return
static_cast
<
uint64_t
>
(
static_cast
<
float
>
(
*
this
));
return
static_cast
<
uint64_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
double
()
const
{
HOSTDEVICE
inline
operator
double
()
const
{
return
static_cast
<
double
>
(
static_cast
<
float
>
(
*
this
));
return
static_cast
<
double
>
(
static_cast
<
float
>
(
*
this
));
}
}
...
@@ -350,7 +350,7 @@ DEVICE inline half operator+(const half& a, const half& b) {
...
@@ -350,7 +350,7 @@ DEVICE inline half operator+(const half& a, const half& b) {
return
__hadd
(
a
,
b
);
return
__hadd
(
a
,
b
);
#else
#else
float
res
=
static_cast
<
float
>
(
float16
(
a
))
+
static_cast
<
float
>
(
float16
(
b
));
float
res
=
static_cast
<
float
>
(
float16
(
a
))
+
static_cast
<
float
>
(
float16
(
b
));
return
half
(
float16
(
res
)
);
return
float16
(
res
).
to_half
(
);
#endif
#endif
}
}
...
@@ -359,7 +359,7 @@ DEVICE inline half operator-(const half& a, const half& b) {
...
@@ -359,7 +359,7 @@ DEVICE inline half operator-(const half& a, const half& b) {
return
__hsub
(
a
,
b
);
return
__hsub
(
a
,
b
);
#else
#else
float
res
=
static_cast
<
float
>
(
float16
(
a
))
-
static_cast
<
float
>
(
float16
(
b
));
float
res
=
static_cast
<
float
>
(
float16
(
a
))
-
static_cast
<
float
>
(
float16
(
b
));
return
half
(
float16
(
res
)
);
return
float16
(
res
).
to_half
(
);
#endif
#endif
}
}
...
@@ -368,7 +368,7 @@ DEVICE inline half operator*(const half& a, const half& b) {
...
@@ -368,7 +368,7 @@ DEVICE inline half operator*(const half& a, const half& b) {
return
__hmul
(
a
,
b
);
return
__hmul
(
a
,
b
);
#else
#else
float
res
=
static_cast
<
float
>
(
float16
(
a
))
*
static_cast
<
float
>
(
float16
(
b
));
float
res
=
static_cast
<
float
>
(
float16
(
a
))
*
static_cast
<
float
>
(
float16
(
b
));
return
half
(
float16
(
res
)
);
return
float16
(
res
).
to_half
(
);
#endif
#endif
}
}
...
@@ -379,7 +379,7 @@ DEVICE inline half operator/(const half& a, const half& b) {
...
@@ -379,7 +379,7 @@ DEVICE inline half operator/(const half& a, const half& b) {
return
__float2half
(
num
/
denom
);
return
__float2half
(
num
/
denom
);
#else
#else
float
res
=
static_cast
<
float
>
(
float16
(
a
))
/
static_cast
<
float
>
(
float16
(
b
));
float
res
=
static_cast
<
float
>
(
float16
(
a
))
/
static_cast
<
float
>
(
float16
(
b
));
return
half
(
float16
(
res
)
);
return
float16
(
res
).
to_half
(
);
#endif
#endif
}
}
...
@@ -388,7 +388,7 @@ DEVICE inline half operator-(const half& a) {
...
@@ -388,7 +388,7 @@ DEVICE inline half operator-(const half& a) {
return
__hneg
(
a
);
return
__hneg
(
a
);
#else
#else
float
res
=
-
static_cast
<
float
>
(
float16
(
a
));
float
res
=
-
static_cast
<
float
>
(
float16
(
a
));
return
half
(
float16
(
res
)
);
return
float16
(
res
).
to_half
(
);
#endif
#endif
}
}
...
@@ -470,7 +470,7 @@ DEVICE inline bool operator>=(const half& a, const half& b) {
...
@@ -470,7 +470,7 @@ DEVICE inline bool operator>=(const half& a, const half& b) {
// in __host__ __device__ function
// in __host__ __device__ function
#if defined(__HIPCC__)
#if defined(__HIPCC__)
DEVICE
inline
float16
operator
+
(
const
float16
&
a
,
const
float16
&
b
)
{
DEVICE
inline
float16
operator
+
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
__hadd
(
half
(
a
),
half
(
b
)));
return
float16
(
__hadd
(
a
.
to_half
(),
b
.
to_half
(
)));
}
}
HOST
inline
float16
operator
+
(
const
float16
&
a
,
const
float16
&
b
)
{
HOST
inline
float16
operator
+
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
static_cast
<
float
>
(
a
)
+
static_cast
<
float
>
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
+
static_cast
<
float
>
(
b
));
...
@@ -478,7 +478,7 @@ HOST inline float16 operator+(const float16& a, const float16& b) {
...
@@ -478,7 +478,7 @@ HOST inline float16 operator+(const float16& a, const float16& b) {
#else
#else
HOSTDEVICE
inline
float16
operator
+
(
const
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
float16
operator
+
(
const
float16
&
a
,
const
float16
&
b
)
{
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
float16
(
__hadd
(
half
(
a
),
half
(
b
)));
return
float16
(
__hadd
(
a
.
to_half
(),
b
.
to_half
(
)));
#else
#else
return
float16
(
static_cast
<
float
>
(
a
)
+
static_cast
<
float
>
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
+
static_cast
<
float
>
(
b
));
#endif
#endif
...
@@ -487,7 +487,7 @@ HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
...
@@ -487,7 +487,7 @@ HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
#if defined(__HIPCC__)
#if defined(__HIPCC__)
DEVICE
inline
float16
operator
-
(
const
float16
&
a
,
const
float16
&
b
)
{
DEVICE
inline
float16
operator
-
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
__hsub
(
half
(
a
),
half
(
b
)));
return
float16
(
__hsub
(
a
.
to_half
(),
b
.
to_half
(
)));
}
}
HOST
inline
float16
operator
-
(
const
float16
&
a
,
const
float16
&
b
)
{
HOST
inline
float16
operator
-
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
static_cast
<
float
>
(
a
)
-
static_cast
<
float
>
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
-
static_cast
<
float
>
(
b
));
...
@@ -495,7 +495,7 @@ HOST inline float16 operator-(const float16& a, const float16& b) {
...
@@ -495,7 +495,7 @@ HOST inline float16 operator-(const float16& a, const float16& b) {
#else
#else
HOSTDEVICE
inline
float16
operator
-
(
const
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
float16
operator
-
(
const
float16
&
a
,
const
float16
&
b
)
{
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
float16
(
__hsub
(
half
(
a
),
half
(
b
)));
return
float16
(
__hsub
(
a
.
to_half
(),
b
.
to_half
(
)));
#else
#else
return
float16
(
static_cast
<
float
>
(
a
)
-
static_cast
<
float
>
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
-
static_cast
<
float
>
(
b
));
#endif
#endif
...
@@ -504,7 +504,7 @@ HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
...
@@ -504,7 +504,7 @@ HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
#if defined(__HIPCC__)
#if defined(__HIPCC__)
DEVICE
inline
float16
operator
*
(
const
float16
&
a
,
const
float16
&
b
)
{
DEVICE
inline
float16
operator
*
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
__hmul
(
half
(
a
),
half
(
b
)));
return
float16
(
__hmul
(
a
.
to_half
(),
b
.
to_half
(
)));
}
}
HOST
inline
float16
operator
*
(
const
float16
&
a
,
const
float16
&
b
)
{
HOST
inline
float16
operator
*
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
static_cast
<
float
>
(
a
)
*
static_cast
<
float
>
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
*
static_cast
<
float
>
(
b
));
...
@@ -512,7 +512,7 @@ HOST inline float16 operator*(const float16& a, const float16& b) {
...
@@ -512,7 +512,7 @@ HOST inline float16 operator*(const float16& a, const float16& b) {
#else
#else
HOSTDEVICE
inline
float16
operator
*
(
const
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
float16
operator
*
(
const
float16
&
a
,
const
float16
&
b
)
{
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
float16
(
__hmul
(
half
(
a
),
half
(
b
)));
return
float16
(
__hmul
(
a
.
to_half
(),
b
.
to_half
(
)));
#else
#else
return
float16
(
static_cast
<
float
>
(
a
)
*
static_cast
<
float
>
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
*
static_cast
<
float
>
(
b
));
#endif
#endif
...
@@ -521,7 +521,7 @@ HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
...
@@ -521,7 +521,7 @@ HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
#if defined(__HIPCC__)
#if defined(__HIPCC__)
DEVICE
inline
float16
operator
/
(
const
float16
&
a
,
const
float16
&
b
)
{
DEVICE
inline
float16
operator
/
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
__hdiv
(
half
(
a
),
half
(
b
)));
return
float16
(
__hdiv
(
a
.
to_half
(),
b
.
to_half
(
)));
}
}
HOST
inline
float16
operator
/
(
const
float16
&
a
,
const
float16
&
b
)
{
HOST
inline
float16
operator
/
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
static_cast
<
float
>
(
a
)
/
static_cast
<
float
>
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
/
static_cast
<
float
>
(
b
));
...
@@ -530,8 +530,8 @@ HOST inline float16 operator/(const float16& a, const float16& b) {
...
@@ -530,8 +530,8 @@ HOST inline float16 operator/(const float16& a, const float16& b) {
HOSTDEVICE
inline
float16
operator
/
(
const
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
float16
operator
/
(
const
float16
&
a
,
const
float16
&
b
)
{
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
// TODO(kexinzhao): check which cuda version starts to support __hdiv
// TODO(kexinzhao): check which cuda version starts to support __hdiv
float
num
=
__half2float
(
half
(
a
));
float
num
=
__half2float
(
a
.
to_half
(
));
float
denom
=
__half2float
(
half
(
b
));
float
denom
=
__half2float
(
b
.
to_half
(
));
return
float16
(
num
/
denom
);
return
float16
(
num
/
denom
);
#else
#else
return
float16
(
static_cast
<
float
>
(
a
)
/
static_cast
<
float
>
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
/
static_cast
<
float
>
(
b
));
...
@@ -541,7 +541,7 @@ HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
...
@@ -541,7 +541,7 @@ HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
#if defined(__HIPCC__)
#if defined(__HIPCC__)
DEVICE
inline
float16
operator
-
(
const
float16
&
a
)
{
DEVICE
inline
float16
operator
-
(
const
float16
&
a
)
{
return
float16
(
__hneg
(
half
(
a
)));
return
float16
(
__hneg
(
a
.
to_half
(
)));
}
}
HOST
inline
float16
operator
-
(
const
float16
&
a
)
{
HOST
inline
float16
operator
-
(
const
float16
&
a
)
{
float16
res
;
float16
res
;
...
@@ -551,7 +551,7 @@ HOST inline float16 operator-(const float16& a) {
...
@@ -551,7 +551,7 @@ HOST inline float16 operator-(const float16& a) {
#else
#else
HOSTDEVICE
inline
float16
operator
-
(
const
float16
&
a
)
{
HOSTDEVICE
inline
float16
operator
-
(
const
float16
&
a
)
{
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
float16
(
__hneg
(
half
(
a
)));
return
float16
(
__hneg
(
a
.
to_half
(
)));
#else
#else
float16
res
;
float16
res
;
res
.
x
=
a
.
x
^
0x8000
;
res
.
x
=
a
.
x
^
0x8000
;
...
@@ -584,7 +584,7 @@ HOSTDEVICE inline float16& operator/=(float16& a, const float16& b) { // NOLINT
...
@@ -584,7 +584,7 @@ HOSTDEVICE inline float16& operator/=(float16& a, const float16& b) { // NOLINT
// in __host__ __device__ function
// in __host__ __device__ function
#if defined(__HIPCC__)
#if defined(__HIPCC__)
DEVICE
inline
bool
operator
==
(
const
float16
&
a
,
const
float16
&
b
)
{
DEVICE
inline
bool
operator
==
(
const
float16
&
a
,
const
float16
&
b
)
{
return
__heq
(
half
(
a
),
half
(
b
));
return
__heq
(
a
.
to_half
(),
b
.
to_half
(
));
}
}
HOST
inline
bool
operator
==
(
const
float16
&
a
,
const
float16
&
b
)
{
HOST
inline
bool
operator
==
(
const
float16
&
a
,
const
float16
&
b
)
{
return
static_cast
<
float
>
(
a
)
==
static_cast
<
float
>
(
b
);
return
static_cast
<
float
>
(
a
)
==
static_cast
<
float
>
(
b
);
...
@@ -592,7 +592,7 @@ HOST inline bool operator==(const float16& a, const float16& b) {
...
@@ -592,7 +592,7 @@ HOST inline bool operator==(const float16& a, const float16& b) {
#else // __HIPCC__
#else // __HIPCC__
HOSTDEVICE
inline
bool
operator
==
(
const
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
bool
operator
==
(
const
float16
&
a
,
const
float16
&
b
)
{
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__heq
(
half
(
a
),
half
(
b
));
return
__heq
(
a
.
to_half
(),
b
.
to_half
(
));
#else
#else
return
static_cast
<
float
>
(
a
)
==
static_cast
<
float
>
(
b
);
return
static_cast
<
float
>
(
a
)
==
static_cast
<
float
>
(
b
);
#endif
#endif
...
@@ -601,7 +601,7 @@ HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
...
@@ -601,7 +601,7 @@ HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
#if defined(__HIPCC__)
#if defined(__HIPCC__)
DEVICE
inline
bool
operator
!=
(
const
float16
&
a
,
const
float16
&
b
)
{
DEVICE
inline
bool
operator
!=
(
const
float16
&
a
,
const
float16
&
b
)
{
return
__hne
(
half
(
a
),
half
(
b
));
return
__hne
(
a
.
to_half
(),
b
.
to_half
(
));
}
}
HOST
inline
bool
operator
!=
(
const
float16
&
a
,
const
float16
&
b
)
{
HOST
inline
bool
operator
!=
(
const
float16
&
a
,
const
float16
&
b
)
{
return
static_cast
<
float
>
(
a
)
!=
static_cast
<
float
>
(
b
);
return
static_cast
<
float
>
(
a
)
!=
static_cast
<
float
>
(
b
);
...
@@ -609,7 +609,7 @@ HOST inline bool operator!=(const float16& a, const float16& b) {
...
@@ -609,7 +609,7 @@ HOST inline bool operator!=(const float16& a, const float16& b) {
#else // __HIPCC__
#else // __HIPCC__
HOSTDEVICE
inline
bool
operator
!=
(
const
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
bool
operator
!=
(
const
float16
&
a
,
const
float16
&
b
)
{
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hne
(
half
(
a
),
half
(
b
));
return
__hne
(
a
.
to_half
(),
b
.
to_half
(
));
#else
#else
return
static_cast
<
float
>
(
a
)
!=
static_cast
<
float
>
(
b
);
return
static_cast
<
float
>
(
a
)
!=
static_cast
<
float
>
(
b
);
#endif
#endif
...
@@ -618,7 +618,7 @@ HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
...
@@ -618,7 +618,7 @@ HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
#if defined(__HIPCC__)
#if defined(__HIPCC__)
DEVICE
inline
bool
operator
<
(
const
float16
&
a
,
const
float16
&
b
)
{
DEVICE
inline
bool
operator
<
(
const
float16
&
a
,
const
float16
&
b
)
{
return
__hlt
(
half
(
a
),
half
(
b
));
return
__hlt
(
a
.
to_half
(),
b
.
to_half
(
));
}
}
HOST
inline
bool
operator
<
(
const
float16
&
a
,
const
float16
&
b
)
{
HOST
inline
bool
operator
<
(
const
float16
&
a
,
const
float16
&
b
)
{
return
static_cast
<
float
>
(
a
)
<
static_cast
<
float
>
(
b
);
return
static_cast
<
float
>
(
a
)
<
static_cast
<
float
>
(
b
);
...
@@ -626,7 +626,7 @@ HOST inline bool operator<(const float16& a, const float16& b) {
...
@@ -626,7 +626,7 @@ HOST inline bool operator<(const float16& a, const float16& b) {
#else // __HIPCC__
#else // __HIPCC__
HOSTDEVICE
inline
bool
operator
<
(
const
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
bool
operator
<
(
const
float16
&
a
,
const
float16
&
b
)
{
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hlt
(
half
(
a
),
half
(
b
));
return
__hlt
(
a
.
to_half
(),
b
.
to_half
(
));
#else
#else
return
static_cast
<
float
>
(
a
)
<
static_cast
<
float
>
(
b
);
return
static_cast
<
float
>
(
a
)
<
static_cast
<
float
>
(
b
);
#endif
#endif
...
@@ -635,7 +635,7 @@ HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
...
@@ -635,7 +635,7 @@ HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
#if defined(__HIPCC__)
#if defined(__HIPCC__)
DEVICE
inline
bool
operator
<=
(
const
float16
&
a
,
const
float16
&
b
)
{
DEVICE
inline
bool
operator
<=
(
const
float16
&
a
,
const
float16
&
b
)
{
return
__hle
(
half
(
a
),
half
(
b
));
return
__hle
(
a
.
to_half
(),
b
.
to_half
(
));
}
}
HOST
inline
bool
operator
<=
(
const
float16
&
a
,
const
float16
&
b
)
{
HOST
inline
bool
operator
<=
(
const
float16
&
a
,
const
float16
&
b
)
{
return
static_cast
<
float
>
(
a
)
<=
static_cast
<
float
>
(
b
);
return
static_cast
<
float
>
(
a
)
<=
static_cast
<
float
>
(
b
);
...
@@ -643,7 +643,7 @@ HOST inline bool operator<=(const float16& a, const float16& b) {
...
@@ -643,7 +643,7 @@ HOST inline bool operator<=(const float16& a, const float16& b) {
#else // __HIPCC__
#else // __HIPCC__
HOSTDEVICE
inline
bool
operator
<=
(
const
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
bool
operator
<=
(
const
float16
&
a
,
const
float16
&
b
)
{
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hle
(
half
(
a
),
half
(
b
));
return
__hle
(
a
.
to_half
(),
b
.
to_half
(
));
#else
#else
return
static_cast
<
float
>
(
a
)
<=
static_cast
<
float
>
(
b
);
return
static_cast
<
float
>
(
a
)
<=
static_cast
<
float
>
(
b
);
#endif
#endif
...
@@ -652,7 +652,7 @@ HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
...
@@ -652,7 +652,7 @@ HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
#if defined(__HIPCC__)
#if defined(__HIPCC__)
DEVICE
inline
bool
operator
>
(
const
float16
&
a
,
const
float16
&
b
)
{
DEVICE
inline
bool
operator
>
(
const
float16
&
a
,
const
float16
&
b
)
{
return
__hgt
(
half
(
a
),
half
(
b
));
return
__hgt
(
a
.
to_half
(),
b
.
to_half
(
));
}
}
HOST
inline
bool
operator
>
(
const
float16
&
a
,
const
float16
&
b
)
{
HOST
inline
bool
operator
>
(
const
float16
&
a
,
const
float16
&
b
)
{
return
static_cast
<
float
>
(
a
)
>
static_cast
<
float
>
(
b
);
return
static_cast
<
float
>
(
a
)
>
static_cast
<
float
>
(
b
);
...
@@ -660,7 +660,7 @@ HOST inline bool operator>(const float16& a, const float16& b) {
...
@@ -660,7 +660,7 @@ HOST inline bool operator>(const float16& a, const float16& b) {
#else // __HIPCC__
#else // __HIPCC__
HOSTDEVICE
inline
bool
operator
>
(
const
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
bool
operator
>
(
const
float16
&
a
,
const
float16
&
b
)
{
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hgt
(
half
(
a
),
half
(
b
));
return
__hgt
(
a
.
to_half
(),
b
.
to_half
(
));
#else
#else
return
static_cast
<
float
>
(
a
)
>
static_cast
<
float
>
(
b
);
return
static_cast
<
float
>
(
a
)
>
static_cast
<
float
>
(
b
);
#endif
#endif
...
@@ -669,7 +669,7 @@ HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
...
@@ -669,7 +669,7 @@ HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
#if defined(__HIPCC__)
#if defined(__HIPCC__)
DEVICE
inline
bool
operator
>=
(
const
float16
&
a
,
const
float16
&
b
)
{
DEVICE
inline
bool
operator
>=
(
const
float16
&
a
,
const
float16
&
b
)
{
return
__hge
(
half
(
a
),
half
(
b
));
return
__hge
(
a
.
to_half
(),
b
.
to_half
(
));
}
}
HOST
inline
bool
operator
>=
(
const
float16
&
a
,
const
float16
&
b
)
{
HOST
inline
bool
operator
>=
(
const
float16
&
a
,
const
float16
&
b
)
{
return
static_cast
<
float
>
(
a
)
>=
static_cast
<
float
>
(
b
);
return
static_cast
<
float
>
(
a
)
>=
static_cast
<
float
>
(
b
);
...
@@ -677,7 +677,7 @@ HOST inline bool operator>=(const float16& a, const float16& b) {
...
@@ -677,7 +677,7 @@ HOST inline bool operator>=(const float16& a, const float16& b) {
#else // __HIPCC__
#else // __HIPCC__
HOSTDEVICE
inline
bool
operator
>=
(
const
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
bool
operator
>=
(
const
float16
&
a
,
const
float16
&
b
)
{
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hge
(
half
(
a
),
half
(
b
));
return
__hge
(
a
.
to_half
(),
b
.
to_half
(
));
#else
#else
return
static_cast
<
float
>
(
a
)
>=
static_cast
<
float
>
(
b
);
return
static_cast
<
float
>
(
a
)
>=
static_cast
<
float
>
(
b
);
#endif
#endif
...
@@ -945,12 +945,12 @@ HOSTDEVICE inline float16 raw_uint16_to_float16(uint16_t a) {
...
@@ -945,12 +945,12 @@ HOSTDEVICE inline float16 raw_uint16_to_float16(uint16_t a) {
// HIPCC has compile error if call __device__ function __hisnan in __host__
// HIPCC has compile error if call __device__ function __hisnan in __host__
// __device__ function
// __device__ function
#if defined(PADDLE_CUDA_FP16) && defined(__HIPCC__)
#if defined(PADDLE_CUDA_FP16) && defined(__HIPCC__)
DEVICE
inline
bool
(
isnan
)(
const
float16
&
a
)
{
return
__hisnan
(
half
(
a
));
}
DEVICE
inline
bool
(
isnan
)(
const
float16
&
a
)
{
return
__hisnan
(
a
.
to_half
(
));
}
HOST
inline
bool
(
isnan
)(
const
float16
&
a
)
{
return
(
a
.
x
&
0x7fff
)
>
0x7c00
;
}
HOST
inline
bool
(
isnan
)(
const
float16
&
a
)
{
return
(
a
.
x
&
0x7fff
)
>
0x7c00
;
}
#else
#else
HOSTDEVICE
inline
bool
(
isnan
)(
const
float16
&
a
)
{
HOSTDEVICE
inline
bool
(
isnan
)(
const
float16
&
a
)
{
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hisnan
(
half
(
a
));
return
__hisnan
(
a
.
to_half
(
));
#else
#else
return
(
a
.
x
&
0x7fff
)
>
0x7c00
;
return
(
a
.
x
&
0x7fff
)
>
0x7c00
;
#endif
#endif
...
...
paddle/fluid/platform/float16_test.cu
浏览文件 @
78375990
...
@@ -48,8 +48,8 @@ limitations under the License. */
...
@@ -48,8 +48,8 @@ limitations under the License. */
in1 = reinterpret_cast<half *>(malloc(size)); \
in1 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \
out = reinterpret_cast<half *>(malloc(size)); \
out = reinterpret_cast<half *>(malloc(size)); \
in1[0] =
half(float16(v_in1));
\
in1[0] =
float16(v_in1).to_half();
\
in2[0] =
half(float16(v_in2));
\
in2[0] =
float16(v_in2).to_half();
\
hipMemcpy(d_in1, in1, size, hipMemcpyHostToDevice); \
hipMemcpy(d_in1, in1, size, hipMemcpyHostToDevice); \
hipMemcpy(d_in2, in2, size, hipMemcpyHostToDevice); \
hipMemcpy(d_in2, in2, size, hipMemcpyHostToDevice); \
hipLaunchKernelGGL(op_type, dim3(1), dim3(1), 0, 0, d_in1, d_in2, d_out); \
hipLaunchKernelGGL(op_type, dim3(1), dim3(1), 0, 0, d_in1, d_in2, d_out); \
...
@@ -73,8 +73,8 @@ limitations under the License. */
...
@@ -73,8 +73,8 @@ limitations under the License. */
hipMalloc(reinterpret_cast<void **>(&d_in2), size); \
hipMalloc(reinterpret_cast<void **>(&d_in2), size); \
in1 = reinterpret_cast<half *>(malloc(size)); \
in1 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \
in1[0] =
half(float16(v_in1));
\
in1[0] =
float16(v_in1).to_half();
\
in2[0] =
half(float16(v_in2));
\
in2[0] =
float16(v_in2).to_half();
\
hipMemcpy(d_in1, in1, size, hipMemcpyHostToDevice); \
hipMemcpy(d_in1, in1, size, hipMemcpyHostToDevice); \
hipMemcpy(d_in2, in2, size, hipMemcpyHostToDevice); \
hipMemcpy(d_in2, in2, size, hipMemcpyHostToDevice); \
hipLaunchKernelGGL(op_type, dim3(1), dim3(1), 0, 0, d_in1, d_in2); \
hipLaunchKernelGGL(op_type, dim3(1), dim3(1), 0, 0, d_in1, d_in2); \
...
@@ -99,8 +99,8 @@ limitations under the License. */
...
@@ -99,8 +99,8 @@ limitations under the License. */
in1 = reinterpret_cast<half *>(malloc(size)); \
in1 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \
out = reinterpret_cast<bool *>(malloc(1)); \
out = reinterpret_cast<bool *>(malloc(1)); \
in1[0] =
half(float16(v_in1));
\
in1[0] =
float16(v_in1).to_half();
\
in2[0] =
half(float16(v_in2));
\
in2[0] =
float16(v_in2).to_half();
\
hipMemcpy(d_in1, in1, size, hipMemcpyHostToDevice); \
hipMemcpy(d_in1, in1, size, hipMemcpyHostToDevice); \
hipMemcpy(d_in2, in2, size, hipMemcpyHostToDevice); \
hipMemcpy(d_in2, in2, size, hipMemcpyHostToDevice); \
hipLaunchKernelGGL(op_type, dim3(1), dim3(1), 0, 0, d_in1, d_in2, d_out); \
hipLaunchKernelGGL(op_type, dim3(1), dim3(1), 0, 0, d_in1, d_in2, d_out); \
...
@@ -126,8 +126,8 @@ limitations under the License. */
...
@@ -126,8 +126,8 @@ limitations under the License. */
in1 = reinterpret_cast<half *>(malloc(size)); \
in1 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \
out = reinterpret_cast<half *>(malloc(size)); \
out = reinterpret_cast<half *>(malloc(size)); \
in1[0] =
half(float16(v_in1));
\
in1[0] =
float16(v_in1).to_half();
\
in2[0] =
half(float16(v_in2));
\
in2[0] =
float16(v_in2).to_half();
\
cudaMemcpy(d_in1, in1, size, cudaMemcpyHostToDevice); \
cudaMemcpy(d_in1, in1, size, cudaMemcpyHostToDevice); \
cudaMemcpy(d_in2, in2, size, cudaMemcpyHostToDevice); \
cudaMemcpy(d_in2, in2, size, cudaMemcpyHostToDevice); \
op_type<<<1, 1>>>(d_in1, d_in2, d_out); \
op_type<<<1, 1>>>(d_in1, d_in2, d_out); \
...
@@ -151,8 +151,8 @@ limitations under the License. */
...
@@ -151,8 +151,8 @@ limitations under the License. */
cudaMalloc(reinterpret_cast<void **>(&d_in2), size); \
cudaMalloc(reinterpret_cast<void **>(&d_in2), size); \
in1 = reinterpret_cast<half *>(malloc(size)); \
in1 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \
in1[0] =
half(float16(v_in1));
\
in1[0] =
float16(v_in1).to_half();
\
in2[0] =
half(float16(v_in2));
\
in2[0] =
float16(v_in2).to_half();
\
cudaMemcpy(d_in1, in1, size, cudaMemcpyHostToDevice); \
cudaMemcpy(d_in1, in1, size, cudaMemcpyHostToDevice); \
cudaMemcpy(d_in2, in2, size, cudaMemcpyHostToDevice); \
cudaMemcpy(d_in2, in2, size, cudaMemcpyHostToDevice); \
op_type<<<1, 1>>>(d_in1, d_in2); \
op_type<<<1, 1>>>(d_in1, d_in2); \
...
@@ -177,8 +177,8 @@ limitations under the License. */
...
@@ -177,8 +177,8 @@ limitations under the License. */
in1 = reinterpret_cast<half *>(malloc(size)); \
in1 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \
in2 = reinterpret_cast<half *>(malloc(size)); \
out = reinterpret_cast<bool *>(malloc(1)); \
out = reinterpret_cast<bool *>(malloc(1)); \
in1[0] =
half(float16(v_in1));
\
in1[0] =
float16(v_in1).to_half();
\
in2[0] =
half(float16(v_in2));
\
in2[0] =
float16(v_in2).to_half();
\
cudaMemcpy(d_in1, in1, size, cudaMemcpyHostToDevice); \
cudaMemcpy(d_in1, in1, size, cudaMemcpyHostToDevice); \
cudaMemcpy(d_in2, in2, size, cudaMemcpyHostToDevice); \
cudaMemcpy(d_in2, in2, size, cudaMemcpyHostToDevice); \
op_type<<<1, 1>>>(d_in1, d_in2, d_out); \
op_type<<<1, 1>>>(d_in1, d_in2, d_out); \
...
@@ -221,7 +221,7 @@ void TestNeg(float v_in, float v_out) {
...
@@ -221,7 +221,7 @@ void TestNeg(float v_in, float v_out) {
cudaMalloc
(
reinterpret_cast
<
void
**>
(
&
d_in
),
size
);
cudaMalloc
(
reinterpret_cast
<
void
**>
(
&
d_in
),
size
);
#endif
#endif
in
=
reinterpret_cast
<
half
*>
(
malloc
(
size
));
in
=
reinterpret_cast
<
half
*>
(
malloc
(
size
));
in
[
0
]
=
half
(
float16
(
v_in
)
);
in
[
0
]
=
float16
(
v_in
).
to_half
(
);
#ifdef PADDLE_WITH_HIP
#ifdef PADDLE_WITH_HIP
hipMemcpy
(
d_in
,
in
,
size
,
hipMemcpyHostToDevice
);
hipMemcpy
(
d_in
,
in
,
size
,
hipMemcpyHostToDevice
);
#else
#else
...
@@ -299,17 +299,17 @@ TEST(float16, comparision_on_gpu) {
...
@@ -299,17 +299,17 @@ TEST(float16, comparision_on_gpu) {
TEST
(
float16
,
conversion_on_gpu
)
{
TEST
(
float16
,
conversion_on_gpu
)
{
// Explicit conversion to and from cuda half
// Explicit conversion to and from cuda half
EXPECT_EQ
(
float16
(
half
(
float16
(
1.0
f
)
)).
x
,
0x3c00
);
EXPECT_EQ
(
float16
(
float16
(
1.0
f
).
to_half
(
)).
x
,
0x3c00
);
EXPECT_EQ
(
float16
(
half
(
float16
(
0.5
f
)
)).
x
,
0x3800
);
EXPECT_EQ
(
float16
(
float16
(
0.5
f
).
to_half
(
)).
x
,
0x3800
);
EXPECT_EQ
(
float16
(
half
(
float16
(
0.33333
f
)
)).
x
,
0x3555
);
EXPECT_EQ
(
float16
(
float16
(
0.33333
f
).
to_half
(
)).
x
,
0x3555
);
EXPECT_EQ
(
float16
(
half
(
float16
(
0.0
f
)
)).
x
,
0x0000
);
EXPECT_EQ
(
float16
(
float16
(
0.0
f
).
to_half
(
)).
x
,
0x0000
);
EXPECT_EQ
(
float16
(
half
(
float16
(
-
0.0
f
)
)).
x
,
0x8000
);
EXPECT_EQ
(
float16
(
float16
(
-
0.0
f
).
to_half
(
)).
x
,
0x8000
);
EXPECT_EQ
(
float16
(
half
(
float16
(
65504.0
f
)
)).
x
,
0x7bff
);
EXPECT_EQ
(
float16
(
float16
(
65504.0
f
).
to_half
(
)).
x
,
0x7bff
);
EXPECT_EQ
(
float16
(
half
(
float16
(
65536.0
f
)
)).
x
,
0x7c00
);
EXPECT_EQ
(
float16
(
float16
(
65536.0
f
).
to_half
(
)).
x
,
0x7c00
);
// Assignment operator
// Assignment operator
float16
v_assign
;
float16
v_assign
;
v_assign
=
half
(
float16
(
1.0
f
)
);
v_assign
=
float16
(
1.0
f
).
to_half
(
);
EXPECT_EQ
(
v_assign
.
x
,
0x3c00
);
EXPECT_EQ
(
v_assign
.
x
,
0x3c00
);
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录