Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
b2a1c9e8
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 2 年 前同步成功
通知
708
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b2a1c9e8
编写于
4月 06, 2018
作者:
K
Kexin Zhao
提交者:
Yi Wang
4月 06, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add float16 support to non-cudnn softmax op on GPU (#9686)
* initial commit * fix error * fix typo and order
上级
797a7184
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
189 addition
and
74 deletion
+189
-74
paddle/fluid/operators/math/softmax.cu
paddle/fluid/operators/math/softmax.cu
+3
-0
paddle/fluid/operators/math/softmax_impl.h
paddle/fluid/operators/math/softmax_impl.h
+1
-1
paddle/fluid/operators/softmax_op.cc
paddle/fluid/operators/softmax_op.cc
+7
-2
paddle/fluid/operators/softmax_op.cu.cc
paddle/fluid/operators/softmax_op.cu.cc
+6
-5
paddle/fluid/platform/float16.h
paddle/fluid/platform/float16.h
+161
-66
python/paddle/fluid/tests/unittests/test_softmax_op.py
python/paddle/fluid/tests/unittests/test_softmax_op.py
+11
-0
未找到文件。
paddle/fluid/operators/math/softmax.cu
浏览文件 @
b2a1c9e8
...
@@ -14,6 +14,8 @@ limitations under the License. */
...
@@ -14,6 +14,8 @@ limitations under the License. */
#define EIGEN_USE_GPU
#define EIGEN_USE_GPU
#include <vector>
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/operators/math/softmax_impl.h"
#include "paddle/fluid/operators/math/softmax_impl.h"
...
@@ -95,6 +97,7 @@ template class SoftmaxCUDNNFunctor<double>;
...
@@ -95,6 +97,7 @@ template class SoftmaxCUDNNFunctor<double>;
template
class
SoftmaxGradCUDNNFunctor
<
float
>;
template
class
SoftmaxGradCUDNNFunctor
<
float
>;
template
class
SoftmaxGradCUDNNFunctor
<
double
>;
template
class
SoftmaxGradCUDNNFunctor
<
double
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
platform
::
float16
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
double
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
double
>;
template
class
SoftmaxGradFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
class
SoftmaxGradFunctor
<
platform
::
CUDADeviceContext
,
float
>;
...
...
paddle/fluid/operators/math/softmax_impl.h
浏览文件 @
b2a1c9e8
...
@@ -27,7 +27,7 @@ using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
...
@@ -27,7 +27,7 @@ using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template
<
typename
T
>
template
<
typename
T
>
struct
ValueClip
{
struct
ValueClip
{
HOSTDEVICE
T
operator
()(
const
T
&
x
)
const
{
HOSTDEVICE
T
operator
()(
const
T
&
x
)
const
{
const
T
kThreshold
=
-
64.
;
const
T
kThreshold
=
static_cast
<
T
>
(
-
64.
)
;
return
x
<
kThreshold
?
kThreshold
:
x
;
return
x
<
kThreshold
?
kThreshold
:
x
;
}
}
};
};
...
...
paddle/fluid/operators/softmax_op.cc
浏览文件 @
b2a1c9e8
...
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
...
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/softmax_op.h"
#include "paddle/fluid/operators/softmax_op.h"
#include <string>
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cudnn_helper.h"
#include "paddle/fluid/platform/cudnn_helper.h"
#endif
#endif
...
@@ -20,6 +23,7 @@ limitations under the License. */
...
@@ -20,6 +23,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#endif
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -60,8 +64,8 @@ class SoftmaxOp : public framework::OperatorWithKernel {
...
@@ -60,8 +64,8 @@ class SoftmaxOp : public framework::OperatorWithKernel {
auto
input_data_type
=
auto
input_data_type
=
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
());
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
());
if
(
input_data_type
==
framework
::
proto
::
VarType
::
FP16
)
{
if
(
input_data_type
==
framework
::
proto
::
VarType
::
FP16
)
{
PADDLE_ENFORCE
_EQ
(
library_
,
framework
::
LibraryType
::
kCUDNN
,
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
())
,
"float16 can only be used when CUDNN is used
"
);
"float16 can only be used on GPU place
"
);
}
}
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
...
@@ -70,6 +74,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
...
@@ -70,6 +74,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
library_
);
library_
);
}
}
};
};
class
SoftmaxOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
class
SoftmaxOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
public:
SoftmaxOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
SoftmaxOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
...
...
paddle/fluid/operators/softmax_op.cu.cc
浏览文件 @
b2a1c9e8
...
@@ -13,11 +13,12 @@ See the License for the specific language governing permissions and
...
@@ -13,11 +13,12 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/softmax_op.h"
#include "paddle/fluid/operators/softmax_op.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
softmax
,
ops
::
SoftmaxKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
);
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
softmax_grad
,
softmax
,
ops
::
SoftmaxKernel
<
plat
::
CUDADeviceContext
,
float
>
,
ops
::
SoftmaxGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
);
ops
::
SoftmaxKernel
<
plat
::
CUDADeviceContext
,
plat
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
softmax_grad
,
ops
::
SoftmaxGradKernel
<
plat
::
CUDADeviceContext
,
float
>
);
paddle/fluid/platform/float16.h
浏览文件 @
b2a1c9e8
...
@@ -15,6 +15,7 @@ limitations under the License. */
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <stdint.h>
#include <stdint.h>
#include <limits>
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#include <cuda.h>
...
@@ -293,39 +294,39 @@ struct PADDLE_ALIGN(2) float16 {
...
@@ -293,39 +294,39 @@ struct PADDLE_ALIGN(2) float16 {
HOSTDEVICE
inline
explicit
operator
bool
()
const
{
return
(
x
&
0x7fff
)
!=
0
;
}
HOSTDEVICE
inline
explicit
operator
bool
()
const
{
return
(
x
&
0x7fff
)
!=
0
;
}
HOSTDEVICE
inline
explicit
operator
int8_t
()
const
{
HOSTDEVICE
inline
explicit
operator
int8_t
()
const
{
return
static_cast
<
int8_t
>
(
float
(
*
this
));
return
static_cast
<
int8_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
uint8_t
()
const
{
HOSTDEVICE
inline
explicit
operator
uint8_t
()
const
{
return
static_cast
<
uint8_t
>
(
float
(
*
this
));
return
static_cast
<
uint8_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
int16_t
()
const
{
HOSTDEVICE
inline
explicit
operator
int16_t
()
const
{
return
static_cast
<
int16_t
>
(
float
(
*
this
));
return
static_cast
<
int16_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
uint16_t
()
const
{
HOSTDEVICE
inline
explicit
operator
uint16_t
()
const
{
return
static_cast
<
uint16_t
>
(
float
(
*
this
));
return
static_cast
<
uint16_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
int32_t
()
const
{
HOSTDEVICE
inline
explicit
operator
int32_t
()
const
{
return
static_cast
<
int32_t
>
(
float
(
*
this
));
return
static_cast
<
int32_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
uint32_t
()
const
{
HOSTDEVICE
inline
explicit
operator
uint32_t
()
const
{
return
static_cast
<
uint32_t
>
(
float
(
*
this
));
return
static_cast
<
uint32_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
int64_t
()
const
{
HOSTDEVICE
inline
explicit
operator
int64_t
()
const
{
return
static_cast
<
int64_t
>
(
float
(
*
this
));
return
static_cast
<
int64_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
uint64_t
()
const
{
HOSTDEVICE
inline
explicit
operator
uint64_t
()
const
{
return
static_cast
<
uint64_t
>
(
float
(
*
this
));
return
static_cast
<
uint64_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
double
()
const
{
HOSTDEVICE
inline
explicit
operator
double
()
const
{
return
static_cast
<
double
>
(
float
(
*
this
));
return
static_cast
<
double
>
(
static_cast
<
float
>
(
*
this
));
}
}
private:
private:
...
@@ -370,7 +371,7 @@ DEVICE inline half operator+(const half& a, const half& b) {
...
@@ -370,7 +371,7 @@ DEVICE inline half operator+(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hadd
(
a
,
b
);
return
__hadd
(
a
,
b
);
#else
#else
float
res
=
float
(
float16
(
a
))
+
float
(
float16
(
b
));
float
res
=
static_cast
<
float
>
(
float16
(
a
))
+
static_cast
<
float
>
(
float16
(
b
));
return
half
(
float16
(
res
));
return
half
(
float16
(
res
));
#endif
#endif
}
}
...
@@ -379,7 +380,7 @@ DEVICE inline half operator-(const half& a, const half& b) {
...
@@ -379,7 +380,7 @@ DEVICE inline half operator-(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hsub
(
a
,
b
);
return
__hsub
(
a
,
b
);
#else
#else
float
res
=
float
(
float16
(
a
))
-
float
(
float16
(
b
));
float
res
=
static_cast
<
float
>
(
float16
(
a
))
-
static_cast
<
float
>
(
float16
(
b
));
return
half
(
float16
(
res
));
return
half
(
float16
(
res
));
#endif
#endif
}
}
...
@@ -388,7 +389,7 @@ DEVICE inline half operator*(const half& a, const half& b) {
...
@@ -388,7 +389,7 @@ DEVICE inline half operator*(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hmul
(
a
,
b
);
return
__hmul
(
a
,
b
);
#else
#else
float
res
=
float
(
float16
(
a
))
*
float
(
float16
(
b
));
float
res
=
static_cast
<
float
>
(
float16
(
a
))
*
static_cast
<
float
>
(
float16
(
b
));
return
half
(
float16
(
res
));
return
half
(
float16
(
res
));
#endif
#endif
}
}
...
@@ -399,7 +400,7 @@ DEVICE inline half operator/(const half& a, const half& b) {
...
@@ -399,7 +400,7 @@ DEVICE inline half operator/(const half& a, const half& b) {
float
denom
=
__half2float
(
b
);
float
denom
=
__half2float
(
b
);
return
__float2half
(
num
/
denom
);
return
__float2half
(
num
/
denom
);
#else
#else
float
res
=
float
(
float16
(
a
))
/
float
(
float16
(
b
));
float
res
=
static_cast
<
float
>
(
float16
(
a
))
/
static_cast
<
float
>
(
float16
(
b
));
return
half
(
float16
(
res
));
return
half
(
float16
(
res
));
#endif
#endif
}
}
...
@@ -408,27 +409,27 @@ DEVICE inline half operator-(const half& a) {
...
@@ -408,27 +409,27 @@ DEVICE inline half operator-(const half& a) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hneg
(
a
);
return
__hneg
(
a
);
#else
#else
float
res
=
-
float
(
float16
(
a
));
float
res
=
-
static_cast
<
float
>
(
float16
(
a
));
return
half
(
float16
(
res
));
return
half
(
float16
(
res
));
#endif
#endif
}
}
DEVICE
inline
half
&
operator
+=
(
half
&
a
,
const
half
&
b
)
{
DEVICE
inline
half
&
operator
+=
(
half
&
a
,
const
half
&
b
)
{
// NOLINT
a
=
a
+
b
;
a
=
a
+
b
;
return
a
;
return
a
;
}
}
DEVICE
inline
half
&
operator
-=
(
half
&
a
,
const
half
&
b
)
{
DEVICE
inline
half
&
operator
-=
(
half
&
a
,
const
half
&
b
)
{
// NOLINT
a
=
a
-
b
;
a
=
a
-
b
;
return
a
;
return
a
;
}
}
DEVICE
inline
half
&
operator
*=
(
half
&
a
,
const
half
&
b
)
{
DEVICE
inline
half
&
operator
*=
(
half
&
a
,
const
half
&
b
)
{
// NOLINT
a
=
a
*
b
;
a
=
a
*
b
;
return
a
;
return
a
;
}
}
DEVICE
inline
half
&
operator
/=
(
half
&
a
,
const
half
&
b
)
{
DEVICE
inline
half
&
operator
/=
(
half
&
a
,
const
half
&
b
)
{
// NOLINT
a
=
a
/
b
;
a
=
a
/
b
;
return
a
;
return
a
;
}
}
...
@@ -437,7 +438,7 @@ DEVICE inline bool operator==(const half& a, const half& b) {
...
@@ -437,7 +438,7 @@ DEVICE inline bool operator==(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__heq
(
a
,
b
);
return
__heq
(
a
,
b
);
#else
#else
return
float
(
float16
(
a
))
==
float
(
float16
(
b
));
return
static_cast
<
float
>
(
float16
(
a
))
==
static_cast
<
float
>
(
float16
(
b
));
#endif
#endif
}
}
...
@@ -445,7 +446,7 @@ DEVICE inline bool operator!=(const half& a, const half& b) {
...
@@ -445,7 +446,7 @@ DEVICE inline bool operator!=(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hne
(
a
,
b
);
return
__hne
(
a
,
b
);
#else
#else
return
float
(
float16
(
a
))
!=
float
(
float16
(
b
));
return
static_cast
<
float
>
(
float16
(
a
))
!=
static_cast
<
float
>
(
float16
(
b
));
#endif
#endif
}
}
...
@@ -453,7 +454,7 @@ DEVICE inline bool operator<(const half& a, const half& b) {
...
@@ -453,7 +454,7 @@ DEVICE inline bool operator<(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hlt
(
a
,
b
);
return
__hlt
(
a
,
b
);
#else
#else
return
float
(
float16
(
a
))
<
float
(
float16
(
b
));
return
static_cast
<
float
>
(
float16
(
a
))
<
static_cast
<
float
>
(
float16
(
b
));
#endif
#endif
}
}
...
@@ -461,7 +462,7 @@ DEVICE inline bool operator<=(const half& a, const half& b) {
...
@@ -461,7 +462,7 @@ DEVICE inline bool operator<=(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hle
(
a
,
b
);
return
__hle
(
a
,
b
);
#else
#else
return
float
(
float16
(
a
))
<=
float
(
float16
(
b
));
return
static_cast
<
float
>
(
float16
(
a
))
<=
static_cast
<
float
>
(
float16
(
b
));
#endif
#endif
}
}
...
@@ -469,7 +470,7 @@ DEVICE inline bool operator>(const half& a, const half& b) {
...
@@ -469,7 +470,7 @@ DEVICE inline bool operator>(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hgt
(
a
,
b
);
return
__hgt
(
a
,
b
);
#else
#else
return
float
(
float16
(
a
))
>
float
(
float16
(
b
));
return
static_cast
<
float
>
(
float16
(
a
))
>
static_cast
<
float
>
(
float16
(
b
));
#endif
#endif
}
}
...
@@ -477,7 +478,7 @@ DEVICE inline bool operator>=(const half& a, const half& b) {
...
@@ -477,7 +478,7 @@ DEVICE inline bool operator>=(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hge
(
a
,
b
);
return
__hge
(
a
,
b
);
#else
#else
return
float
(
float16
(
a
))
>=
float
(
float16
(
b
));
return
static_cast
<
float
>
(
float16
(
a
))
>=
static_cast
<
float
>
(
float16
(
b
));
#endif
#endif
}
}
...
@@ -489,7 +490,7 @@ HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
...
@@ -489,7 +490,7 @@ HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
float16
(
__hadd
(
half
(
a
),
half
(
b
)));
return
float16
(
__hadd
(
half
(
a
),
half
(
b
)));
#else
#else
return
float16
(
float
(
a
)
+
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
+
static_cast
<
float
>
(
b
));
#endif
#endif
}
}
...
@@ -497,7 +498,7 @@ HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
...
@@ -497,7 +498,7 @@ HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
float16
(
__hsub
(
half
(
a
),
half
(
b
)));
return
float16
(
__hsub
(
half
(
a
),
half
(
b
)));
#else
#else
return
float16
(
float
(
a
)
-
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
-
static_cast
<
float
>
(
b
));
#endif
#endif
}
}
...
@@ -505,7 +506,7 @@ HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
...
@@ -505,7 +506,7 @@ HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
float16
(
__hmul
(
half
(
a
),
half
(
b
)));
return
float16
(
__hmul
(
half
(
a
),
half
(
b
)));
#else
#else
return
float16
(
float
(
a
)
*
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
*
static_cast
<
float
>
(
b
));
#endif
#endif
}
}
...
@@ -516,7 +517,7 @@ HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
...
@@ -516,7 +517,7 @@ HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
float
denom
=
__half2float
(
half
(
b
));
float
denom
=
__half2float
(
half
(
b
));
return
float16
(
num
/
denom
);
return
float16
(
num
/
denom
);
#else
#else
return
float16
(
float
(
a
)
/
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
/
static_cast
<
float
>
(
b
));
#endif
#endif
}
}
...
@@ -530,22 +531,22 @@ HOSTDEVICE inline float16 operator-(const float16& a) {
...
@@ -530,22 +531,22 @@ HOSTDEVICE inline float16 operator-(const float16& a) {
#endif
#endif
}
}
HOSTDEVICE
inline
float16
&
operator
+=
(
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
float16
&
operator
+=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
+
b
;
a
=
a
+
b
;
return
a
;
return
a
;
}
}
HOSTDEVICE
inline
float16
&
operator
-=
(
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
float16
&
operator
-=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
-
b
;
a
=
a
-
b
;
return
a
;
return
a
;
}
}
HOSTDEVICE
inline
float16
&
operator
*=
(
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
float16
&
operator
*=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
*
b
;
a
=
a
*
b
;
return
a
;
return
a
;
}
}
HOSTDEVICE
inline
float16
&
operator
/=
(
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
float16
&
operator
/=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
/
b
;
a
=
a
/
b
;
return
a
;
return
a
;
}
}
...
@@ -554,7 +555,7 @@ HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
...
@@ -554,7 +555,7 @@ HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__heq
(
half
(
a
),
half
(
b
));
return
__heq
(
half
(
a
),
half
(
b
));
#else
#else
return
float
(
a
)
==
float
(
b
);
return
static_cast
<
float
>
(
a
)
==
static_cast
<
float
>
(
b
);
#endif
#endif
}
}
...
@@ -562,7 +563,7 @@ HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
...
@@ -562,7 +563,7 @@ HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hne
(
half
(
a
),
half
(
b
));
return
__hne
(
half
(
a
),
half
(
b
));
#else
#else
return
float
(
a
)
!=
float
(
b
);
return
static_cast
<
float
>
(
a
)
!=
static_cast
<
float
>
(
b
);
#endif
#endif
}
}
...
@@ -570,7 +571,7 @@ HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
...
@@ -570,7 +571,7 @@ HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hlt
(
half
(
a
),
half
(
b
));
return
__hlt
(
half
(
a
),
half
(
b
));
#else
#else
return
float
(
a
)
<
float
(
b
);
return
static_cast
<
float
>
(
a
)
<
static_cast
<
float
>
(
b
);
#endif
#endif
}
}
...
@@ -578,7 +579,7 @@ HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
...
@@ -578,7 +579,7 @@ HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hle
(
half
(
a
),
half
(
b
));
return
__hle
(
half
(
a
),
half
(
b
));
#else
#else
return
float
(
a
)
<=
float
(
b
);
return
static_cast
<
float
>
(
a
)
<=
static_cast
<
float
>
(
b
);
#endif
#endif
}
}
...
@@ -586,7 +587,7 @@ HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
...
@@ -586,7 +587,7 @@ HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hgt
(
half
(
a
),
half
(
b
));
return
__hgt
(
half
(
a
),
half
(
b
));
#else
#else
return
float
(
a
)
>
float
(
b
);
return
static_cast
<
float
>
(
a
)
>
static_cast
<
float
>
(
b
);
#endif
#endif
}
}
...
@@ -594,7 +595,7 @@ HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
...
@@ -594,7 +595,7 @@ HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hge
(
half
(
a
),
half
(
b
));
return
__hge
(
half
(
a
),
half
(
b
));
#else
#else
return
float
(
a
)
>=
float
(
b
);
return
static_cast
<
float
>
(
a
)
>=
static_cast
<
float
>
(
b
);
#endif
#endif
}
}
...
@@ -679,22 +680,22 @@ inline float16 operator-(const float16& a) {
...
@@ -679,22 +680,22 @@ inline float16 operator-(const float16& a) {
return
res
;
return
res
;
}
}
inline
float16
&
operator
+=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
+=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
+
b
;
a
=
a
+
b
;
return
a
;
return
a
;
}
}
inline
float16
&
operator
-=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
-=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
-
b
;
a
=
a
-
b
;
return
a
;
return
a
;
}
}
inline
float16
&
operator
*=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
*=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
*
b
;
a
=
a
*
b
;
return
a
;
return
a
;
}
}
inline
float16
&
operator
/=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
/=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
/
b
;
a
=
a
/
b
;
return
a
;
return
a
;
}
}
...
@@ -784,19 +785,19 @@ inline bool operator>=(const float16& a, const float16& b) {
...
@@ -784,19 +785,19 @@ inline bool operator>=(const float16& a, const float16& b) {
// Arithmetic operators for float16, software emulated on other CPU
// Arithmetic operators for float16, software emulated on other CPU
#else
#else
inline
float16
operator
+
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
float16
operator
+
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
float
(
a
)
+
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
+
static_cast
<
float
>
(
b
));
}
}
inline
float16
operator
-
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
float16
operator
-
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
float
(
a
)
-
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
-
static_cast
<
float
>
(
b
));
}
}
inline
float16
operator
*
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
float16
operator
*
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
float
(
a
)
*
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
*
static_cast
<
float
>
(
b
));
}
}
inline
float16
operator
/
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
float16
operator
/
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
float
(
a
)
/
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
/
static_cast
<
float
>
(
b
));
}
}
inline
float16
operator
-
(
const
float16
&
a
)
{
inline
float16
operator
-
(
const
float16
&
a
)
{
...
@@ -805,51 +806,57 @@ inline float16 operator-(const float16& a) {
...
@@ -805,51 +806,57 @@ inline float16 operator-(const float16& a) {
return
res
;
return
res
;
}
}
inline
float16
&
operator
+=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
+=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
float16
(
float
(
a
)
+
float
(
b
));
a
=
float16
(
static_cast
<
float
>
(
a
)
+
static_cast
<
float
>
(
b
));
return
a
;
return
a
;
}
}
inline
float16
&
operator
-=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
-=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
float16
(
float
(
a
)
-
float
(
b
));
a
=
float16
(
static_cast
<
float
>
(
a
)
-
static_cast
<
float
>
(
b
));
return
a
;
return
a
;
}
}
inline
float16
&
operator
*=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
*=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
float16
(
float
(
a
)
*
float
(
b
));
a
=
float16
(
static_cast
<
float
>
(
a
)
*
static_cast
<
float
>
(
b
));
return
a
;
return
a
;
}
}
inline
float16
&
operator
/=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
/=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
float16
(
float
(
a
)
/
float
(
b
));
a
=
float16
(
static_cast
<
float
>
(
a
)
/
static_cast
<
float
>
(
b
));
return
a
;
return
a
;
}
}
inline
bool
operator
==
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
bool
operator
==
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float
(
a
)
==
float
(
b
);
return
static_cast
<
float
>
(
a
)
==
static_cast
<
float
>
(
b
);
}
}
inline
bool
operator
!=
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
bool
operator
!=
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float
(
a
)
!=
float
(
b
);
return
static_cast
<
float
>
(
a
)
!=
static_cast
<
float
>
(
b
);
}
}
inline
bool
operator
<
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
bool
operator
<
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float
(
a
)
<
float
(
b
);
return
static_cast
<
float
>
(
a
)
<
static_cast
<
float
>
(
b
);
}
}
inline
bool
operator
<=
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
bool
operator
<=
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float
(
a
)
<=
float
(
b
);
return
static_cast
<
float
>
(
a
)
<=
static_cast
<
float
>
(
b
);
}
}
inline
bool
operator
>
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
bool
operator
>
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float
(
a
)
>
float
(
b
);
return
static_cast
<
float
>
(
a
)
>
static_cast
<
float
>
(
b
);
}
}
inline
bool
operator
>=
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
bool
operator
>=
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float
(
a
)
>=
float
(
b
);
return
static_cast
<
float
>
(
a
)
>=
static_cast
<
float
>
(
b
);
}
}
#endif
#endif
HOSTDEVICE
inline
float16
raw_uint16_to_float16
(
uint16_t
a
)
{
float16
res
;
res
.
x
=
a
;
return
res
;
}
HOSTDEVICE
inline
bool
(
isnan
)(
const
float16
&
a
)
{
HOSTDEVICE
inline
bool
(
isnan
)(
const
float16
&
a
)
{
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hisnan
(
half
(
a
));
return
__hisnan
(
half
(
a
));
...
@@ -886,28 +893,116 @@ struct is_pod<paddle::platform::float16> {
...
@@ -886,28 +893,116 @@ struct is_pod<paddle::platform::float16> {
is_standard_layout
<
paddle
::
platform
::
float16
>::
value
;
is_standard_layout
<
paddle
::
platform
::
float16
>::
value
;
};
};
template
<
>
struct
numeric_limits
<
paddle
::
platform
::
float16
>
{
static
const
bool
is_specialized
=
true
;
static
const
bool
is_signed
=
true
;
static
const
bool
is_integer
=
false
;
static
const
bool
is_exact
=
false
;
static
const
bool
has_infinity
=
true
;
static
const
bool
has_quiet_NaN
=
true
;
static
const
bool
has_signaling_NaN
=
true
;
static
const
float_denorm_style
has_denorm
=
denorm_present
;
static
const
bool
has_denorm_loss
=
false
;
static
const
std
::
float_round_style
round_style
=
std
::
round_to_nearest
;
static
const
bool
is_iec559
=
false
;
static
const
bool
is_bounded
=
false
;
static
const
bool
is_modulo
=
false
;
static
const
int
digits
=
11
;
static
const
int
digits10
=
3
;
static
const
int
max_digits10
=
5
;
static
const
int
radix
=
2
;
static
const
int
min_exponent
=
-
13
;
static
const
int
min_exponent10
=
-
4
;
static
const
int
max_exponent
=
16
;
static
const
int
max_exponent10
=
4
;
static
const
bool
traps
=
true
;
static
const
bool
tinyness_before
=
false
;
static
paddle
::
platform
::
float16
(
min
)()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x400
);
}
static
paddle
::
platform
::
float16
lowest
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0xfbff
);
}
static
paddle
::
platform
::
float16
(
max
)()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x7bff
);
}
static
paddle
::
platform
::
float16
epsilon
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x0800
);
}
static
paddle
::
platform
::
float16
round_error
()
{
return
paddle
::
platform
::
float16
(
0.5
);
}
static
paddle
::
platform
::
float16
infinity
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x7c00
);
}
static
paddle
::
platform
::
float16
quiet_NaN
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x7e00
);
}
static
paddle
::
platform
::
float16
signaling_NaN
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x7e00
);
}
static
paddle
::
platform
::
float16
denorm_min
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x1
);
}
};
}
// namespace std
}
// namespace std
namespace
Eigen
{
namespace
Eigen
{
using
float16
=
paddle
::
platform
::
float16
;
template
<
>
struct
NumTraits
<
float16
>
:
GenericNumTraits
<
float16
>
{
enum
{
IsSigned
=
true
,
IsInteger
=
false
,
IsComplex
=
false
,
RequireInitialization
=
false
};
HOSTDEVICE
static
inline
float16
epsilon
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x0800
);
}
HOSTDEVICE
static
inline
float16
dummy_precision
()
{
return
float16
(
1e-2
f
);
}
HOSTDEVICE
static
inline
float16
highest
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x7bff
);
}
HOSTDEVICE
static
inline
float16
lowest
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0xfbff
);
}
HOSTDEVICE
static
inline
float16
infinity
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x7c00
);
}
HOSTDEVICE
static
inline
float16
quiet_NaN
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x7c01
);
}
};
namespace
numext
{
namespace
numext
{
template
<
>
template
<
>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
bool
(
isnan
)(
HOSTDEVICE
inline
bool
(
isnan
)(
const
float16
&
a
)
{
const
paddle
::
platform
::
float16
&
a
)
{
return
(
paddle
::
platform
::
isnan
)(
a
);
return
(
paddle
::
platform
::
isnan
)(
a
);
}
}
template
<
>
template
<
>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
bool
(
isinf
)(
HOSTDEVICE
inline
bool
(
isinf
)(
const
float16
&
a
)
{
const
paddle
::
platform
::
float16
&
a
)
{
return
(
paddle
::
platform
::
isinf
)(
a
);
return
(
paddle
::
platform
::
isinf
)(
a
);
}
}
template
<
>
template
<
>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
bool
(
isfinite
)(
HOSTDEVICE
inline
bool
(
isfinite
)(
const
float16
&
a
)
{
const
paddle
::
platform
::
float16
&
a
)
{
return
(
paddle
::
platform
::
isfinite
)(
a
);
return
(
paddle
::
platform
::
isfinite
)(
a
);
}
}
template
<
>
HOSTDEVICE
inline
float16
exp
(
const
float16
&
a
)
{
return
float16
(
::
expf
(
static_cast
<
float
>
(
a
)));
}
}
// namespace numext
}
// namespace numext
}
// namespace Eigen
}
// namespace Eigen
python/paddle/fluid/tests/unittests/test_softmax_op.py
浏览文件 @
b2a1c9e8
...
@@ -68,6 +68,17 @@ class TestSoftmaxCUDNNOp(TestSoftmaxOp):
...
@@ -68,6 +68,17 @@ class TestSoftmaxCUDNNOp(TestSoftmaxOp):
self
.
use_cudnn
=
True
self
.
use_cudnn
=
True
class
TestSoftmaxFP16Op
(
TestSoftmaxOp
):
def
init_kernel_type
(
self
):
self
.
dtype
=
np
.
float16
def
test_check_output
(
self
):
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
0
)
if
core
.
is_float16_supported
(
place
):
self
.
check_output_with_place
(
place
,
atol
=
1e-3
)
class
TestSoftmaxFP16CUDNNOp
(
TestSoftmaxOp
):
class
TestSoftmaxFP16CUDNNOp
(
TestSoftmaxOp
):
def
init_kernel_type
(
self
):
def
init_kernel_type
(
self
):
self
.
use_cudnn
=
True
self
.
use_cudnn
=
True
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录