Commit af37838e authored by Kexin Zhao

add test for float16

Parent d9642cb3
@@ -20,7 +20,7 @@ limitations under the License. */
 #include <istream>
 #include <ostream>
-#include <cuda.h>
+#define USE_EIGEN
 #ifdef USE_EIGEN  // delete this #if macro
 #include "Eigen/src/Core/arch/CUDA/Half.h"
@@ -100,8 +100,6 @@ PADDLE_HOSTDEVICE inline float half_to_float(float16 h);
 struct PADDLE_ALIGN(2) float16 {
   uint16_t x;
-  // explicit for different types, implicit for half and Eigen::half
   PADDLE_HOSTDEVICE inline float16() {}
   PADDLE_HOSTDEVICE inline float16(const float16& h) : x(h.x) {}
@@ -120,7 +118,8 @@ struct PADDLE_ALIGN(2) float16 {
   PADDLE_HOSTDEVICE inline float16(const Eigen::half& h) : x(h.x) {}
 #endif  // USE_EIGEN
-#ifdef PADDLE_NEON
+#if (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34) && \
+    defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16)
   // __fp16 is a native half precision data type for arm cpu,
   // float16_t is an alias for __fp16 in arm_fp16.h,
   // which is included in arm_neon.h.
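
The new guard gates the ARM path on both compiler support and fp16 availability rather than on PADDLE_NEON alone. A minimal sketch of how the version macros might be defined, assuming the encoding major * 10 + minor so that GCC 6.1 maps to 61 and Clang 3.4 to 34 (the exact definitions are an assumption, inferred from the thresholds in the diff):

    // Assumed encoding: major * 10 + minor, matching the >= 61 / >= 34 checks.
    #ifdef __GNUC__
    #define PADDLE_GNUC_VER (__GNUC__ * 10 + __GNUC_MINOR__)
    #else
    #define PADDLE_GNUC_VER 0
    #endif  // __GNUC__

    #ifdef __clang__
    #define PADDLE_CLANG_VER (__clang_major__ * 10 + __clang_minor__)
    #else
    #define PADDLE_CLANG_VER 0
    #endif  // __clang__
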
@@ -208,7 +207,8 @@
   }
 #endif  // USE_EIGEN
-#ifdef PADDLE_NEON
+#if (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34) && \
+    defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16)
   PADDLE_HOSTDEVICE inline float16& operator=(const float16_t* rhs) {
     x = *reinterpret_cast<uint16_t*>(rhs);
     return *this;
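
One thing to flag in the context lines above: reinterpret_cast<uint16_t*> applied to a const float16_t* casts away constness, which reinterpret_cast is not allowed to do, so that assignment operator should not compile as written. A const-correct sketch:

    PADDLE_HOSTDEVICE inline float16& operator=(const float16_t* rhs) {
      // read the raw 16 bits through a pointer that preserves const
      x = *reinterpret_cast<const uint16_t*>(rhs);
      return *this;
    }
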
@@ -302,7 +302,8 @@ struct PADDLE_ALIGN(2) float16 {
   }
 #endif  // USE_EIGEN
-#ifdef PADDLE_NEON
+#if (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34) && \
+    defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16)
   // check whether it works or not
   PADDLE_HOSTDEVICE inline operator float16_t() const {
     float16 h = *this;
@@ -371,7 +372,6 @@ __device__ inline float16 operator*(const float16& a, const float16& b) {
 __device__ inline float16 operator/(const float16& a, const float16& b) {
   // TODO(kexinzhao): check the cuda version that starts to support __hdiv
-  // instinsic
   float num = __half2float(half(a));
   float denom = __half2float(half(b));
   return float16(num / denom);
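
For reference on the TODO: __hdiv is the native half-precision division intrinsic, available on devices of compute capability 5.3 and up; which CUDA toolkit first shipped it is what the TODO wants verified. A hedged sketch of the guarded form (the 530 threshold is an assumption to confirm):

    __device__ inline float16 operator/(const float16& a, const float16& b) {
    #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
      // native fp16 division on supported devices
      return float16(__hdiv(half(a), half(b)));
    #else
      // fall back to single-precision division elsewhere
      float num = __half2float(half(a));
      float denom = __half2float(half(b));
      return float16(num / denom);
    #endif
    }
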
@@ -595,7 +595,7 @@ constexpr int32_t minD = minC - subC - 1;
 PADDLE_HOSTDEVICE inline float16 float_to_half_rn(float f) {
 #if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
   half tmp = __float2half(f);
-  return *reinterpret_cast<float16*>(&(tmp));
+  return *reinterpret_cast<float16*>(&tmp);
 #elif defined(PADDLE_NEON_64)  // test on RPI
   float16 res;
...
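
The cleaned-up cast works because half and float16 are both 2-byte types holding the same bit pattern, but it still reads one type through a pointer to another. Where strict aliasing is a concern, a memcpy-based variant gives the same result without the pun (a sketch mirroring only the CUDA branch; the _safe name is hypothetical):

    #include <cstring>  // memcpy

    __device__ inline float16 float_to_half_rn_safe(float f) {
      half tmp = __float2half(f);
      float16 res;
      memcpy(&res, &tmp, sizeof(res));  // bitwise copy of the 16-bit payload
      return res;
    }
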
@@ -33,3 +33,4 @@ add_simple_unittest(test_FPException)
 add_simple_unittest(test_GpuProfiler)
 add_simple_unittest(test_BaseMatrix)
 add_simple_unittest(test_Matrix)
+add_simple_unittest(test_float16)
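
The commit message says this change adds a test for float16, and the new CMake line registers that target. A minimal sketch of what such a unit test could look like (the include path, gtest usage, and test name are assumptions, not the commit's actual test body; 1.0f is 0x3c00 in IEEE 754 binary16):

    #include <gtest/gtest.h>

    #include "paddle/math/float16.h"  // assumed path to the header diffed above

    namespace paddle {

    TEST(float16, float_roundtrip_cpu) {
      // convert float -> float16 with round-to-nearest and check the raw bits
      float16 h = float_to_half_rn(1.0f);
      EXPECT_EQ(h.x, 0x3c00);
      // convert back and check the round trip is exact for 1.0f
      EXPECT_EQ(half_to_float(h), 1.0f);
    }

    }  // namespace paddle
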