Commit af37838e authored by Kexin Zhao

add test for float16

Parent d9642cb3
......@@ -20,7 +20,7 @@ limitations under the License. */
#include <istream>
#include <ostream>
#include <cuda.h>
#define USE_EIGEN
#ifdef USE_EIGEN // delete this #if macro
#include "Eigen/src/Core/arch/CUDA/Half.h"
......@@ -100,8 +100,6 @@ PADDLE_HOSTDEVICE inline float half_to_float(float16 h);
struct PADDLE_ALIGN(2) float16 {
uint16_t x;
// Constructors are explicit for other types and implicit for half and Eigen::half.
PADDLE_HOSTDEVICE inline float16() {}
PADDLE_HOSTDEVICE inline float16(const float16& h) : x(h.x) {}
......@@ -120,7 +118,8 @@ struct PADDLE_ALIGN(2) float16 {
PADDLE_HOSTDEVICE inline float16(const Eigen::half& h) : x(h.x) {}
#endif // USE_EIGEN
#ifdef PADDLE_NEON
#if (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34) && \
defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16)
// __fp16 is a native half-precision data type for the ARM CPU,
// float16_t is an alias for __fp16 in arm_fp16.h,
// which is included in arm_neon.h.
......@@ -208,7 +207,8 @@ struct PADDLE_ALIGN(2) float16 {
}
#endif // USE_EIGEN
#ifdef PADDLE_NEON
#if (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34) && \
defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16)
PADDLE_HOSTDEVICE inline float16& operator=(const float16_t* rhs) {
x = *reinterpret_cast<const uint16_t*>(rhs);
return *this;
......@@ -302,7 +302,8 @@ struct PADDLE_ALIGN(2) float16 {
}
#endif // USE_EIGEN
#ifdef PADDLE_NEON
#if (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34) && \
defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16)
// TODO: check whether this conversion operator works as expected
PADDLE_HOSTDEVICE inline operator float16_t() const {
float16 h = *this;
......@@ -371,7 +372,6 @@ __device__ inline float16 operator*(const float16& a, const float16& b) {
__device__ inline float16 operator/(const float16& a, const float16& b) {
// TODO(kexinzhao): check the CUDA version that starts to support the __hdiv
// intrinsic
float num = __half2float(half(a));
float denom = __half2float(half(b));
return float16(num / denom);
......@@ -595,7 +595,7 @@ constexpr int32_t minD = minC - subC - 1;
PADDLE_HOSTDEVICE inline float16 float_to_half_rn(float f) {
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
half tmp = __float2half(f);
return *reinterpret_cast<float16*>(&(tmp));
return *reinterpret_cast<float16*>(&tmp);
#elif defined(PADDLE_NEON_64) // test on RPI
float16 res;
......
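Note: the new preprocessor guards compare PADDLE_GNUC_VER against 61 (GCC 6.1) and PADDLE_CLANG_VER against 34 (Clang 3.4), since older compilers lack usable __fp16/float16_t support on ARM. Those macros are defined elsewhere in float16.h and are not part of this diff; a plausible sketch of how they could be derived from the standard compiler macros (an assumption, not taken from this commit) is:

// Assumed helper macros: pack the compiler version as major * 10 + minor,
// so GCC 6.1 becomes 61 and Clang 3.4 becomes 34.
#ifdef __GNUC__
#define PADDLE_GNUC_VER (__GNUC__ * 10 + __GNUC_MINOR__)
#else
#define PADDLE_GNUC_VER 0
#endif  // __GNUC__

#ifdef __clang__
#define PADDLE_CLANG_VER (__clang_major__ * 10 + __clang_minor__)
#else
#define PADDLE_CLANG_VER 0
#endif  // __clang__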
......@@ -21,7 +21,7 @@ if(WITH_GPU)
CUDA_ADD_EXECUTABLE(test_Tensor test_Tensor.cu)
link_paddle_test(test_Tensor)
CUDA_ADD_EXECUTABLE(test_lazyAssign test_lazyAssign.cu)
link_paddle_test(test_lazyAssign)
link_paddle_test(test_lazyAssign)
else()
compile_cu_as_cpp(test_Tensor.cu)
add_unittest(test_Tensor test_Tensor.cu)
......@@ -33,3 +33,4 @@ add_simple_unittest(test_FPException)
add_simple_unittest(test_GpuProfiler)
add_simple_unittest(test_BaseMatrix)
add_simple_unittest(test_Matrix)
add_simple_unittest(test_float16)
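The test source registered above (test_float16) is not shown in this diff. A minimal sketch of what such a unit test could look like, assuming gtest (as used by add_simple_unittest), the paddle namespace, and the float_to_half_rn / half_to_float helpers declared in the header above; the exact file contents and include path are assumptions:

#include <gtest/gtest.h>

#include "paddle/math/float16.h"  // assumed header location

using paddle::float16;            // assumed namespace
using paddle::float_to_half_rn;
using paddle::half_to_float;

TEST(float16, conversion_cpu) {
  // 1.0f is exactly representable in half precision; its bit pattern is 0x3c00.
  float16 h = float_to_half_rn(1.0f);
  EXPECT_EQ(h.x, 0x3c00);
  EXPECT_EQ(half_to_float(h), 1.0f);
}

TEST(float16, arithmetic_cpu) {
  // Round-trip two exact values through float16 and check that the sum survives.
  float a = half_to_float(float_to_half_rn(2.0f));
  float b = half_to_float(float_to_half_rn(3.0f));
  EXPECT_EQ(half_to_float(float_to_half_rn(a + b)), 5.0f);
}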