提交 22dfa5fa 作者: Kexin Zhao

Fix GPU compilation

上级 979d2e0b
...@@ -118,8 +118,8 @@ struct PADDLE_ALIGN(2) float16 { ...@@ -118,8 +118,8 @@ struct PADDLE_ALIGN(2) float16 {
PADDLE_HOSTDEVICE inline float16(const Eigen::half& h) : x(h.x) {} PADDLE_HOSTDEVICE inline float16(const Eigen::half& h) : x(h.x) {}
#endif // USE_EIGEN #endif // USE_EIGEN
#if (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34) && \ #if defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \
defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34)
// __fp16 is a native half precision data type for arm cpu, // __fp16 is a native half precision data type for arm cpu,
// float16_t is an alias for __fp16 in arm_fp16.h, // float16_t is an alias for __fp16 in arm_fp16.h,
// which is included in arm_neon.h. // which is included in arm_neon.h.
...@@ -207,8 +207,8 @@ struct PADDLE_ALIGN(2) float16 { ...@@ -207,8 +207,8 @@ struct PADDLE_ALIGN(2) float16 {
} }
#endif // USE_EIGEN #endif // USE_EIGEN
#if (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34) && \ #if defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \
defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34)
PADDLE_HOSTDEVICE inline float16& operator=(const float16_t* rhs) { PADDLE_HOSTDEVICE inline float16& operator=(const float16_t* rhs) {
x = *reinterpret_cast<uint16_t*>(rhs); x = *reinterpret_cast<uint16_t*>(rhs);
return *this; return *this;
...@@ -302,8 +302,8 @@ struct PADDLE_ALIGN(2) float16 { ...@@ -302,8 +302,8 @@ struct PADDLE_ALIGN(2) float16 {
} }
#endif // USE_EIGEN #endif // USE_EIGEN
#if (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34) && \ #if defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \
defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) (PADDLE_GNUC_VER >= 61 || PADDLE_CLANG_VER >= 34)
// check whether it works or not // check whether it works or not
PADDLE_HOSTDEVICE inline operator float16_t() const { PADDLE_HOSTDEVICE inline operator float16_t() const {
float16 h = *this; float16 h = *this;
......
...@@ -22,15 +22,18 @@ if(WITH_GPU) ...@@ -22,15 +22,18 @@ if(WITH_GPU)
link_paddle_test(test_Tensor) link_paddle_test(test_Tensor)
CUDA_ADD_EXECUTABLE(test_lazyAssign test_lazyAssign.cu) CUDA_ADD_EXECUTABLE(test_lazyAssign test_lazyAssign.cu)
link_paddle_test(test_lazyAssign) link_paddle_test(test_lazyAssign)
CUDA_ADD_EXECUTABLE(test_float16 test_float16.cu)
link_paddle_test(test_float16)
else() else()
compile_cu_as_cpp(test_Tensor.cu) compile_cu_as_cpp(test_Tensor.cu)
add_unittest(test_Tensor test_Tensor.cu) add_unittest(test_Tensor test_Tensor.cu)
compile_cu_as_cpp(test_lazyAssign.cu) compile_cu_as_cpp(test_lazyAssign.cu)
add_unittest(test_lazyAssign test_lazyAssign.cu) add_unittest(test_lazyAssign test_lazyAssign.cu)
compile_cu_as_cpp(test_float16.cu)
add_unittest(test_float16 test_float16.cu)
endif(WITH_GPU) endif(WITH_GPU)
add_simple_unittest(test_FPException) add_simple_unittest(test_FPException)
add_simple_unittest(test_GpuProfiler) add_simple_unittest(test_GpuProfiler)
add_simple_unittest(test_BaseMatrix) add_simple_unittest(test_BaseMatrix)
add_simple_unittest(test_Matrix) add_simple_unittest(test_Matrix)
add_simple_unittest(test_float16)
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
#ifdef PADDLE_CUDA_FP16 #ifdef PADDLE_CUDA_FP16
TEST(float16, gpu) { TEST(float16, conversion_gpu) {
// Conversion to and from cuda half // Conversion to and from cuda half
float16 v1 = half(float16(1.0f)); float16 v1 = half(float16(1.0f));
EXPECT_EQ(v1.x, 0x3c00); EXPECT_EQ(v1.x, 0x3c00);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册