From 734cac1a53b904c7d3f76fe66cee1b2d19632dcf Mon Sep 17 00:00:00 2001
From: Kexin Zhao
Date: Fri, 17 Nov 2017 00:04:58 -0800
Subject: [PATCH] fix CUDA_VERSION issue

---
 paddle/math/float16.h              | 29 ++++++++++++++++++++++++++++-
 paddle/math/tests/test_float16.cpp |  2 ++
 paddle/math/tests/test_float16.cu  |  2 ++
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/paddle/math/float16.h b/paddle/math/float16.h
index 6799a83bd..1922192f7 100644
--- a/paddle/math/float16.h
+++ b/paddle/math/float16.h
@@ -20,6 +20,10 @@ limitations under the License. */
 #include
 #include
+#include <cuda.h>
+
+#include "paddle/utils/Logging.h"
+
 #define USE_EIGEN

 #ifdef USE_EIGEN  // delete this #if macro
@@ -48,6 +52,27 @@ limitations under the License. */
 #define PADDLE_HOSTDEVICE
 #endif  // __CUDACC__

+#define STR(x) #x
+#define XSTR(x) STR(x)
+
+#ifndef __CUDACC__
+#pragma message "__CUDACC__ not defined"
+#else
+#pragma message "__CUDACC__ defined"
+#endif
+
+#ifndef CUDA_VERSION
+#pragma message "CUDA_VERSION not defined"
+#else
+#pragma message "CUDA_VERSION defined: " XSTR(CUDA_VERSION)
+#endif
+
+#ifdef __CUDA_ARCH__
+#pragma message "The value of CUDA_ARCH: " XSTR(__CUDA_ARCH__)
+#else
+#pragma message "CUDA ARCH NOT DEFINED!"
+#endif
+
 #ifdef __arm__
 #define PADDLE_ARM_32
 #endif
@@ -359,6 +384,7 @@ struct PADDLE_ALIGN(2) float16 {
 // arithmetic operators
 #if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
 __device__ inline float16 operator+(const float16& a, const float16& b) {
+  printf("GPU Intrinsic used!");
   return float16(__hadd(half(a), half(b)));
 }

@@ -495,6 +521,7 @@ __host__ inline bool operator>=(const float16& a, const float16& b) {
 #else  // software emulation on other cpu
 PADDLE_HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
+  LOG(INFO) << "CPU emulation used";
   return float16(float(a) + float(b));
 }

@@ -656,7 +683,7 @@ PADDLE_HOSTDEVICE inline float16 float_to_half_rn(float f) {
 PADDLE_HOSTDEVICE inline float half_to_float(float16 h) {
 #if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
   half tmp = *reinterpret_cast<half*>(&h);
-  return __half2float(h);
+  return __half2float(tmp);
 #elif defined(PADDLE_NEON_64)
   float res;
diff --git a/paddle/math/tests/test_float16.cpp b/paddle/math/tests/test_float16.cpp
index 8d4279b41..1a20d0e92 100644
--- a/paddle/math/tests/test_float16.cpp
+++ b/paddle/math/tests/test_float16.cpp
@@ -15,6 +15,8 @@ limitations under the License. */
 namespace paddle {

 TEST(float16, conversion_cpu) {
+  LOG(INFO) << "cpu test started!";
+
   // Conversion to and from Eigen::half
   EXPECT_EQ(float16(Eigen::half(float16(1.0f))).x, 0x3c00);
   EXPECT_EQ(float16(Eigen::half(float16(0.5f))).x, 0x3800);
diff --git a/paddle/math/tests/test_float16.cu b/paddle/math/tests/test_float16.cu
index 6c0a1c351..9ca77cf86 100644
--- a/paddle/math/tests/test_float16.cu
+++ b/paddle/math/tests/test_float16.cu
@@ -16,6 +16,8 @@ namespace paddle {

 #ifdef PADDLE_CUDA_FP16
 TEST(float16, conversion_gpu) {
+  LOG(INFO) << "GPU tests started";
+
   // Conversion to and from cuda half
   float16 v1 = half(float16(1.0f));
   EXPECT_EQ(v1.x, 0x3c00);
--
GitLab
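
Note on the diagnostics added in float16.h: CUDA_VERSION is defined by
<cuda.h>, which is why that header is pulled in ahead of the "#pragma message"
checks; without it, host-only translation units never see the macro. The
STR/XSTR pair is the standard two-level stringification idiom: the extra
indirection forces the preprocessor to expand the argument before "#" turns it
into a string literal, so the pragma prints the macro's value rather than its
name. A minimal standalone sketch, using a hypothetical DEMO_VERSION macro in
place of CUDA_VERSION:

    #define STR(x) #x
    #define XSTR(x) STR(x)

    #define DEMO_VERSION 9000  // hypothetical stand-in for CUDA_VERSION

    // Argument is expanded first, then stringized: prints "value: 9000"
    #pragma message "value: " XSTR(DEMO_VERSION)
    // Stringized without expansion: prints "name: DEMO_VERSION"
    #pragma message "name: " STR(DEMO_VERSION)

    int main() { return 0; }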