diff --git a/paddle/math/float16.h b/paddle/math/float16.h
index 6799a83bd391ab722d8bf1a08cdd9118133bfdcb..1922192f7bc1cd7194c2f28232cc31d45eeffb72 100644
--- a/paddle/math/float16.h
+++ b/paddle/math/float16.h
@@ -20,6 +20,10 @@ limitations under the License. */
 
 #include
 #include
+#include
+
+#include "paddle/utils/Logging.h"
+
 #define USE_EIGEN
 
 #ifdef USE_EIGEN  // delete this #if macro
@@ -48,6 +52,27 @@ limitations under the License. */
 #define PADDLE_HOSTDEVICE
 #endif  // __CUDACC__
 
+#define STR(x) #x
+#define XSTR(x) STR(x)
+
+#ifndef __CUDACC__
+#pragma message "__CUDACC__ not defined"
+#else
+#pragma message "__CUDACC__ defined"
+#endif
+
+#ifndef CUDA_VERSION
+#pragma message "CUDA_VERSION not defined"
+#else
+#pragma message "CUDA_VERSION defined: " XSTR(CUDA_VERSION)
+#endif
+
+#ifdef __CUDA_ARCH__
+#pragma message "The value of CUDA_ARCH: " XSTR(__CUDA_ARCH__)
+#else
+#pragma message "CUDA ARCH NOT DEFINED!"
+#endif
+
 #ifdef __arm__
 #define PADDLE_ARM_32
 #endif
@@ -359,6 +384,7 @@ struct PADDLE_ALIGN(2) float16 {
 // arithmetic operators
 #if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
 __device__ inline float16 operator+(const float16& a, const float16& b) {
+  printf("GPU Intrinsic used!");
   return float16(__hadd(half(a), half(b)));
 }
 
@@ -495,6 +521,7 @@ __host__ inline bool operator>=(const float16& a, const float16& b) {
 
 #else  // software emulation on other cpu
 PADDLE_HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
+  LOG(INFO) << "CPU emulation used";
   return float16(float(a) + float(b));
 }
 
@@ -656,7 +683,7 @@ PADDLE_HOSTDEVICE inline float16 float_to_half_rn(float f) {
 
 PADDLE_HOSTDEVICE inline float half_to_float(float16 h) {
 #if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
   half tmp = *reinterpret_cast<half*>(&h);
-  return __half2float(h);
+  return __half2float(tmp);
 #elif defined(PADDLE_NEON_64)
   float res;
diff --git a/paddle/math/tests/test_float16.cpp b/paddle/math/tests/test_float16.cpp
index 8d4279b4135d612970cb509f9e5df160805c6075..1a20d0e92523d7aa0924c44e586868973cf22220 100644
--- a/paddle/math/tests/test_float16.cpp
+++ b/paddle/math/tests/test_float16.cpp
@@ -15,6 +15,8 @@ limitations under the License. */
 namespace paddle {
 
 TEST(float16, conversion_cpu) {
+  LOG(INFO) << "cpu test started!";
+
   // Conversion to and from Eigen::half
   EXPECT_EQ(float16(Eigen::half(float16(1.0f))).x, 0x3c00);
   EXPECT_EQ(float16(Eigen::half(float16(0.5f))).x, 0x3800);
diff --git a/paddle/math/tests/test_float16.cu b/paddle/math/tests/test_float16.cu
index 6c0a1c351ca38124ceea760d5d1d301f664c547c..9ca77cf86c834557f940fd73cb439515916f6141 100644
--- a/paddle/math/tests/test_float16.cu
+++ b/paddle/math/tests/test_float16.cu
@@ -16,6 +16,8 @@ namespace paddle {
 
 #ifdef PADDLE_CUDA_FP16
 TEST(float16, conversion_gpu) {
+  LOG(INFO) << "GPU tests started";
+
   // Conversion to and from cuda half
   float16 v1 = half(float16(1.0f));
   EXPECT_EQ(v1.x, 0x3c00);
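A note on the `STR`/`XSTR` pair the patch adds: `#pragma message` only accepts string literals, and the preprocessor's `#` operator stringizes its argument *without* macro-expanding it, so a second indirection layer is needed to print a macro's value rather than its name. A minimal standalone sketch (not part of the patch; the `CUDA_VERSION` value here is made up purely for illustration):

```cpp
#include <cstdio>

#define STR(x) #x
#define XSTR(x) STR(x)

// Hypothetical value, defined only to demonstrate the expansion order.
#define CUDA_VERSION 9000

int main() {
  std::printf("%s\n", STR(CUDA_VERSION));   // prints "CUDA_VERSION": '#' does not expand its argument
  std::printf("%s\n", XSTR(CUDA_VERSION));  // prints "9000": the extra layer expands first
  return 0;
}
```

This is why the diagnostic pragmas print `XSTR(CUDA_VERSION)` and `XSTR(__CUDA_ARCH__)` rather than `STR(...)`.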
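The `printf("GPU Intrinsic used!")` / `LOG(INFO)` probes rely on nvcc compiling each translation unit once for the host and once per device architecture: `__CUDA_ARCH__` is defined only during the device pass, so `#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530` statically selects the `__hadd` intrinsic on sm_53+ and the float-emulation path everywhere else. A hedged sketch of that dispatch pattern, with illustrative names (not Paddle's actual code), compiled with nvcc:

```cpp
#include <cstdio>
#include <cuda_fp16.h>

// Compiled twice by nvcc; __CUDA_ARCH__ exists only in the device pass.
__host__ __device__ float add_via_fp16(float a, float b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  // Device pass on sm_53+: round to half and add with the native intrinsic.
  return __half2float(__hadd(__float2half(a), __float2half(b)));
#else
  // Host pass (or archs without fp16 arithmetic): plain float fallback.
  return a + b;
#endif
}

__global__ void probe(float* out) { *out = add_via_fp16(1.0f, 2.0f); }

int main() {
  float *d_out, h_out = 0.0f;
  cudaMalloc(&d_out, sizeof(float));
  probe<<<1, 1>>>(d_out);
  cudaMemcpy(&h_out, d_out, sizeof(float), cudaMemcpyDeviceToHost);
  std::printf("device: %f, host: %f\n", h_out, add_via_fp16(1.0f, 2.0f));
  cudaFree(d_out);
  return 0;
}
```

Built with `-arch=sm_53` or higher, the device pass takes the intrinsic branch; on older architectures both passes fall through to the fallback, which is exactly the behavior the patch's log lines are meant to expose.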