From 734cac1a53b904c7d3f76fe66cee1b2d19632dcf Mon Sep 17 00:00:00 2001
From: Kexin Zhao
Date: Fri, 17 Nov 2017 00:04:58 -0800
Subject: [PATCH] fix CUDA_VERSION issue

---
 paddle/math/float16.h              | 29 ++++++++++++++++++++++++++++-
 paddle/math/tests/test_float16.cpp |  2 ++
 paddle/math/tests/test_float16.cu  |  2 ++
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/paddle/math/float16.h b/paddle/math/float16.h
index 6799a83bd..1922192f7 100644
--- a/paddle/math/float16.h
+++ b/paddle/math/float16.h
@@ -20,6 +20,10 @@ limitations under the License. */
 #include
 #include
+#include <cuda.h>
+
+#include "paddle/utils/Logging.h"
+
 #define USE_EIGEN

 #ifdef USE_EIGEN  // delete this #if macro
@@ -48,6 +52,27 @@ limitations under the License. */
 #define PADDLE_HOSTDEVICE
 #endif  // __CUDACC__

+#define STR(x) #x
+#define XSTR(x) STR(x)
+
+#ifndef __CUDACC__
+#pragma message "__CUDACC__ not defined"
+#else
+#pragma message "__CUDACC__ defined"
+#endif
+
+#ifndef CUDA_VERSION
+#pragma message "CUDA_VERSION not defined"
+#else
+#pragma message "CUDA_VERSION defined: " XSTR(CUDA_VERSION)
+#endif
+
+#ifdef __CUDA_ARCH__
+#pragma message "The value of CUDA_ARCH: " XSTR(__CUDA_ARCH__)
+#else
+#pragma message "CUDA ARCH NOT DEFINED!"
+#endif
+
 #ifdef __arm__
 #define PADDLE_ARM_32
 #endif
@@ -359,6 +384,7 @@ struct PADDLE_ALIGN(2) float16 {
 // arithmetic operators
 #if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
 __device__ inline float16 operator+(const float16& a, const float16& b) {
+  printf("GPU Intrinsic used!");
   return float16(__hadd(half(a), half(b)));
 }

@@ -495,6 +521,7 @@ __host__ inline bool operator>=(const float16& a, const float16& b) {
 #else  // software emulation on other cpu
 PADDLE_HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
+  LOG(INFO) << "CPU emulation used";
   return float16(float(a) + float(b));
 }

@@ -656,7 +683,7 @@ PADDLE_HOSTDEVICE inline float16 float_to_half_rn(float f) {
 PADDLE_HOSTDEVICE inline float half_to_float(float16 h) {
 #if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
   half tmp = *reinterpret_cast<half*>(&h);
-  return __half2float(h);
+  return __half2float(tmp);
 #elif defined(PADDLE_NEON_64)
   float res;
diff --git a/paddle/math/tests/test_float16.cpp b/paddle/math/tests/test_float16.cpp
index 8d4279b41..1a20d0e92 100644
--- a/paddle/math/tests/test_float16.cpp
+++ b/paddle/math/tests/test_float16.cpp
@@ -15,6 +15,8 @@ limitations under the License. */
 namespace paddle {

 TEST(float16, conversion_cpu) {
+  LOG(INFO) << "cpu test started!";
+
   // Conversion to and from Eigen::half
   EXPECT_EQ(float16(Eigen::half(float16(1.0f))).x, 0x3c00);
   EXPECT_EQ(float16(Eigen::half(float16(0.5f))).x, 0x3800);
diff --git a/paddle/math/tests/test_float16.cu b/paddle/math/tests/test_float16.cu
index 6c0a1c351..9ca77cf86 100644
--- a/paddle/math/tests/test_float16.cu
+++ b/paddle/math/tests/test_float16.cu
@@ -16,6 +16,8 @@ namespace paddle {

 #ifdef PADDLE_CUDA_FP16
 TEST(float16, conversion_gpu) {
+  LOG(INFO) << "GPU tests started";
+
   // Conversion to and from cuda half
   float16 v1 = half(float16(1.0f));
   EXPECT_EQ(v1.x, 0x3c00);
--
GitLab
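
Note on the diagnostics added in float16.h: CUDA_VERSION is defined by
<cuda.h>, which is why that header is pulled in ahead of the "#pragma message"
checks; without it, host-only translation units never see the macro. The
STR/XSTR pair is the standard two-level stringification idiom: the extra
indirection forces the preprocessor to expand the argument before "#" turns it
into a string literal, so the pragma prints the macro's value rather than its
name. A minimal standalone sketch, using a hypothetical DEMO_VERSION macro in
place of CUDA_VERSION:

    #define STR(x) #x
    #define XSTR(x) STR(x)

    #define DEMO_VERSION 9000  // hypothetical stand-in for CUDA_VERSION

    // Argument is expanded first, then stringized: prints "value: 9000"
    #pragma message "value: " XSTR(DEMO_VERSION)
    // Stringized without expansion: prints "name: DEMO_VERSION"
    #pragma message "name: " STR(DEMO_VERSION)

    int main() { return 0; }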