提交 734cac1a 编写于 作者: K Kexin Zhao

fix CUDA_VERSION issue

上级 080ff0c8
...@@ -20,6 +20,10 @@ limitations under the License. */ ...@@ -20,6 +20,10 @@ limitations under the License. */
#include <istream> #include <istream>
#include <ostream> #include <ostream>
#include <cuda.h>
#include "paddle/utils/Logging.h"
#define USE_EIGEN #define USE_EIGEN
#ifdef USE_EIGEN // delete this #if macro #ifdef USE_EIGEN // delete this #if macro
...@@ -48,6 +52,27 @@ limitations under the License. */ ...@@ -48,6 +52,27 @@ limitations under the License. */
#define PADDLE_HOSTDEVICE #define PADDLE_HOSTDEVICE
#endif // __CUDACC__ #endif // __CUDACC__
// Stringification helpers: STR stringifies its argument literally, while
// XSTR forces macro expansion first, so XSTR(CUDA_VERSION) yields the
// numeric value as a string rather than the literal text "CUDA_VERSION".
#define STR(x) #x
#define XSTR(x) STR(x)
// Compile-time diagnostics (debug aid): emit a build-log message telling
// which compiler pass we are in and which CUDA macros it can see.
// NOTE(review): __CUDACC__ is defined only when nvcc is driving compilation.
#ifndef __CUDACC__
#pragma message "__CUDACC__ not defined"
#else
#pragma message "__CUDACC__ defined"
#endif
// CUDA_VERSION comes from <cuda.h> (included above); if it is missing here,
// the header was not picked up by this translation unit.
#ifndef CUDA_VERSION
#pragma message "CUDA_VERSION not defined"
#else
#pragma message "CUDA_VERSION defined: " XSTR(CUDA_VERSION)
#endif
// __CUDA_ARCH__ is defined only during device-code compilation passes, so
// "NOT DEFINED" is the expected message for the host pass of the same file.
#ifdef __CUDA_ARCH__
#pragma message "The value of CUDA_ARCH: " XSTR(__CUDA_ARCH__)
#else
#pragma message "CUDA ARCH NOT DEFINED!"
#endif
#ifdef __arm__ #ifdef __arm__
#define PADDLE_ARM_32 #define PADDLE_ARM_32
#endif #endif
...@@ -359,6 +384,7 @@ struct PADDLE_ALIGN(2) float16 { ...@@ -359,6 +384,7 @@ struct PADDLE_ALIGN(2) float16 {
// arithmetic operators // arithmetic operators
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
// Device-side float16 addition using the native fp16 intrinsic __hadd.
// Only compiled for SM53+ (guarded by the surrounding
// `__CUDA_ARCH__ >= 530` check), where native half arithmetic exists.
// NOTE(review): removed leftover debug `printf` — device printf serializes
// threads and this operator sits on the per-element hot path.
__device__ inline float16 operator+(const float16& a, const float16& b) {
  return float16(__hadd(half(a), half(b)));
}
...@@ -495,6 +521,7 @@ __host__ inline bool operator>=(const float16& a, const float16& b) { ...@@ -495,6 +521,7 @@ __host__ inline bool operator>=(const float16& a, const float16& b) {
#else // software emulation on other cpu #else // software emulation on other cpu
// Software-emulated float16 addition for targets without native fp16
// arithmetic: widen both operands to float, add, and narrow the result.
// NOTE(review): removed leftover `LOG(INFO)` debug output — logging on
// every element-wise addition is prohibitively expensive, and glog's LOG
// is host-only while PADDLE_HOSTDEVICE may mark this __host__ __device__,
// which would break the device compilation pass.
PADDLE_HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
  return float16(float(a) + float(b));
}
...@@ -656,7 +683,7 @@ PADDLE_HOSTDEVICE inline float16 float_to_half_rn(float f) { ...@@ -656,7 +683,7 @@ PADDLE_HOSTDEVICE inline float16 float_to_half_rn(float f) {
PADDLE_HOSTDEVICE inline float half_to_float(float16 h) { PADDLE_HOSTDEVICE inline float half_to_float(float16 h) {
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 #if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
half tmp = *reinterpret_cast<half*>(&h); half tmp = *reinterpret_cast<half*>(&h);
return __half2float(h); return __half2float(tmp);
#elif defined(PADDLE_NEON_64) #elif defined(PADDLE_NEON_64)
float res; float res;
......
...@@ -15,6 +15,8 @@ limitations under the License. */ ...@@ -15,6 +15,8 @@ limitations under the License. */
namespace paddle { namespace paddle {
TEST(float16, conversion_cpu) { TEST(float16, conversion_cpu) {
LOG(INFO) << "cpu test started!";
// Conversion to and from Eigen::half // Conversion to and from Eigen::half
EXPECT_EQ(float16(Eigen::half(float16(1.0f))).x, 0x3c00); EXPECT_EQ(float16(Eigen::half(float16(1.0f))).x, 0x3c00);
EXPECT_EQ(float16(Eigen::half(float16(0.5f))).x, 0x3800); EXPECT_EQ(float16(Eigen::half(float16(0.5f))).x, 0x3800);
......
...@@ -16,6 +16,8 @@ namespace paddle { ...@@ -16,6 +16,8 @@ namespace paddle {
#ifdef PADDLE_CUDA_FP16 #ifdef PADDLE_CUDA_FP16
TEST(float16, conversion_gpu) { TEST(float16, conversion_gpu) {
LOG(INFO) << "GPU tests started";
// Conversion to and from cuda half // Conversion to and from cuda half
float16 v1 = half(float16(1.0f)); float16 v1 = half(float16(1.0f));
EXPECT_EQ(v1.x, 0x3c00); EXPECT_EQ(v1.x, 0x3c00);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册