提交 734cac1a 编写于 作者: K Kexin Zhao

fix CUDA_VERSION issue

上级 080ff0c8
......@@ -20,6 +20,10 @@ limitations under the License. */
#include <istream>
#include <ostream>
#include <cuda.h>
#include "paddle/utils/Logging.h"
#define USE_EIGEN
#ifdef USE_EIGEN // delete this #if macro
......@@ -48,6 +52,27 @@ limitations under the License. */
#define PADDLE_HOSTDEVICE
#endif // __CUDACC__
// Two-level stringification helpers: STR turns its argument into a string
// literal exactly as written, while XSTR lets the argument be macro-expanded
// first and only then stringifies the result.
#define STR(x) #x
#define XSTR(x) STR(x)

// Compile-time diagnostics. Each conditional below emits one #pragma message
// reporting whether the corresponding CUDA macro is visible to this
// translation unit (and, where available, its expanded value).
#ifdef __CUDACC__
#pragma message "__CUDACC__ defined"
#else
#pragma message "__CUDACC__ not defined"
#endif

#ifdef CUDA_VERSION
#pragma message "CUDA_VERSION defined: " XSTR(CUDA_VERSION)
#else
#pragma message "CUDA_VERSION not defined"
#endif

#ifndef __CUDA_ARCH__
#pragma message "CUDA ARCH NOT DEFINED!"
#else
#pragma message "The value of CUDA_ARCH: " XSTR(__CUDA_ARCH__)
#endif

// Define PADDLE_ARM_32 whenever the __arm__ macro is defined.
#if defined(__arm__)
#define PADDLE_ARM_32
#endif
......@@ -359,6 +384,7 @@ struct PADDLE_ALIGN(2) float16 {
// arithmetic operators
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
// Device-side float16 addition.
//
// Converts both operands to the CUDA `half` type, adds them with the __hadd
// intrinsic and wraps the result back into a float16. This overload sits
// inside a preprocessor guard requiring PADDLE_CUDA_FP16 and
// __CUDA_ARCH__ >= 530.
//
// Deliberately free of tracing output: a device-side printf here would be
// executed by every thread on every addition.
__device__ inline float16 operator+(const float16& a, const float16& b) {
  return float16(__hadd(half(a), half(b)));
}
......@@ -495,6 +521,7 @@ __host__ inline bool operator>=(const float16& a, const float16& b) {
#else // software emulation on other cpu
// Software-emulated float16 addition.
//
// Widens both operands to float, adds them in single precision and converts
// the sum back to float16.
//
// No logging is performed here: LOG(INFO) is a host-side stream facility,
// while PADDLE_HOSTDEVICE may mark this function as callable from device
// code as well, and an arithmetic operator is invoked far too often for
// per-call log lines.
PADDLE_HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
  return float16(float(a) + float(b));
}
......@@ -656,7 +683,7 @@ PADDLE_HOSTDEVICE inline float16 float_to_half_rn(float f) {
PADDLE_HOSTDEVICE inline float half_to_float(float16 h) {
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
half tmp = *reinterpret_cast<half*>(&h);
return __half2float(h);
return __half2float(tmp);
#elif defined(PADDLE_NEON_64)
float res;
......
......@@ -15,6 +15,8 @@ limitations under the License. */
namespace paddle {
TEST(float16, conversion_cpu) {
LOG(INFO) << "cpu test started!";
// Conversion to and from Eigen::half
EXPECT_EQ(float16(Eigen::half(float16(1.0f))).x, 0x3c00);
EXPECT_EQ(float16(Eigen::half(float16(0.5f))).x, 0x3800);
......
......@@ -16,6 +16,8 @@ namespace paddle {
#ifdef PADDLE_CUDA_FP16
TEST(float16, conversion_gpu) {
LOG(INFO) << "GPU tests started";
// Conversion to and from cuda half
float16 v1 = half(float16(1.0f));
EXPECT_EQ(v1.x, 0x3c00);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册