diff --git a/paddle/fluid/platform/float16.h b/paddle/fluid/platform/float16.h
index a68dcc38aceeb4ce594bbf034681b3b075f69139..7c2c6add07afecf9b4bf171b00813c784c26709e 100644
--- a/paddle/fluid/platform/float16.h
+++ b/paddle/fluid/platform/float16.h
@@ -484,72 +484,122 @@ DEVICE inline bool operator>=(const half& a, const half& b) {
 #endif  // PADDLE_CUDA_FP16
 
 // Arithmetic operators for float16 on GPU
-#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
-DEVICE inline float16 operator+(const float16& a, const float16& b) {
+#if defined(PADDLE_CUDA_FP16)
+// HOSTDEVICE operators: each body selects a device intrinsic path under its
+// own __CUDA_ARCH__ guard and otherwise (e.g. when __CUDA_ARCH__ is not
+// defined) falls back to float arithmetic or, for unary minus, a bit flip.
+HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return float16(__hadd(half(a), half(b)));
+#else
+  return float16(float(a) + float(b));
+#endif
 }
 
-DEVICE inline float16 operator-(const float16& a, const float16& b) {
+HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return float16(__hsub(half(a), half(b)));
+#else
+  return float16(float(a) - float(b));
+#endif
 }
 
-DEVICE inline float16 operator*(const float16& a, const float16& b) {
+HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return float16(__hmul(half(a), half(b)));
+#else
+  return float16(float(a) * float(b));
+#endif
 }
 
-DEVICE inline float16 operator/(const float16& a, const float16& b) {
-  // TODO(kexinzhao): check the cuda version that starts to support __hdiv
+HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+  // TODO(kexinzhao): check which cuda version starts to support __hdiv
   float num = __half2float(half(a));
   float denom = __half2float(half(b));
   return float16(num / denom);
+#else
+  return float16(float(a) / float(b));
+#endif
 }
 
-DEVICE inline float16 operator-(const float16& a) {
+HOSTDEVICE inline float16 operator-(const float16& a) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return float16(__hneg(half(a)));
+#else
+  // XOR with 0x8000 toggles bit 15 (the sign bit in IEEE 754 half layout).
+  float16 res;
+  res.x = a.x ^ 0x8000;
+  return res;
+#endif
 }
 
-DEVICE inline float16& operator+=(float16& a, const float16& b) {
+HOSTDEVICE inline float16& operator+=(float16& a, const float16& b) {
   a = a + b;
   return a;
 }
 
-DEVICE inline float16& operator-=(float16& a, const float16& b) {
+HOSTDEVICE inline float16& operator-=(float16& a, const float16& b) {
   a = a - b;
   return a;
 }
 
-DEVICE inline float16& operator*=(float16& a, const float16& b) {
+HOSTDEVICE inline float16& operator*=(float16& a, const float16& b) {
   a = a * b;
   return a;
 }
 
-DEVICE inline float16& operator/=(float16& a, const float16& b) {
+HOSTDEVICE inline float16& operator/=(float16& a, const float16& b) {
   a = a / b;
   return a;
 }
 
-DEVICE inline bool operator==(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return __heq(half(a), half(b));
+#else
+  return float(a) == float(b);
+#endif
 }
 
-DEVICE inline bool operator!=(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return __hne(half(a), half(b));
+#else
+  return float(a) != float(b);
+#endif
 }
 
-DEVICE inline bool operator<(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return __hlt(half(a), half(b));
+#else
+  return float(a) < float(b);
+#endif
 }
 
-DEVICE inline bool operator<=(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return __hle(half(a), half(b));
+#else
+  return float(a) <= float(b);
+#endif
 }
 
-DEVICE inline bool operator>(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return __hgt(half(a), half(b));
+#else
+  return float(a) > float(b);
+#endif
 }
 
-DEVICE inline bool operator>=(const float16& a, const float16& b) {
+HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return __hge(half(a), half(b));
+#else
+  return float(a) >= float(b);
+#endif
 }
 
 // Arithmetic operators for float16 on ARMv8.2-A CPU
@@ -737,71 +787,71 @@ HOST inline bool operator>=(const float16& a, const float16& b) {
   return (res & 0xffff) != 0;
 }
 
-// Arithmetic operators for float16, software emulated on other CPU/GPU
+// Arithmetic operators for float16, software emulated on other CPU
 #else
-HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
+HOST inline float16 operator+(const float16& a, const float16& b) {
   return float16(float(a) + float(b));
 }
 
-HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
+HOST inline float16 operator-(const float16& a, const float16& b) {
   return float16(float(a) - float(b));
 }
 
-HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
+HOST inline float16 operator*(const float16& a, const float16& b) {
   return float16(float(a) * float(b));
 }
 
-HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
+HOST inline float16 operator/(const float16& a, const float16& b) {
   return float16(float(a) / float(b));
 }
 
-HOSTDEVICE inline float16 operator-(const float16& a) {
+HOST inline float16 operator-(const float16& a) {
   float16 res;
   res.x = a.x ^ 0x8000;
   return res;
 }
 
-HOSTDEVICE inline float16& operator+=(float16& a, const float16& b) {
+HOST inline float16& operator+=(float16& a, const float16& b) {
   a = float16(float(a) + float(b));
   return a;
 }
 
-HOSTDEVICE inline float16& operator-=(float16& a, const float16& b) {
+HOST inline float16& operator-=(float16& a, const float16& b) {
   a = float16(float(a) - float(b));
   return a;
 }
 
-HOSTDEVICE inline float16& operator*=(float16& a, const float16& b) {
+HOST inline float16& operator*=(float16& a, const float16& b) {
   a = float16(float(a) * float(b));
   return a;
 }
 
-HOSTDEVICE inline float16& operator/=(float16& a, const float16& b) {
+HOST inline float16& operator/=(float16& a, const float16& b) {
   a = float16(float(a) / float(b));
   return a;
 }
 
-HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
+HOST inline bool operator==(const float16& a, const float16& b) {
   return float(a) == float(b);
 }
 
-HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
+HOST inline bool operator!=(const float16& a, const float16& b) {
   return float(a) != float(b);
 }
 
-HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
+HOST inline bool operator<(const float16& a, const float16& b) {
   return float(a) < float(b);
 }
 
-HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
+HOST inline bool operator<=(const float16& a, const float16& b) {
   return float(a) <= float(b);
 }
 
-HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
+HOST inline bool operator>(const float16& a, const float16& b) {
   return float(a) > float(b);
 }
 
-HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
+HOST inline bool operator>=(const float16& a, const float16& b) {
   return float(a) >= float(b);
 }
 #endif