diff --git a/paddle/fluid/platform/float16.h b/paddle/fluid/platform/float16.h
index 52fb8c2531357ad7a2b2f8613e5c7fbcef52c6bb..a68dcc38aceeb4ce594bbf034681b3b075f69139 100644
--- a/paddle/fluid/platform/float16.h
+++ b/paddle/fluid/platform/float16.h
@@ -483,8 +483,77 @@ DEVICE inline bool operator>=(const half& a, const half& b) {
 
 #endif  // PADDLE_CUDA_FP16
 
-// Arithmetic operators on ARMv8.2-A CPU
-#if defined(PADDLE_WITH_NATIVE_FP16)
+// Arithmetic operators for float16 on GPU
+#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+DEVICE inline float16 operator+(const float16& a, const float16& b) {
+  return float16(__hadd(half(a), half(b)));
+}
+
+DEVICE inline float16 operator-(const float16& a, const float16& b) {
+  return float16(__hsub(half(a), half(b)));
+}
+
+DEVICE inline float16 operator*(const float16& a, const float16& b) {
+  return float16(__hmul(half(a), half(b)));
+}
+
+DEVICE inline float16 operator/(const float16& a, const float16& b) {
+  // TODO(kexinzhao): check the cuda version that starts to support __hdiv
+  float num = __half2float(half(a));
+  float denom = __half2float(half(b));
+  return float16(num / denom);
+}
+
+DEVICE inline float16 operator-(const float16& a) {
+  return float16(__hneg(half(a)));
+}
+
+DEVICE inline float16& operator+=(float16& a, const float16& b) {
+  a = a + b;
+  return a;
+}
+
+DEVICE inline float16& operator-=(float16& a, const float16& b) {
+  a = a - b;
+  return a;
+}
+
+DEVICE inline float16& operator*=(float16& a, const float16& b) {
+  a = a * b;
+  return a;
+}
+
+DEVICE inline float16& operator/=(float16& a, const float16& b) {
+  a = a / b;
+  return a;
+}
+
+DEVICE inline bool operator==(const float16& a, const float16& b) {
+  return __heq(half(a), half(b));
+}
+
+DEVICE inline bool operator!=(const float16& a, const float16& b) {
+  return __hne(half(a), half(b));
+}
+
+DEVICE inline bool operator<(const float16& a, const float16& b) {
+  return __hlt(half(a), half(b));
+}
+
+DEVICE inline bool operator<=(const float16& a, const float16& b) {
+  return __hle(half(a), half(b));
+}
+
+DEVICE inline bool operator>(const float16& a, const float16& b) {
+  return __hgt(half(a), half(b));
+}
+
+DEVICE inline bool operator>=(const float16& a, const float16& b) {
+  return __hge(half(a), half(b));
+}
+
+// Arithmetic operators for float16 on ARMv8.2-A CPU
+#elif defined(PADDLE_WITH_NATIVE_FP16)
 HOST inline float16 operator+(const float16& a, const float16& b) {
   float16 res;
   asm volatile(
@@ -668,7 +737,7 @@ HOST inline bool operator>=(const float16& a, const float16& b) {
   return (res & 0xffff) != 0;
 }
 
-// Arithmetic operators, software emulated on other CPU
+// Arithmetic operators for float16, software emulated on other CPU/GPU
 #else
 HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
   return float16(float(a) + float(b));
diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py
index 5e2c460c41e45b5edb75567aa57278714346edbd..2939895d79be8988cd97d61ae53f73ac3ed8a2fe 100644
--- a/python/paddle/fluid/tests/unittests/test_dropout_op.py
+++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py
@@ -86,10 +86,13 @@ class TestDropoutOp5(OpTest):
 class TestFP16DropoutOp1(OpTest):
     def setUp(self):
         x = np.random.random((32, 64)).astype("float16")
+        prob = 0.35
+        out = x * (1.0 - prob)
+
         self.op_type = "dropout"
         self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
-        self.attrs = {'dropout_prob': 0.35, 'fix_seed': True, 'is_test': True}
-        self.outputs = {'Out': x * (1.0 - self.attrs['dropout_prob'])}
+        self.attrs = {'dropout_prob': prob, 'fix_seed': True, 'is_test': True}
+        self.outputs = {'Out': out}
 
     def test_check_output(self):
         if core.is_compiled_with_cuda() and core.op_support_gpu("dropout"):
@@ -99,10 +102,13 @@ class TestFP16DropoutOp1(OpTest):
 class TestFP16DropoutOp2(OpTest):
     def setUp(self):
         x = np.random.random((32, 64, 3)).astype("float16")
+        prob = 0.75
+        out = x * (1.0 - prob)
+
         self.op_type = "dropout"
         self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
-        self.attrs = {'dropout_prob': 0.75, 'is_test': True}
-        self.outputs = {'Out': x * (1.0 - self.attrs['dropout_prob'])}
+        self.attrs = {'dropout_prob': prob, 'is_test': True}
+        self.outputs = {'Out': out}
 
     def test_check_output(self):
         if core.is_compiled_with_cuda() and core.op_support_gpu("dropout"):
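
Below is a minimal sketch, not part of the patch, of how the device-side float16 operators added above could be exercised from a CUDA kernel. The kernel name, the launch shape, and the assumption that float16 is paddle::platform::float16 and that the header can be included from a .cu file are illustrative only.

// Sketch only: assumes float16 == paddle::platform::float16 and that
// float16.h is includable from device code; names are hypothetical.
#include "paddle/fluid/platform/float16.h"

using paddle::platform::float16;

__global__ void ScaleAndShiftFP16(const float16* x, float16 scale,
                                  float16 shift, float16* out, int n) {
  int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx < n) {
    // On devices with compute capability >= 5.3 these operators lower to
    // __hmul/__hadd; on older devices the software-emulated HOSTDEVICE
    // fallback operators from the same header are used instead.
    out[idx] = x[idx] * scale + shift;
  }
}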