Commit 18d616ed authored by Kexin Zhao

add float16 arithmetic operators for newer GPUs (compute capability >= 5.3)

Parent d03dbb97
@@ -483,8 +483,77 @@ DEVICE inline bool operator>=(const half& a, const half& b) {
#endif  // PADDLE_CUDA_FP16

// Arithmetic operators for float16 on GPU
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
DEVICE inline float16 operator+(const float16& a, const float16& b) {
return float16(__hadd(half(a), half(b)));
}
DEVICE inline float16 operator-(const float16& a, const float16& b) {
return float16(__hsub(half(a), half(b)));
}
DEVICE inline float16 operator*(const float16& a, const float16& b) {
return float16(__hmul(half(a), half(b)));
}
DEVICE inline float16 operator/(const float16& a, const float16& b) {
// TODO(kexinzhao): check the cuda version that starts to support __hdiv
float num = __half2float(half(a));
float denom = __half2float(half(b));
return float16(num / denom);
}
DEVICE inline float16 operator-(const float16& a) {
return float16(__hneg(half(a)));
}
DEVICE inline float16& operator+=(float16& a, const float16& b) {
a = a + b;
return a;
}
DEVICE inline float16& operator-=(float16& a, const float16& b) {
a = a - b;
return a;
}
DEVICE inline float16& operator*=(float16& a, const float16& b) {
a = a * b;
return a;
}
DEVICE inline float16& operator/=(float16& a, const float16& b) {
a = a / b;
return a;
}
DEVICE inline bool operator==(const float16& a, const float16& b) {
return __heq(half(a), half(b));
}
DEVICE inline bool operator!=(const float16& a, const float16& b) {
return __hne(half(a), half(b));
}
DEVICE inline bool operator<(const float16& a, const float16& b) {
return __hlt(half(a), half(b));
}
DEVICE inline bool operator<=(const float16& a, const float16& b) {
return __hle(half(a), half(b));
}
DEVICE inline bool operator>(const float16& a, const float16& b) {
return __hgt(half(a), half(b));
}
DEVICE inline bool operator>=(const float16& a, const float16& b) {
return __hge(half(a), half(b));
}
// Arithmetic operators for float16 on ARMv8.2-A CPU
#elif defined(PADDLE_WITH_NATIVE_FP16)
HOST inline float16 operator+(const float16& a, const float16& b) {
  float16 res;
  asm volatile(
@@ -668,7 +737,7 @@ HOST inline bool operator>=(const float16& a, const float16& b) {
  return (res & 0xffff) != 0;
}

// Arithmetic operators for float16, software emulated on other CPU/GPU
#else
HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
  return float16(float(a) + float(b));
...
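For context, the block below sketches how the device-side operators added above could be used from a CUDA kernel. It is illustrative only and not part of this commit: the kernel name, the include path, and the `paddle::platform` namespace are assumptions not shown in this diff. On device code compiled for compute capability 5.3 or higher, `+` and `*` resolve to the `__hadd`/`__hmul` intrinsic branch; otherwise they fall back to the software-emulated float path.

```cuda
// Illustrative sketch (not part of this commit): a saxpy-style kernel that
// exercises the float16 operators defined in this header. The include path
// and namespace are assumed from PaddlePaddle's source layout.
#include "paddle/fluid/platform/float16.h"

using paddle::platform::float16;

__global__ void axpy_fp16(const float16* x, const float16* y, float16* out,
                          float16 alpha, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    // With __CUDA_ARCH__ >= 530 these map to __hmul and __hadd;
    // otherwise the HOSTDEVICE float-emulated overloads are used.
    out[i] = alpha * x[i] + y[i];
  }
}
```

A host-side caller would build `alpha` with the `float16(float)` conversion already used in this header, e.g. `float16(2.0f)`, before passing it to the kernel.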
@@ -86,10 +86,13 @@ class TestDropoutOp5(OpTest):
class TestFP16DropoutOp1(OpTest):
    def setUp(self):
        x = np.random.random((32, 64)).astype("float16")
        prob = 0.35
        out = x * (1.0 - prob)

        self.op_type = "dropout"
        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
        self.attrs = {'dropout_prob': prob, 'fix_seed': True, 'is_test': True}
        self.outputs = {'Out': out}

    def test_check_output(self):
        if core.is_compiled_with_cuda() and core.op_support_gpu("dropout"):
@@ -99,10 +102,13 @@ class TestFP16DropoutOp1(OpTest):
class TestFP16DropoutOp2(OpTest):
    def setUp(self):
        x = np.random.random((32, 64, 3)).astype("float16")
        prob = 0.75
        out = x * (1.0 - prob)

        self.op_type = "dropout"
        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
        self.attrs = {'dropout_prob': prob, 'is_test': True}
        self.outputs = {'Out': out}

    def test_check_output(self):
        if core.is_compiled_with_cuda() and core.op_support_gpu("dropout"):
...
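The two tests above only cover inference mode (`is_test=True`), where dropout reduces to scaling the input by `1 - dropout_prob`. As a rough illustration of how that scaling could use the new device-side float16 `operator*`, here is a hypothetical kernel; Paddle's actual dropout CUDA kernel is not part of this diff, so every name below is an assumption.

```cuda
// Hypothetical sketch, not Paddle's dropout implementation: inference-mode
// dropout is an elementwise scale by (1 - dropout_prob), which the float16
// operator* added in this commit makes possible directly on the device.
#include "paddle/fluid/platform/float16.h"

using paddle::platform::float16;

__global__ void dropout_infer_fp16(const float16* x, float16* out,
                                   float16 scale,  // float16(1.0f - dropout_prob)
                                   int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    out[i] = x[i] * scale;
  }
}
```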