Unverified commit 01eddc1a authored by Q qingqing01, committed by GitHub

Support fp16 in GPU impl of fused_elemwise_activation_op. (#20636)

* Support fp16 in fused_elemwise_activation_op.
* Fix unit testing in CPU-only mode.
Parent db9fbcbc
......@@ -20,11 +20,15 @@ REGISTER_OP_CUDA_KERNEL(
ops::FusedElemwiseActivationKernel<paddle::platform::CUDADeviceContext,
float>,
ops::FusedElemwiseActivationKernel<paddle::platform::CUDADeviceContext,
double>);
double>,
ops::FusedElemwiseActivationKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>);
REGISTER_OP_CUDA_KERNEL(
fused_elemwise_activation_grad,
ops::FusedElemwiseActivationGradKernel<paddle::platform::CUDADeviceContext,
float>,
ops::FusedElemwiseActivationGradKernel<paddle::platform::CUDADeviceContext,
double>);
double>,
ops::FusedElemwiseActivationGradKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>);
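
The float16 registrations above only compile if every functor the kernel instantiates also compiles for paddle::platform::float16, which is what the header changes below provide by replacing bare integer literals with static_cast<T>(...). A minimal standalone sketch of the issue follows, assuming a half-like type with no implicit conversion from int (Half here is a hypothetical stand-in, not Paddle's real float16 class):

#include <cstdio>

// Hypothetical stand-in for paddle::platform::float16: the constructor is
// explicit, so expressions like `x > 0` or `x * (x > 0)` do not compile.
struct Half {
  float v;
  explicit Half(float f) : v(f) {}
  Half operator*(Half o) const { return Half(v * o.v); }
  bool operator>(Half o) const { return v > o.v; }
};

template <typename T>
T Relu(T x) {
  // return x * (x > 0);  // fails for Half: the literal 0 is not convertible to Half
  return x * (x > static_cast<T>(0) ? static_cast<T>(1) : static_cast<T>(0));
}

int main() {
  printf("%f\n", Relu(3.0f));            // float instantiation
  printf("%f\n", Relu(Half(-2.0f)).v);   // half-like instantiation also compiles
  return 0;
}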
......@@ -14,6 +14,8 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/operators/math.h"
namespace paddle {
namespace operators {
namespace math {
......@@ -40,8 +42,8 @@ struct AddFunctor {
template <typename T>
struct AddGradFunctor {
inline HOSTDEVICE T Dx(T x, T y) { return 1; }
inline HOSTDEVICE T Dy(T x, T y) { return 1; }
inline HOSTDEVICE T Dx(T x, T y) { return static_cast<T>(1.); }
inline HOSTDEVICE T Dy(T x, T y) { return static_cast<T>(1.); }
};
template <typename T>
......@@ -68,14 +70,22 @@ struct ScaleGradFunctor {
template <typename T>
struct ReluFunctor {
inline HOSTDEVICE T operator()(T x) { return x * (x > 0); }
inline HOSTDEVICE T operator()(T x) {
return x * (x > static_cast<T>(0) ? static_cast<T>(1) : static_cast<T>(0));
}
};
template <typename T>
struct ReluGradFunctor {
inline HOSTDEVICE T UseX(T x) { return x > 0 ? 1 : 0; }
inline HOSTDEVICE T UseOut(T out) { return out > 0 ? 1 : 0; }
inline HOSTDEVICE T UseXAndOut(T x, T out) { return out > 0 ? 1 : 0; }
inline HOSTDEVICE T UseX(T x) {
return x > static_cast<T>(0) ? static_cast<T>(1) : static_cast<T>(0);
}
inline HOSTDEVICE T UseOut(T out) {
return out > static_cast<T>(0) ? static_cast<T>(1) : static_cast<T>(0);
}
inline HOSTDEVICE T UseXAndOut(T x, T out) {
return out > static_cast<T>(0) ? static_cast<T>(1) : static_cast<T>(0);
}
};
template <typename T>
......@@ -84,9 +94,9 @@ struct TanhFunctor {
const T kMax = static_cast<T>(13);
inline HOSTDEVICE T operator()(T x) {
// y = 2 / (1 + e^-2x) - 1
T t0 = 2 * x;
T t0 = static_cast<T>(2) * x;
T t1 = (t0 < kMin) ? kMin : ((t0 > kMax) ? kMax : t0);
return static_cast<T>(2) / (static_cast<T>(1) + std::exp(-t1)) -
return static_cast<T>(2) / (static_cast<T>(1) + real_exp(-t1)) -
static_cast<T>(1);
}
};
......@@ -107,7 +117,7 @@ struct SigmoidFunctor {
inline HOSTDEVICE T operator()(T x) {
// y = 1 / (1 + e^-x)
T tmp = (x < kMin) ? kMin : ((x > kMax) ? kMax : x);
return static_cast<T>(1) / (static_cast<T>(1) + std::exp(-tmp));
return static_cast<T>(1) / (static_cast<T>(1) + real_exp(-tmp));
}
};
......
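
The newly included paddle/fluid/operators/math.h supplies real_exp, and the Tanh and Sigmoid functors switch to it because std::exp has no overload for paddle::platform::float16. Below is a sketch of the kind of overload set this relies on, under the assumption that the 16-bit case widens to float, calls the float exp, and narrows the result back; real_exp_sketch and Half16 are hypothetical names, not Paddle's actual definitions:

#include <cmath>
#include <cstdio>

// Hypothetical 16-bit float stand-in; only the conversions needed here.
struct Half16 {
  float v;
  explicit Half16(float f) : v(f) {}
  explicit operator float() const { return v; }
};

// Assumed shape of an exp overload set callable from templated functor code;
// float and double use the standard overloads, the 16-bit case computes in fp32.
inline float real_exp_sketch(float x) { return std::exp(x); }
inline double real_exp_sketch(double x) { return std::exp(x); }
inline Half16 real_exp_sketch(Half16 x) {
  return Half16(std::exp(static_cast<float>(x)));
}

int main() {
  printf("%f\n", real_exp_sketch(1.0f));                             // ~2.718282
  printf("%f\n", static_cast<float>(real_exp_sketch(Half16(1.0f)))); // same value via fp32
  return 0;
}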
......@@ -33,17 +33,24 @@ from op_test import OpTest
# TestFusedElementwiseActivationOp_channelwise_add
def create_test_class(test_case, callback, attrs):
def create_test_class(test_case,
callback,
attrs,
dtype=np.float32,
grad_chek=True):
class TestFusedElementwiseActivationOp_base(OpTest):
def setUp(self):
self.op_type = "fused_elemwise_activation"
self.dtype = np.float32
self.dtype = dtype
self.axis = -1
self.init_input()
self.init_output()
self.init_attr()
self.out = self.out.astype(self.dtype)
self.intermediate_out = self.intermediate_out.astype(self.dtype)
self.inputs = {
'X': OpTest.np_dtype_to_fluid_dtype(self.x),
'Y': OpTest.np_dtype_to_fluid_dtype(self.y)
......@@ -71,16 +78,25 @@ def create_test_class(test_case, callback, attrs):
self.attrs[key] = attrs[key]
def test_check_output(self):
if self.dtype == np.float16 and core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=1e-3)
else:
self.check_output()
# FIXME(zcd): the intermediate_out_grad is not checked.
def test_check_grad_normal(self):
if not grad_chek:
return
if self.attrs["save_intermediate_out"]:
self.check_grad(['X', 'Y'], ['Out'], max_relative_error=0.005)
else:
self.check_grad(['X', 'Y'], ['Out'], max_relative_error=0.005)
def test_check_grad_ingore_x(self):
if not grad_chek:
return
if self.attrs["save_intermediate_out"]:
self.check_grad(
['Y'], ['Out'],
......@@ -93,6 +109,8 @@ def create_test_class(test_case, callback, attrs):
no_grad_set=set("X"))
def test_check_grad_ingore_y(self):
if not grad_chek:
return
if self.attrs["save_intermediate_out"]:
self.check_grad(
['X'], ['Out'],
......@@ -307,11 +325,29 @@ for mode in {0, 1}:
'functor_list': ["scale", "elementwise_add"],
'save_intermediate_out': save_intermediate_out,
})
create_test_class(
'scale_add_fp16' + suffix,
scale_add_func, {
'scale': scale,
'functor_list': ["scale", "elementwise_add"],
'save_intermediate_out': save_intermediate_out,
},
dtype=np.float16,
grad_chek=False)
create_test_class('add_scale' + suffix, add_scale_func, {
'scale': scale,
'functor_list': ["elementwise_add", "scale"],
'save_intermediate_out': save_intermediate_out,
})
create_test_class(
'add_scale_fp16' + suffix,
add_scale_func, {
'scale': scale,
'functor_list': ["elementwise_add", "scale"],
'save_intermediate_out': save_intermediate_out,
},
dtype=np.float16,
grad_chek=False)
create_test_class('add_relu' + suffix, add_relu_func, {
'functor_list': ["elementwise_add", "relu"],
'save_intermediate_out': save_intermediate_out,
......@@ -320,11 +356,36 @@ for mode in {0, 1}:
'functor_list': ["relu", "elementwise_add"],
'save_intermediate_out': save_intermediate_out,
})
create_test_class(
'add_relu_fp16' + suffix,
add_relu_func, {
'functor_list': ["elementwise_add", "relu"],
'save_intermediate_out': save_intermediate_out,
},
dtype=np.float16,
grad_chek=False)
create_test_class(
'relu_add_fp16' + suffix,
relu_add_func, {
'functor_list': ["relu", "elementwise_add"],
'save_intermediate_out': save_intermediate_out,
},
dtype=np.float16,
grad_chek=False)
create_test_class('mul_scale' + suffix, mul_scale_func, {
'scale': scale,
'functor_list': ["elementwise_mul", "scale"],
'save_intermediate_out': save_intermediate_out,
})
create_test_class(
            'mul_scale_fp16' + suffix,
mul_scale_func, {
'scale': scale,
'functor_list': ["elementwise_mul", "scale"],
'save_intermediate_out': save_intermediate_out,
},
dtype=np.float16,
grad_chek=False)
if __name__ == '__main__':
unittest.main()