Unverified commit f6463eaa, authored by yangguohao, committed by GitHub

【Complex OP】No.28 LogSigmoid (#56852)

* complex op logsigmoid

* fix 2023-08-31
Parent 22e5ccb0
@@ -476,6 +476,16 @@ HOSTDEVICE inline complex<T> conj(const complex<T>& a) {
 #endif
 }
 
+template <typename T>
+HOSTDEVICE inline complex<T> exp(const complex<T>& a) {
+#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
+    (defined(__CUDA_ARCH__) || defined(__HIPCC__))
+  return complex<T>(thrust::exp(thrust::complex<T>(a)));
+#else
+  return complex<T>(std::exp(std::complex<T>(a)));
+#endif
+}
+
 template <typename T>
 HOSTDEVICE inline complex<T> log(const complex<T>& a) {
 #if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
...
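For reference, the new overload simply routes complex exponentiation to thrust::exp on device and std::exp on host. A quick NumPy/cmath sanity check of the underlying identity exp(a + bi) = e^a (cos b + i sin b); this snippet is illustrative only and independent of Paddle (the helper name check_complex_exp is made up):

import cmath
import numpy as np

def check_complex_exp(z: complex) -> None:
    # exp(a + bi) = e^a * (cos b + i * sin b)
    a, b = z.real, z.imag
    expected = np.exp(a) * (np.cos(b) + 1j * np.sin(b))
    assert cmath.isclose(cmath.exp(z), expected, rel_tol=1e-12)

check_complex_exp(0.3 - 0.7j)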
@@ -423,7 +423,8 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_grad, SigmoidGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_double_grad, SigmoidDoubleGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_triple_grad, SigmoidTripleGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(hardsigmoid_grad, HardSigmoidGradKernel)
-PD_REGISTER_ACTIVATION_GRAD_KERNEL(logsigmoid_grad, LogSigmoidGradKernel)
+PD_REGISTER_ACTIVATION_GRAD_KERNEL_WITH_COMPLEX(logsigmoid_grad,
+                                                LogSigmoidGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(log_grad, LogGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(log2_grad, Log2GradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(log10_grad, Log10GradKernel)
...
@@ -228,7 +228,7 @@ PD_REGISTER_KERNEL(
     square, CPU, ALL_LAYOUT, phi::SquareKernel, float, double, int, int64_t) {}
 PD_REGISTER_ACTIVATION_KERNEL(softsign, SoftsignKernel)
 PD_REGISTER_ACTIVATION_KERNEL(sigmoid, SigmoidKernel)
-PD_REGISTER_ACTIVATION_KERNEL(logsigmoid, LogSigmoidKernel)
+PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(logsigmoid, LogSigmoidKernel)
 PD_REGISTER_ACTIVATION_KERNEL(hardsigmoid, HardSigmoidKernel)
 PD_REGISTER_ACTIVATION_KERNEL(swish, SwishKernel)
 PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel)
...
@@ -2051,6 +2051,25 @@ struct LogSigmoidGradFunctor : public BaseActivationFunctor<T> {
   static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
 };
 
+template <typename T>
+struct LogSigmoidGradFunctor<ComplexType<T>>
+    : public BaseActivationFunctor<ComplexType<T>> {
+  template <typename Device,
+            typename X,
+            typename Out,
+            typename dOut,
+            typename dX>
+  void operator()(Device d, X x, Out out UNUSED, dOut dout, dX dx) const {
+    auto temp =
+        (-x).cwiseMax(static_cast<ComplexType<T>>(0));  // temp = max(-x, 0)
+    dx.device(d) =
+        dout * ((-x - temp).exp() / ((-temp).exp() + (-x - temp).exp()))
+                   .unaryExpr(Conj<T>());
+  }
+
+  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
+};
+
 template <typename T>
 struct HardSigmoidFunctor : public BaseActivationFunctor<T> {
   float slope;
@@ -3862,6 +3881,28 @@ struct CudaLogSigmoidGradFunctor : public BaseActivationFunctor<T> {
   static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
 };
 
+template <typename T>
+struct CudaLogSigmoidGradFunctor<ComplexType<T>>
+    : public BaseActivationFunctor<ComplexType<T>> {
+  ComplexType<T> zero = static_cast<ComplexType<T>>(0.0f);
+
+  // dx = dout * exp(-x) / (1 + exp(-x))
+  // For numerical stability:
+  // dx = dout * exp(-x - max(-x, 0)) / (exp(-max(-x, 0)) + exp(-x - max(-x,
+  // 0)))
+  __device__ __forceinline__ ComplexType<T> operator()(
+      const ComplexType<T> arg_dout, const ComplexType<T> arg_x) const {
+    ComplexType<T> dout = static_cast<ComplexType<T>>(arg_dout);
+    ComplexType<T> x = static_cast<ComplexType<T>>(arg_x);
+    ComplexType<T> temp1 = x > zero ? zero : -x;
+    ComplexType<T> temp2 = exp(-x - temp1);
+    return static_cast<ComplexType<T>>(dout *
+                                       conj(temp2 / (exp(-temp1) + temp2)));
+  }
+
+  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
+};
+
 template <typename T>
 struct CudaHardSigmoidFunctor : public BaseActivationFunctor<T> {
   T zero = static_cast<T>(0.0f);
...
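Both specializations above compute the same quantity: with m = max(-x, 0), dx = dout * conj(exp(-x - m) / (exp(-m) + exp(-x - m))), which is algebraically dout * conj(1 / (1 + exp(x))), i.e. the conjugated derivative of log_sigmoid. A minimal NumPy sketch of that formula follows; it is illustrative only, assumes the clamp is effectively applied to the real part of -x, and the name log_sigmoid_grad is not from the patch:

import numpy as np

def log_sigmoid_grad(dout: np.ndarray, x: np.ndarray) -> np.ndarray:
    """Reference gradient of log_sigmoid for complex inputs.

    Mirrors the stable formula used by the functors above:
        dx = dout * conj(exp(-x - m) / (exp(-m) + exp(-x - m))),  m = max(-x, 0)
    which equals dout * conj(1 / (1 + exp(x))).
    """
    m = np.maximum(-x.real, 0.0)  # clamp only the real part to bound the exponentials
    t = np.exp(-x - m)
    return dout * np.conj(t / (np.exp(-m) + t))

rng = np.random.default_rng(0)
x = (rng.uniform(-1, 1, 4) + 1j * rng.uniform(-1, 1, 4)).astype(np.complex128)
dout = np.ones_like(x)
np.testing.assert_allclose(log_sigmoid_grad(dout, x),
                           np.conj(1.0 / (1.0 + np.exp(x))),
                           rtol=1e-12)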
@@ -495,7 +495,8 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_grad, SigmoidGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_double_grad, SigmoidDoubleGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_triple_grad, SigmoidTripleGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(hardsigmoid_grad, HardSigmoidGradKernel)
-PD_REGISTER_ACTIVATION_GRAD_KERNEL(logsigmoid_grad, LogSigmoidGradKernel)
+PD_REGISTER_ACTIVATION_GRAD_KERNEL_WITH_COMPLEX(logsigmoid_grad,
+                                                LogSigmoidGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(log_grad, LogGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(log2_grad, Log2GradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(log10_grad, Log10GradKernel)
...
@@ -290,7 +290,7 @@ PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel)
 PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel)
 PD_REGISTER_ACTIVATION_KERNEL(softsign, SoftsignKernel)
 PD_REGISTER_ACTIVATION_KERNEL(sigmoid, SigmoidKernel)
-PD_REGISTER_ACTIVATION_KERNEL(logsigmoid, LogSigmoidKernel)
+PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(logsigmoid, LogSigmoidKernel)
 PD_REGISTER_ACTIVATION_KERNEL(hardsigmoid, HardSigmoidKernel)
 PD_REGISTER_ACTIVATION_KERNEL(hardswish, HardSwishKernel)
 PD_REGISTER_ACTIVATION_KERNEL(swish, SwishKernel)
...
@@ -790,7 +790,7 @@ def log_sigmoid(x, name=None):
         log\_sigmoid(x) = log \frac{1}{1 + e^{-x}}
 
     Parameters:
-        x (Tensor): The input Tensor with data type float32, float64.
+        x (Tensor): The input Tensor with data type float32, float64, complex64, complex128.
         name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
 
     Returns:
@@ -813,7 +813,10 @@ def log_sigmoid(x, name=None):
         return _C_ops.logsigmoid(x)
     else:
         check_variable_and_dtype(
-            x, 'x', ['float16', 'float32', 'float64'], 'log_sigmoid'
+            x,
+            'x',
+            ['float16', 'float32', 'float64', 'complex64', 'complex128'],
+            'log_sigmoid',
        )
         helper = LayerHelper("log_sigmoid", **locals())
         out = helper.create_variable_for_type_inference(x.dtype)
...
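With the complex kernels registered and the dtype check relaxed, the Python API accepts complex inputs. A hedged usage sketch (it assumes a Paddle build containing this change; output values are not reproduced here):

import numpy as np
import paddle
import paddle.nn.functional as F

x_np = np.array([0.3 + 0.4j, -1.2 - 0.5j], dtype=np.complex64)
x = paddle.to_tensor(x_np)
y = F.log_sigmoid(x)  # elementwise log(1 / (1 + exp(-x)))

# NumPy reference for comparison
print(y.numpy())
print(np.log(1.0 / (1.0 + np.exp(-x_np))))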
@@ -464,6 +464,12 @@ class TestLogSigmoid(TestActivation):
         self.init_shape()
         np.random.seed(2048)
-        x = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
+        if self.dtype is np.complex64 or self.dtype is np.complex128:
+            x = (
+                np.random.uniform(-1, 1, self.shape)
+                + 1j * np.random.uniform(-1, 1, self.shape)
+            ).astype(self.dtype)
+        else:
+            x = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
         out = np.log(1 / (1 + np.exp(-x)))
         self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
@@ -477,6 +483,16 @@ class TestLogSigmoid(TestActivation):
         self.check_grad(['X'], 'Out', max_relative_error=0.008)
 
 
+class TestLogSigmoidComplex64(TestLogSigmoid):
+    def init_dtype(self):
+        self.dtype = np.complex64
+
+
+class TestLogSigmoidComplex128(TestLogSigmoid):
+    def init_dtype(self):
+        self.dtype = np.complex128
+
+
 class TestLogSigmoid_ZeroDim(TestLogSigmoid):
     def init_shape(self):
         self.shape = []
...
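The new test classes reuse TestLogSigmoid's NumPy reference out = np.log(1 / (1 + np.exp(-x))) and its gradient check, now fed with complex data. Since log_sigmoid is holomorphic around the sampled points, the analytic derivative sigmoid(-z) = 1 / (1 + exp(z)) can also be cross-checked against a central difference with a small real step. The sketch below is standalone NumPy, not part of the patch, and all names in it are illustrative:

import numpy as np

def log_sigmoid(z):
    return np.log(1.0 / (1.0 + np.exp(-z)))

def numeric_derivative(f, z, h=1e-6):
    # Central difference with a real step; valid because log_sigmoid is
    # holomorphic in a neighbourhood of the sampled z (no poles nearby).
    return (f(z + h) - f(z - h)) / (2.0 * h)

rng = np.random.default_rng(2048)
z = (rng.uniform(-1, 1, 8) + 1j * rng.uniform(-1, 1, 8)).astype(np.complex128)
analytic = 1.0 / (1.0 + np.exp(z))  # d/dz log_sigmoid(z) = sigmoid(-z)
np.testing.assert_allclose(numeric_derivative(log_sigmoid, z), analytic,
                           rtol=1e-6, atol=1e-8)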