未验证 提交 86434818 编写于 作者: Z Zhang Ting 提交者: GitHub

[part 2]change type of function args (#38886)

上级 df5d55bb
...@@ -24,7 +24,7 @@ struct CudaReluFunctor : public BaseActivationFunctor<T> { ...@@ -24,7 +24,7 @@ struct CudaReluFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f); T zero = static_cast<T>(0.0f);
// relu(x) = max(x, 0) // relu(x) = max(x, 0)
__device__ __forceinline__ T operator()(const T& x) const { __device__ __forceinline__ T operator()(const T x) const {
return x > zero ? x : zero; return x > zero ? x : zero;
} }
}; };
...@@ -34,7 +34,7 @@ struct CudaReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -34,7 +34,7 @@ struct CudaReluGradFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f); T zero = static_cast<T>(0.0f);
// dx = dout * (out > 0) // dx = dout * (out > 0)
__device__ __forceinline__ T operator()(const T& dout, const T& out) const { __device__ __forceinline__ T operator()(const T dout, const T out) const {
return out > zero ? dout : zero; return out > zero ? dout : zero;
} }
...@@ -51,7 +51,7 @@ struct CudaLeakyReluFunctor : public BaseActivationFunctor<T> { ...@@ -51,7 +51,7 @@ struct CudaLeakyReluFunctor : public BaseActivationFunctor<T> {
} }
// leakyrelu(x) = x > 0 ? x : alpha * x // leakyrelu(x) = x > 0 ? x : alpha * x
__device__ __forceinline__ T operator()(const T& x) const { __device__ __forceinline__ T operator()(const T x) const {
return x > zero ? x : static_cast<T>(alpha) * x; return x > zero ? x : static_cast<T>(alpha) * x;
} }
}; };
...@@ -66,7 +66,7 @@ struct CudaLeakyReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -66,7 +66,7 @@ struct CudaLeakyReluGradFunctor : public BaseActivationFunctor<T> {
} }
// dx = dout * (x > 0 ? 1 : alpha) // dx = dout * (x > 0 ? 1 : alpha)
__device__ __forceinline__ T operator()(const T& dout, const T& x) const { __device__ __forceinline__ T operator()(const T dout, const T x) const {
return x > zero ? dout : static_cast<T>(alpha) * dout; return x > zero ? dout : static_cast<T>(alpha) * dout;
} }
...@@ -79,7 +79,7 @@ struct CudaSigmoidFunctor : public BaseActivationFunctor<T> { ...@@ -79,7 +79,7 @@ struct CudaSigmoidFunctor : public BaseActivationFunctor<T> {
MPType one = static_cast<MPType>(1.0f); MPType one = static_cast<MPType>(1.0f);
// sigmoid(x) = 1 / (1 + exp(-x)) // sigmoid(x) = 1 / (1 + exp(-x))
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(one / (one + exp(-x))); return static_cast<T>(one / (one + exp(-x)));
} }
...@@ -90,7 +90,7 @@ struct CudaSigmoidGradFunctor : public BaseActivationFunctor<T> { ...@@ -90,7 +90,7 @@ struct CudaSigmoidGradFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f); T one = static_cast<T>(1.0f);
// dx = dout * out * (1 - out) // dx = dout * out * (1 - out)
__device__ __forceinline__ T operator()(const T& dout, const T& out) const { __device__ __forceinline__ T operator()(const T dout, const T out) const {
return dout * out * (one - out); return dout * out * (one - out);
} }
...@@ -103,7 +103,7 @@ struct CudaSiluFunctor : public BaseActivationFunctor<T> { ...@@ -103,7 +103,7 @@ struct CudaSiluFunctor : public BaseActivationFunctor<T> {
MPType one = static_cast<MPType>(1.0f); MPType one = static_cast<MPType>(1.0f);
// silu(x) = x / (1 + exp(-x)) // silu(x) = x / (1 + exp(-x))
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(x / (one + exp(-x))); return static_cast<T>(x / (one + exp(-x)));
} }
...@@ -115,8 +115,8 @@ struct CudaSiluGradFunctor : public BaseActivationFunctor<T> { ...@@ -115,8 +115,8 @@ struct CudaSiluGradFunctor : public BaseActivationFunctor<T> {
MPType one = static_cast<MPType>(1.0f); MPType one = static_cast<MPType>(1.0f);
// dx = dout * (1 + exp(-x) + x * exp(-x)) / (1 + exp(-x))^2 // dx = dout * (1 + exp(-x) + x * exp(-x)) / (1 + exp(-x))^2
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
MPType temp = one / (one + exp(-x)); MPType temp = one / (one + exp(-x));
...@@ -135,7 +135,7 @@ struct CudaLogSigmoidFunctor : public BaseActivationFunctor<T> { ...@@ -135,7 +135,7 @@ struct CudaLogSigmoidFunctor : public BaseActivationFunctor<T> {
// For numerical stability, // For numerical stability,
// logsigmoid(x) = // logsigmoid(x) =
// - (max(-x, 0) + log(exp(-max(-x, 0)) + exp(-x - max(-x, 0)))) // - (max(-x, 0) + log(exp(-max(-x, 0)) + exp(-x - max(-x, 0))))
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
MPType temp = x > zero ? zero : -x; MPType temp = x > zero ? zero : -x;
return static_cast<T>(-temp - log(exp(-temp) + exp(-x - temp))); return static_cast<T>(-temp - log(exp(-temp) + exp(-x - temp)));
...@@ -151,8 +151,8 @@ struct CudaLogSigmoidGradFunctor : public BaseActivationFunctor<T> { ...@@ -151,8 +151,8 @@ struct CudaLogSigmoidGradFunctor : public BaseActivationFunctor<T> {
// For numerical stability: // For numerical stability:
// dx = dout * exp(-x - max(-x, 0)) / (exp(-max(-x, 0)) + exp(-x - max(-x, // dx = dout * exp(-x - max(-x, 0)) / (exp(-max(-x, 0)) + exp(-x - max(-x,
// 0))) // 0)))
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
MPType temp1 = x > zero ? zero : -x; MPType temp1 = x > zero ? zero : -x;
...@@ -168,7 +168,7 @@ struct CudaAtanFunctor : public BaseActivationFunctor<T> { ...@@ -168,7 +168,7 @@ struct CudaAtanFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// atan(x) = atan(x) // atan(x) = atan(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(atan(x)); return static_cast<T>(atan(x));
} }
...@@ -179,7 +179,7 @@ struct CudaAtanGradFunctor : public BaseActivationFunctor<T> { ...@@ -179,7 +179,7 @@ struct CudaAtanGradFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f); T one = static_cast<T>(1.0f);
// dx = dout / (1 + x^2) // dx = dout / (1 + x^2)
__device__ __forceinline__ T operator()(const T& dout, const T& x) const { __device__ __forceinline__ T operator()(const T dout, const T x) const {
return dout / (one + x * x); return dout / (one + x * x);
} }
...@@ -197,7 +197,7 @@ struct CudaSoftShrinkFunctor : public BaseActivationFunctor<T> { ...@@ -197,7 +197,7 @@ struct CudaSoftShrinkFunctor : public BaseActivationFunctor<T> {
// softshrink(x) = x - lambda, if x > lambda; // softshrink(x) = x - lambda, if x > lambda;
// x + lambda, if x < -lambda; // x + lambda, if x < -lambda;
// 0, otherwise. // 0, otherwise.
__device__ __forceinline__ T operator()(const T& x) const { __device__ __forceinline__ T operator()(const T x) const {
T l = static_cast<T>(lambda); T l = static_cast<T>(lambda);
T temp1 = static_cast<T>(x > l); T temp1 = static_cast<T>(x > l);
T temp2 = static_cast<T>(x < -l); T temp2 = static_cast<T>(x < -l);
...@@ -215,7 +215,7 @@ struct CudaSoftShrinkGradFunctor : public BaseActivationFunctor<T> { ...@@ -215,7 +215,7 @@ struct CudaSoftShrinkGradFunctor : public BaseActivationFunctor<T> {
} }
// dx = dout, if x > lambda or x < -lambda else 0 // dx = dout, if x > lambda or x < -lambda else 0
__device__ __forceinline__ T operator()(const T& dout, const T& x) const { __device__ __forceinline__ T operator()(const T dout, const T x) const {
T l = static_cast<T>(lambda); T l = static_cast<T>(lambda);
return (x >= -l && x <= l) ? zero : dout; return (x >= -l && x <= l) ? zero : dout;
} }
...@@ -228,7 +228,7 @@ struct CudaCeilFunctor : public BaseActivationFunctor<T> { ...@@ -228,7 +228,7 @@ struct CudaCeilFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// ceil(x) = ceil(x) // ceil(x) = ceil(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(ceil(x)); return static_cast<T>(ceil(x));
} }
...@@ -239,7 +239,7 @@ struct CudaFloorFunctor : public BaseActivationFunctor<T> { ...@@ -239,7 +239,7 @@ struct CudaFloorFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// floor(x) = floor(x) // floor(x) = floor(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(floor(x)); return static_cast<T>(floor(x));
} }
...@@ -250,7 +250,7 @@ struct CudaRoundFunctor : public BaseActivationFunctor<T> { ...@@ -250,7 +250,7 @@ struct CudaRoundFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// round(x) = round(x) // round(x) = round(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(round(x)); return static_cast<T>(round(x));
} }
...@@ -259,7 +259,7 @@ struct CudaRoundFunctor : public BaseActivationFunctor<T> { ...@@ -259,7 +259,7 @@ struct CudaRoundFunctor : public BaseActivationFunctor<T> {
// GradFunctor for ceil, floor and round // GradFunctor for ceil, floor and round
template <typename T> template <typename T>
struct CudaZeroGradFunctor : public BaseActivationFunctor<T> { struct CudaZeroGradFunctor : public BaseActivationFunctor<T> {
__device__ __forceinline__ T operator()(const T& x) const { __device__ __forceinline__ T operator()(const T x) const {
return static_cast<T>(0.0f); return static_cast<T>(0.0f);
} }
...@@ -271,7 +271,7 @@ struct CudaCosFunctor : public BaseActivationFunctor<T> { ...@@ -271,7 +271,7 @@ struct CudaCosFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// cos(x) = cos(x) // cos(x) = cos(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(cos(x)); return static_cast<T>(cos(x));
} }
...@@ -282,8 +282,8 @@ struct CudaCosGradFunctor : public BaseActivationFunctor<T> { ...@@ -282,8 +282,8 @@ struct CudaCosGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// dx = dout * (-sin(x)) // dx = dout * (-sin(x))
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(-dout * sin(x)); return static_cast<T>(-dout * sin(x));
...@@ -297,7 +297,7 @@ struct CudaSinFunctor : public BaseActivationFunctor<T> { ...@@ -297,7 +297,7 @@ struct CudaSinFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// sin(x) = sin(x) // sin(x) = sin(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(sin(x)); return static_cast<T>(sin(x));
} }
...@@ -308,8 +308,8 @@ struct CudaSinGradFunctor : public BaseActivationFunctor<T> { ...@@ -308,8 +308,8 @@ struct CudaSinGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// dx = dout * cos(x) // dx = dout * cos(x)
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(dout * cos(x)); return static_cast<T>(dout * cos(x));
...@@ -323,7 +323,7 @@ struct CudaTanFunctor : public BaseActivationFunctor<T> { ...@@ -323,7 +323,7 @@ struct CudaTanFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// tan(x) = tan(x) // tan(x) = tan(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(tan(x)); return static_cast<T>(tan(x));
} }
...@@ -334,8 +334,8 @@ struct CudaTanGradFunctor : public BaseActivationFunctor<T> { ...@@ -334,8 +334,8 @@ struct CudaTanGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// dx = dout / cos(x)^2 // dx = dout / cos(x)^2
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(dout / (cos(x) * cos(x))); return static_cast<T>(dout / (cos(x) * cos(x)));
...@@ -349,7 +349,7 @@ struct CudaAsinFunctor : public BaseActivationFunctor<T> { ...@@ -349,7 +349,7 @@ struct CudaAsinFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// asin(x) = asin(x) // asin(x) = asin(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(asin(x)); return static_cast<T>(asin(x));
} }
...@@ -361,8 +361,8 @@ struct CudaAsinGradFunctor : public BaseActivationFunctor<T> { ...@@ -361,8 +361,8 @@ struct CudaAsinGradFunctor : public BaseActivationFunctor<T> {
MPType one = static_cast<MPType>(1.0f); MPType one = static_cast<MPType>(1.0f);
// dx = dout / sqrt(1 - x^2) // dx = dout / sqrt(1 - x^2)
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(dout / sqrt(one - x * x)); return static_cast<T>(dout / sqrt(one - x * x));
...@@ -376,7 +376,7 @@ struct CudaAcosFunctor : public BaseActivationFunctor<T> { ...@@ -376,7 +376,7 @@ struct CudaAcosFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// acos(x) = acos(x) // acos(x) = acos(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(acos(x)); return static_cast<T>(acos(x));
} }
...@@ -388,8 +388,8 @@ struct CudaAcosGradFunctor : public BaseActivationFunctor<T> { ...@@ -388,8 +388,8 @@ struct CudaAcosGradFunctor : public BaseActivationFunctor<T> {
MPType one = static_cast<MPType>(1.0f); MPType one = static_cast<MPType>(1.0f);
// dx = -dout / sqrt(1 - x^2) // dx = -dout / sqrt(1 - x^2)
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(-dout / sqrt(one - x * x)); return static_cast<T>(-dout / sqrt(one - x * x));
...@@ -403,7 +403,7 @@ struct CudaCoshFunctor : public BaseActivationFunctor<T> { ...@@ -403,7 +403,7 @@ struct CudaCoshFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// cosh(x) = cosh(x) // cosh(x) = cosh(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(cosh(x)); return static_cast<T>(cosh(x));
} }
...@@ -414,8 +414,8 @@ struct CudaCoshGradFunctor : public BaseActivationFunctor<T> { ...@@ -414,8 +414,8 @@ struct CudaCoshGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// dx = dout * sinh(x) // dx = dout * sinh(x)
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(dout * sinh(x)); return static_cast<T>(dout * sinh(x));
...@@ -429,7 +429,7 @@ struct CudaSinhFunctor : public BaseActivationFunctor<T> { ...@@ -429,7 +429,7 @@ struct CudaSinhFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// sinh(x) = sinh(x) // sinh(x) = sinh(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(sinh(x)); return static_cast<T>(sinh(x));
} }
...@@ -440,8 +440,8 @@ struct CudaSinhGradFunctor : public BaseActivationFunctor<T> { ...@@ -440,8 +440,8 @@ struct CudaSinhGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// dx = dout * cosh(x) // dx = dout * cosh(x)
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(dout * cosh(x)); return static_cast<T>(dout * cosh(x));
...@@ -455,7 +455,7 @@ struct CudaTanhFunctor : public BaseActivationFunctor<T> { ...@@ -455,7 +455,7 @@ struct CudaTanhFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// tanh(x) = tanh(x) // tanh(x) = tanh(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(tanh(x)); return static_cast<T>(tanh(x));
} }
...@@ -466,7 +466,7 @@ struct CudaTanhGradFunctor : public BaseActivationFunctor<T> { ...@@ -466,7 +466,7 @@ struct CudaTanhGradFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f); T one = static_cast<T>(1.0f);
// dx = dout * (1 - out^2) // dx = dout * (1 - out^2)
__device__ __forceinline__ T operator()(const T& dout, const T& out) const { __device__ __forceinline__ T operator()(const T dout, const T out) const {
return dout * (one - out * out); return dout * (one - out * out);
} }
...@@ -478,7 +478,7 @@ struct CudaAcoshFunctor : public BaseActivationFunctor<T> { ...@@ -478,7 +478,7 @@ struct CudaAcoshFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// Acosh(x) = acosh(x) // Acosh(x) = acosh(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(acosh(x)); return static_cast<T>(acosh(x));
} }
...@@ -489,8 +489,8 @@ struct CudaAcoshGradFunctor : public BaseActivationFunctor<T> { ...@@ -489,8 +489,8 @@ struct CudaAcoshGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f); MPType one = static_cast<MPType>(1.0f);
// dx = dout * 1 / sqrt(x^2 - 1) // dx = dout * 1 / sqrt(x^2 - 1)
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(dout * one / sqrt(x * x - one)); return static_cast<T>(dout * one / sqrt(x * x - one));
...@@ -504,7 +504,7 @@ struct CudaAsinhFunctor : public BaseActivationFunctor<T> { ...@@ -504,7 +504,7 @@ struct CudaAsinhFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// Asinh(x) = asinh(x) // Asinh(x) = asinh(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(asinh(x)); return static_cast<T>(asinh(x));
} }
...@@ -516,8 +516,8 @@ struct CudaAsinhGradFunctor : public BaseActivationFunctor<T> { ...@@ -516,8 +516,8 @@ struct CudaAsinhGradFunctor : public BaseActivationFunctor<T> {
MPType one = static_cast<MPType>(1.0f); MPType one = static_cast<MPType>(1.0f);
// dx = dout * 1/sqrt(x^2 + 1) // dx = dout * 1/sqrt(x^2 + 1)
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(dout * one / sqrt(x * x + one)); return static_cast<T>(dout * one / sqrt(x * x + one));
...@@ -531,7 +531,7 @@ struct CudaAtanhFunctor : public BaseActivationFunctor<T> { ...@@ -531,7 +531,7 @@ struct CudaAtanhFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// Atanh(x) = atanh(x) // Atanh(x) = atanh(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(atanh(x)); return static_cast<T>(atanh(x));
} }
...@@ -542,8 +542,8 @@ struct CudaAtanhGradFunctor : public BaseActivationFunctor<T> { ...@@ -542,8 +542,8 @@ struct CudaAtanhGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f); MPType one = static_cast<MPType>(1.0f);
// dx = dout * 1/(1- x^2) // dx = dout * 1/(1- x^2)
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(dout * one / (one - x * x)); return static_cast<T>(dout * one / (one - x * x));
...@@ -557,13 +557,13 @@ struct CudaReciprocalFunctor : public BaseActivationFunctor<T> { ...@@ -557,13 +557,13 @@ struct CudaReciprocalFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f); T one = static_cast<T>(1.0f);
// reciprocal(x) = 1 / x // reciprocal(x) = 1 / x
__device__ __forceinline__ T operator()(const T& x) const { return one / x; } __device__ __forceinline__ T operator()(const T x) const { return one / x; }
}; };
template <typename T> template <typename T>
struct CudaReciprocalGradFunctor : public BaseActivationFunctor<T> { struct CudaReciprocalGradFunctor : public BaseActivationFunctor<T> {
// dx = -dout * out^2 // dx = -dout * out^2
__device__ __forceinline__ T operator()(const T& dout, const T& out) const { __device__ __forceinline__ T operator()(const T dout, const T out) const {
return -dout * out * out; return -dout * out * out;
} }
...@@ -575,7 +575,7 @@ struct CudaExpFunctor : public BaseActivationFunctor<T> { ...@@ -575,7 +575,7 @@ struct CudaExpFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// exp(x) = exp(x) // exp(x) = exp(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(exp(x)); return static_cast<T>(exp(x));
} }
...@@ -584,7 +584,7 @@ struct CudaExpFunctor : public BaseActivationFunctor<T> { ...@@ -584,7 +584,7 @@ struct CudaExpFunctor : public BaseActivationFunctor<T> {
template <typename T> template <typename T>
struct CudaExpGradFunctor : public BaseActivationFunctor<T> { struct CudaExpGradFunctor : public BaseActivationFunctor<T> {
// dx = dout * out // dx = dout * out
__device__ __forceinline__ T operator()(const T& dout, const T& out) const { __device__ __forceinline__ T operator()(const T dout, const T out) const {
return dout * out; return dout * out;
} }
...@@ -596,7 +596,7 @@ struct CudaExpm1Functor : public BaseActivationFunctor<T> { ...@@ -596,7 +596,7 @@ struct CudaExpm1Functor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// expm1(x) = expm1(x) // expm1(x) = expm1(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(expm1(x)); return static_cast<T>(expm1(x));
} }
...@@ -605,7 +605,7 @@ struct CudaExpm1Functor : public BaseActivationFunctor<T> { ...@@ -605,7 +605,7 @@ struct CudaExpm1Functor : public BaseActivationFunctor<T> {
template <typename T> template <typename T>
struct CudaExpm1GradFunctor : public BaseActivationFunctor<T> { struct CudaExpm1GradFunctor : public BaseActivationFunctor<T> {
// dx = dout * (out + 1) // dx = dout * (out + 1)
__device__ __forceinline__ T operator()(const T& dout, const T& out) const { __device__ __forceinline__ T operator()(const T dout, const T out) const {
return dout * out + dout; return dout * out + dout;
} }
...@@ -617,7 +617,7 @@ struct CudaLogFunctor : public BaseActivationFunctor<T> { ...@@ -617,7 +617,7 @@ struct CudaLogFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// log(x) = log(x) // log(x) = log(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(log(x)); return static_cast<T>(log(x));
} }
...@@ -626,7 +626,7 @@ struct CudaLogFunctor : public BaseActivationFunctor<T> { ...@@ -626,7 +626,7 @@ struct CudaLogFunctor : public BaseActivationFunctor<T> {
template <typename T> template <typename T>
struct CudaLogGradFunctor : public BaseActivationFunctor<T> { struct CudaLogGradFunctor : public BaseActivationFunctor<T> {
// dx = dout / x // dx = dout / x
__device__ __forceinline__ T operator()(const T& dout, const T& x) const { __device__ __forceinline__ T operator()(const T dout, const T x) const {
return dout / x; return dout / x;
} }
...@@ -636,7 +636,7 @@ struct CudaLogGradFunctor : public BaseActivationFunctor<T> { ...@@ -636,7 +636,7 @@ struct CudaLogGradFunctor : public BaseActivationFunctor<T> {
template <typename T> template <typename T>
struct CudaSquareFunctor : public BaseActivationFunctor<T> { struct CudaSquareFunctor : public BaseActivationFunctor<T> {
// square(x) = x * x // square(x) = x * x
__device__ __forceinline__ T operator()(const T& x) const { return x * x; } __device__ __forceinline__ T operator()(const T x) const { return x * x; }
}; };
template <typename T> template <typename T>
...@@ -644,7 +644,7 @@ struct CudaSquareGradFunctor : public BaseActivationFunctor<T> { ...@@ -644,7 +644,7 @@ struct CudaSquareGradFunctor : public BaseActivationFunctor<T> {
T two = static_cast<T>(2.0f); T two = static_cast<T>(2.0f);
// dx = dout * 2 * x // dx = dout * 2 * x
__device__ __forceinline__ T operator()(const T& dout, const T& x) const { __device__ __forceinline__ T operator()(const T dout, const T x) const {
return dout * two * x; return dout * two * x;
} }
...@@ -656,7 +656,7 @@ struct CudaSqrtFunctor : public BaseActivationFunctor<T> { ...@@ -656,7 +656,7 @@ struct CudaSqrtFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// sqrt(x) = sqrt(x) // sqrt(x) = sqrt(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(sqrt(x)); return static_cast<T>(sqrt(x));
} }
...@@ -667,7 +667,7 @@ struct CudaSqrtGradFunctor : public BaseActivationFunctor<T> { ...@@ -667,7 +667,7 @@ struct CudaSqrtGradFunctor : public BaseActivationFunctor<T> {
T one_half = static_cast<T>(0.5f); T one_half = static_cast<T>(0.5f);
// dx = dout * 0.5 / out // dx = dout * 0.5 / out
__device__ __forceinline__ T operator()(const T& dout, const T& out) const { __device__ __forceinline__ T operator()(const T dout, const T out) const {
return one_half * dout / out; return one_half * dout / out;
} }
...@@ -679,7 +679,7 @@ struct CudaRsqrtFunctor : public BaseActivationFunctor<T> { ...@@ -679,7 +679,7 @@ struct CudaRsqrtFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// rsqrt(x) = rsqrt(x) // rsqrt(x) = rsqrt(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(rsqrt(x)); return static_cast<T>(rsqrt(x));
} }
...@@ -690,7 +690,7 @@ struct CudaRsqrtGradFunctor : public BaseActivationFunctor<T> { ...@@ -690,7 +690,7 @@ struct CudaRsqrtGradFunctor : public BaseActivationFunctor<T> {
T minus_one_half = static_cast<T>(-0.5f); T minus_one_half = static_cast<T>(-0.5f);
// dx = -0.5 * dout * out^3 // dx = -0.5 * dout * out^3
__device__ __forceinline__ T operator()(const T& dout, const T& out) const { __device__ __forceinline__ T operator()(const T dout, const T out) const {
return minus_one_half * dout * out * out * out; return minus_one_half * dout * out * out * out;
} }
...@@ -703,7 +703,7 @@ struct CudaLog1pFunctor : public BaseActivationFunctor<T> { ...@@ -703,7 +703,7 @@ struct CudaLog1pFunctor : public BaseActivationFunctor<T> {
MPType one = static_cast<MPType>(1.0f); MPType one = static_cast<MPType>(1.0f);
// log1p(x) = log(1 + x) // log1p(x) = log(1 + x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(log(one + x)); return static_cast<T>(log(one + x));
} }
...@@ -714,7 +714,7 @@ struct CudaLog1pGradFunctor : public BaseActivationFunctor<T> { ...@@ -714,7 +714,7 @@ struct CudaLog1pGradFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f); T one = static_cast<T>(1.0f);
// dx = dout / (1 + x) // dx = dout / (1 + x)
__device__ __forceinline__ T operator()(const T& dout, const T& x) const { __device__ __forceinline__ T operator()(const T dout, const T x) const {
return dout / (one + x); return dout / (one + x);
} }
...@@ -726,7 +726,7 @@ struct CudaLog2Functor : public BaseActivationFunctor<T> { ...@@ -726,7 +726,7 @@ struct CudaLog2Functor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// log2(x) = log2(x) // log2(x) = log2(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(log2(x)); return static_cast<T>(log2(x));
} }
...@@ -738,7 +738,7 @@ struct CudaLog2GradFunctor : public BaseActivationFunctor<T> { ...@@ -738,7 +738,7 @@ struct CudaLog2GradFunctor : public BaseActivationFunctor<T> {
T log_two = static_cast<T>(log(static_cast<MPType>(2.0f))); T log_two = static_cast<T>(log(static_cast<MPType>(2.0f)));
// dx = dout / (x * log(2)) // dx = dout / (x * log(2))
__device__ __forceinline__ T operator()(const T& dout, const T& x) const { __device__ __forceinline__ T operator()(const T dout, const T x) const {
return dout / (x * log_two); return dout / (x * log_two);
} }
...@@ -750,7 +750,7 @@ struct CudaLog10Functor : public BaseActivationFunctor<T> { ...@@ -750,7 +750,7 @@ struct CudaLog10Functor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// log10(x) = log10(x) // log10(x) = log10(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(log10(x)); return static_cast<T>(log10(x));
} }
...@@ -762,7 +762,7 @@ struct CudaLog10GradFunctor : public BaseActivationFunctor<T> { ...@@ -762,7 +762,7 @@ struct CudaLog10GradFunctor : public BaseActivationFunctor<T> {
T log_ten = static_cast<T>(log(static_cast<MPType>(10.0f))); T log_ten = static_cast<T>(log(static_cast<MPType>(10.0f)));
// dx = dout / (x * log(10)) // dx = dout / (x * log(10))
__device__ __forceinline__ T operator()(const T& dout, const T& x) const { __device__ __forceinline__ T operator()(const T dout, const T x) const {
return dout / (x * log_ten); return dout / (x * log_ten);
} }
...@@ -779,7 +779,7 @@ struct CudaBReluFunctor : public BaseActivationFunctor<T> { ...@@ -779,7 +779,7 @@ struct CudaBReluFunctor : public BaseActivationFunctor<T> {
} }
// brelu(x) = min(max(x, t_min), t_max) // brelu(x) = min(max(x, t_min), t_max)
__device__ __forceinline__ T operator()(const T& x) const { __device__ __forceinline__ T operator()(const T x) const {
T t_min_cast = static_cast<T>(t_min); T t_min_cast = static_cast<T>(t_min);
T t_max_cast = static_cast<T>(t_max); T t_max_cast = static_cast<T>(t_max);
T temp_max = x > t_min_cast ? x : t_min_cast; T temp_max = x > t_min_cast ? x : t_min_cast;
...@@ -799,7 +799,7 @@ struct CudaBReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -799,7 +799,7 @@ struct CudaBReluGradFunctor : public BaseActivationFunctor<T> {
} }
// dx = (x > t_min && x < t_max) ? dout : 0 // dx = (x > t_min && x < t_max) ? dout : 0
__device__ __forceinline__ T operator()(const T& dout, const T& x) const { __device__ __forceinline__ T operator()(const T dout, const T x) const {
T t_min_cast = static_cast<T>(t_min); T t_min_cast = static_cast<T>(t_min);
T t_max_cast = static_cast<T>(t_max); T t_max_cast = static_cast<T>(t_max);
return (x > t_min_cast && x < t_max_cast) ? dout : zero; return (x > t_min_cast && x < t_max_cast) ? dout : zero;
...@@ -820,7 +820,7 @@ struct CudaSoftReluFunctor : public BaseActivationFunctor<T> { ...@@ -820,7 +820,7 @@ struct CudaSoftReluFunctor : public BaseActivationFunctor<T> {
// soft_relu(x) = log(1 + exp(max(min(x, threshold), -threshold))) // soft_relu(x) = log(1 + exp(max(min(x, threshold), -threshold)))
// threshold should not be negative // threshold should not be negative
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
MPType t = static_cast<MPType>(threshold); MPType t = static_cast<MPType>(threshold);
MPType temp_min = x < t ? x : t; MPType temp_min = x < t ? x : t;
...@@ -841,8 +841,8 @@ struct CudaSoftReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -841,8 +841,8 @@ struct CudaSoftReluGradFunctor : public BaseActivationFunctor<T> {
// dx = (out > -threshold && out < threshold) ? dout * (1 - exp(-out)) : 0 // dx = (out > -threshold && out < threshold) ? dout * (1 - exp(-out)) : 0
// threshold should not be negative // threshold should not be negative
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_out) const { const T arg_out) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType out = static_cast<MPType>(arg_out); MPType out = static_cast<MPType>(arg_out);
MPType t = static_cast<MPType>(threshold); MPType t = static_cast<MPType>(threshold);
...@@ -864,7 +864,7 @@ struct CudaSTanhFunctor : public BaseActivationFunctor<T> { ...@@ -864,7 +864,7 @@ struct CudaSTanhFunctor : public BaseActivationFunctor<T> {
} }
// stanh(x) = b * tanh(a * x) // stanh(x) = b * tanh(a * x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
MPType a = static_cast<MPType>(scale_a); MPType a = static_cast<MPType>(scale_a);
MPType b = static_cast<MPType>(scale_b); MPType b = static_cast<MPType>(scale_b);
...@@ -884,8 +884,8 @@ struct CudaSTanhGradFunctor : public BaseActivationFunctor<T> { ...@@ -884,8 +884,8 @@ struct CudaSTanhGradFunctor : public BaseActivationFunctor<T> {
} }
// dx = dout * a * b * (1 - tanh(a * x) * tanh(a * x)) // dx = dout * a * b * (1 - tanh(a * x) * tanh(a * x))
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
MPType a = static_cast<MPType>(scale_a); MPType a = static_cast<MPType>(scale_a);
...@@ -909,7 +909,7 @@ struct CudaSoftplusFunctor : public BaseActivationFunctor<T> { ...@@ -909,7 +909,7 @@ struct CudaSoftplusFunctor : public BaseActivationFunctor<T> {
} }
// softplus(x) = beta * x > threshold ? x : log(1 + exp(beta * x)) / beta // softplus(x) = beta * x > threshold ? x : log(1 + exp(beta * x)) / beta
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
MPType b = static_cast<MPType>(beta); MPType b = static_cast<MPType>(beta);
MPType t = static_cast<MPType>(threshold); MPType t = static_cast<MPType>(threshold);
...@@ -930,8 +930,8 @@ struct CudaSoftplusGradFunctor : public BaseActivationFunctor<T> { ...@@ -930,8 +930,8 @@ struct CudaSoftplusGradFunctor : public BaseActivationFunctor<T> {
} }
// dx = x * beta > threshold ? dout : dout / (1 + exp(-beta * x)) // dx = x * beta > threshold ? dout : dout / (1 + exp(-beta * x))
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
MPType b = static_cast<MPType>(beta); MPType b = static_cast<MPType>(beta);
...@@ -948,7 +948,7 @@ struct CudaSoftsignFunctor : public BaseActivationFunctor<T> { ...@@ -948,7 +948,7 @@ struct CudaSoftsignFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f); T one = static_cast<T>(1.0f);
// softsign(x) = x / (1 + abs(x)) // softsign(x) = x / (1 + abs(x))
__device__ __forceinline__ T operator()(const T& x) const { __device__ __forceinline__ T operator()(const T x) const {
return x / (one + abs(x)); return x / (one + abs(x));
} }
}; };
...@@ -958,7 +958,7 @@ struct CudaSoftsignGradFunctor : public BaseActivationFunctor<T> { ...@@ -958,7 +958,7 @@ struct CudaSoftsignGradFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f); T one = static_cast<T>(1.0f);
// dx = dout / (1 + abs(x))^2 // dx = dout / (1 + abs(x))^2
__device__ __forceinline__ T operator()(const T& dout, const T& x) const { __device__ __forceinline__ T operator()(const T dout, const T x) const {
T temp = one + abs(x); T temp = one + abs(x);
return dout / (temp * temp); return dout / (temp * temp);
} }
...@@ -976,7 +976,7 @@ struct CudaRelu6Functor : public BaseActivationFunctor<T> { ...@@ -976,7 +976,7 @@ struct CudaRelu6Functor : public BaseActivationFunctor<T> {
} }
// relu6(x) = min(max(0, x), 6) // relu6(x) = min(max(0, x), 6)
__device__ __forceinline__ T operator()(const T& x) const { __device__ __forceinline__ T operator()(const T x) const {
T t = static_cast<T>(threshold); T t = static_cast<T>(threshold);
return x <= zero ? zero : (x < t ? x : t); return x <= zero ? zero : (x < t ? x : t);
} }
...@@ -992,7 +992,7 @@ struct CudaRelu6GradFunctor : public BaseActivationFunctor<T> { ...@@ -992,7 +992,7 @@ struct CudaRelu6GradFunctor : public BaseActivationFunctor<T> {
} }
// dx = (out > 0 && out < t) ? dout : 0 // dx = (out > 0 && out < t) ? dout : 0
__device__ __forceinline__ T operator()(const T& dout, const T& out) const { __device__ __forceinline__ T operator()(const T dout, const T out) const {
T t = static_cast<T>(threshold); T t = static_cast<T>(threshold);
return (out > zero && out < t) ? dout : zero; return (out > zero && out < t) ? dout : zero;
} }
...@@ -1005,7 +1005,7 @@ struct CudaTanhShrinkFunctor : public BaseActivationFunctor<T> { ...@@ -1005,7 +1005,7 @@ struct CudaTanhShrinkFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// tanhshrink(x) = x - tanh(x) // tanhshrink(x) = x - tanh(x)
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(x - tanh(x)); return static_cast<T>(x - tanh(x));
} }
...@@ -1016,8 +1016,8 @@ struct CudaTanhShrinkGradFunctor : public BaseActivationFunctor<T> { ...@@ -1016,8 +1016,8 @@ struct CudaTanhShrinkGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type; using MPType = typename details::MPTypeTrait<T>::Type;
// dx = dout * tanh(x)^2 // dx = dout * tanh(x)^2
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(dout * tanh(x) * tanh(x)); return static_cast<T>(dout * tanh(x) * tanh(x));
...@@ -1036,7 +1036,7 @@ struct CudaHardShrinkFunctor : public BaseActivationFunctor<T> { ...@@ -1036,7 +1036,7 @@ struct CudaHardShrinkFunctor : public BaseActivationFunctor<T> {
} }
// hardshrink(x) = (x > -threshold && x < threshold) ? 0 : x // hardshrink(x) = (x > -threshold && x < threshold) ? 0 : x
__device__ __forceinline__ T operator()(const T& x) const { __device__ __forceinline__ T operator()(const T x) const {
T t = static_cast<T>(threshold); T t = static_cast<T>(threshold);
return (x > -t && x < t) ? zero : x; return (x > -t && x < t) ? zero : x;
} }
...@@ -1052,7 +1052,7 @@ struct CudaHardShrinkGradFunctor : public BaseActivationFunctor<T> { ...@@ -1052,7 +1052,7 @@ struct CudaHardShrinkGradFunctor : public BaseActivationFunctor<T> {
} }
// dx = (x > -threshold && x < threshold) ? 0 : dout // dx = (x > -threshold && x < threshold) ? 0 : dout
__device__ __forceinline__ T operator()(const T& dout, const T& x) const { __device__ __forceinline__ T operator()(const T dout, const T x) const {
T t = static_cast<T>(threshold); T t = static_cast<T>(threshold);
return (x > -t && x < t) ? zero : dout; return (x > -t && x < t) ? zero : dout;
} }
...@@ -1074,7 +1074,7 @@ struct CudaHardSigmoidFunctor : public BaseActivationFunctor<T> { ...@@ -1074,7 +1074,7 @@ struct CudaHardSigmoidFunctor : public BaseActivationFunctor<T> {
// hard_sigmoid(x) = 0, when x <= -3 // hard_sigmoid(x) = 0, when x <= -3
// 1, when x >= 3 // 1, when x >= 3
// x * slope + offset, otherwise // x * slope + offset, otherwise
__device__ __forceinline__ T operator()(const T& x) const { __device__ __forceinline__ T operator()(const T x) const {
T temp = x * static_cast<T>(slope) + static_cast<T>(offset); T temp = x * static_cast<T>(slope) + static_cast<T>(offset);
T temp_max = temp > zero ? temp : zero; T temp_max = temp > zero ? temp : zero;
T temp_min = temp_max < one ? temp_max : one; T temp_min = temp_max < one ? temp_max : one;
...@@ -1094,7 +1094,7 @@ struct CudaHardSigmoidGradFunctor : public BaseActivationFunctor<T> { ...@@ -1094,7 +1094,7 @@ struct CudaHardSigmoidGradFunctor : public BaseActivationFunctor<T> {
} }
// dx = (out > 0 && out < 1) ? dout * slope : 0 // dx = (out > 0 && out < 1) ? dout * slope : 0
__device__ __forceinline__ T operator()(const T& dout, const T& out) const { __device__ __forceinline__ T operator()(const T dout, const T out) const {
return (out > zero && out < one) ? dout * static_cast<T>(slope) : zero; return (out > zero && out < one) ? dout * static_cast<T>(slope) : zero;
} }
...@@ -1112,7 +1112,7 @@ struct CudaSwishFunctor : public BaseActivationFunctor<T> { ...@@ -1112,7 +1112,7 @@ struct CudaSwishFunctor : public BaseActivationFunctor<T> {
} }
// swish(x) = x / (1 + exp(-beta * x)) // swish(x) = x / (1 + exp(-beta * x))
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
MPType b = static_cast<MPType>(beta); MPType b = static_cast<MPType>(beta);
return static_cast<T>(x / (one + exp(-b * x))); return static_cast<T>(x / (one + exp(-b * x)));
...@@ -1130,8 +1130,8 @@ struct CudaSwishGradFunctor : public BaseActivationFunctor<T> { ...@@ -1130,8 +1130,8 @@ struct CudaSwishGradFunctor : public BaseActivationFunctor<T> {
} }
// dx = dout * (1 + exp(-b * x) + b * x * exp(-b * x)) / (1 + exp(-b * x))^2 // dx = dout * (1 + exp(-b * x) + b * x * exp(-b * x)) / (1 + exp(-b * x))^2
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
MPType b = static_cast<MPType>(beta); MPType b = static_cast<MPType>(beta);
...@@ -1159,7 +1159,7 @@ struct CudaMishFunctor : public BaseActivationFunctor<T> { ...@@ -1159,7 +1159,7 @@ struct CudaMishFunctor : public BaseActivationFunctor<T> {
// softplus(x) = x, if x > threshold // softplus(x) = x, if x > threshold
// = ln(1 + exp(x)), otherwise // = ln(1 + exp(x)), otherwise
// Inputs: args[0], the input x // Inputs: args[0], the input x
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
MPType sp = (x > static_cast<MPType>(threshold)) ? x : log(one + exp(x)); MPType sp = (x > static_cast<MPType>(threshold)) ? x : log(one + exp(x));
return static_cast<T>(x * tanh(sp)); return static_cast<T>(x * tanh(sp));
...@@ -1180,8 +1180,8 @@ struct CudaMishGradFunctor : public BaseActivationFunctor<T> { ...@@ -1180,8 +1180,8 @@ struct CudaMishGradFunctor : public BaseActivationFunctor<T> {
// sp = softplus(x) // sp = softplus(x)
// Inputs: args[0], the input dout // Inputs: args[0], the input dout
// args[1], the input x // args[1], the input x
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
MPType sp = (x > static_cast<MPType>(threshold)) ? x : log(one + exp(x)); MPType sp = (x > static_cast<MPType>(threshold)) ? x : log(one + exp(x));
...@@ -1204,7 +1204,7 @@ struct CudaThresholdedReluFunctor : public BaseActivationFunctor<T> { ...@@ -1204,7 +1204,7 @@ struct CudaThresholdedReluFunctor : public BaseActivationFunctor<T> {
} }
// thresholded_relu(x) = x > threshold ? x : 0 // thresholded_relu(x) = x > threshold ? x : 0
__device__ __forceinline__ T operator()(const T& x) const { __device__ __forceinline__ T operator()(const T x) const {
return x > static_cast<T>(threshold) ? x : zero; return x > static_cast<T>(threshold) ? x : zero;
} }
}; };
...@@ -1219,7 +1219,7 @@ struct CudaThresholdedReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -1219,7 +1219,7 @@ struct CudaThresholdedReluGradFunctor : public BaseActivationFunctor<T> {
} }
// dx = x > threshold ? dout : 0 // dx = x > threshold ? dout : 0
__device__ __forceinline__ T operator()(const T& dout, const T& x) const { __device__ __forceinline__ T operator()(const T dout, const T x) const {
return x > static_cast<T>(threshold) ? dout : zero; return x > static_cast<T>(threshold) ? dout : zero;
} }
...@@ -1241,7 +1241,7 @@ struct CudaHardSwishFunctor : public BaseActivationFunctor<T> { ...@@ -1241,7 +1241,7 @@ struct CudaHardSwishFunctor : public BaseActivationFunctor<T> {
// x , when x >= threshold - offset // x , when x >= threshold - offset
// x * (x + offset) / scale, otherwise // x * (x + offset) / scale, otherwise
// threshold = scale = 6, offset = 3 by default // threshold = scale = 6, offset = 3 by default
__device__ __forceinline__ T operator()(const T& x) const { __device__ __forceinline__ T operator()(const T x) const {
T t = static_cast<T>(threshold); T t = static_cast<T>(threshold);
T temp = x + static_cast<T>(offset); T temp = x + static_cast<T>(offset);
T temp_max = temp > zero ? temp : zero; T temp_max = temp > zero ? temp : zero;
...@@ -1267,7 +1267,7 @@ struct CudaHardSwishGradFunctor : public BaseActivationFunctor<T> { ...@@ -1267,7 +1267,7 @@ struct CudaHardSwishGradFunctor : public BaseActivationFunctor<T> {
// dout , when x >= threshold - offset // dout , when x >= threshold - offset
// dout * (2 * x / scale + offset / scale), otherwise // dout * (2 * x / scale + offset / scale), otherwise
// threshold = scale = 6, offset = 3 by default // threshold = scale = 6, offset = 3 by default
__device__ __forceinline__ T operator()(const T& dout, const T& x) const { __device__ __forceinline__ T operator()(const T dout, const T x) const {
T o = static_cast<T>(offset); T o = static_cast<T>(offset);
T s = static_cast<T>(scale); T s = static_cast<T>(scale);
T temp1 = static_cast<T>(x + o > zero); T temp1 = static_cast<T>(x + o > zero);
...@@ -1291,7 +1291,7 @@ struct CudaELUFunctor : public BaseActivationFunctor<T> { ...@@ -1291,7 +1291,7 @@ struct CudaELUFunctor : public BaseActivationFunctor<T> {
// elu(x) = x, if x > 0 // elu(x) = x, if x > 0
// elu(x) = alpha * (e^x - 1), if x <= 0 // elu(x) = alpha * (e^x - 1), if x <= 0
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
CT x = static_cast<CT>(arg_x); CT x = static_cast<CT>(arg_x);
CT temp = static_cast<CT>(alpha) * (exp(x) - one); CT temp = static_cast<CT>(alpha) * (exp(x) - one);
CT res = x > zero ? x : temp; CT res = x > zero ? x : temp;
...@@ -1312,8 +1312,7 @@ struct CudaELUGradFunctor : public BaseActivationFunctor<T> { ...@@ -1312,8 +1312,7 @@ struct CudaELUGradFunctor : public BaseActivationFunctor<T> {
// case 1: alpha >= 0 // case 1: alpha >= 0
// dx = dout, if out > 0 // dx = dout, if out > 0
// dx = dout * (out + alpha), if out <= 0 // dx = dout * (out + alpha), if out <= 0
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(T arg_dout, T arg_out) const {
const T& arg_out) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType out = static_cast<MPType>(arg_out); MPType out = static_cast<MPType>(arg_out);
MPType a = static_cast<MPType>(alpha); MPType a = static_cast<MPType>(alpha);
...@@ -1338,8 +1337,8 @@ struct CudaELUGradNegativeAlphaFunctor : public BaseActivationFunctor<T> { ...@@ -1338,8 +1337,8 @@ struct CudaELUGradNegativeAlphaFunctor : public BaseActivationFunctor<T> {
// case 2: alpha < 0 // case 2: alpha < 0
// dx = dout, if x > 0 // dx = dout, if x > 0
// dx = dout * (out + alpha), if x <=0 // dx = dout * (out + alpha), if x <=0
__device__ __forceinline__ T operator()(const T& arg_dout, const T& arg_out, __device__ __forceinline__ T operator()(const T arg_dout, const T arg_out,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType out = static_cast<MPType>(arg_out); MPType out = static_cast<MPType>(arg_out);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
...@@ -1393,7 +1392,7 @@ struct CudaCELUFunctor : public BaseActivationFunctor<T> { ...@@ -1393,7 +1392,7 @@ struct CudaCELUFunctor : public BaseActivationFunctor<T> {
} }
// celu(x) = max(0, x) + min(0, alpha * (exp(x/alpha) - 1)) // celu(x) = max(0, x) + min(0, alpha * (exp(x/alpha) - 1))
__device__ __forceinline__ T operator()(const T& arg_x) const { __device__ __forceinline__ T operator()(const T arg_x) const {
CT x = static_cast<CT>(arg_x); CT x = static_cast<CT>(arg_x);
CT temp = static_cast<CT>(alpha) * (exp(x / static_cast<CT>(alpha)) - one); CT temp = static_cast<CT>(alpha) * (exp(x / static_cast<CT>(alpha)) - one);
CT res = (x > zero ? x : zero) + (temp > zero ? zero : temp); CT res = (x > zero ? x : zero) + (temp > zero ? zero : temp);
...@@ -1416,8 +1415,8 @@ struct CudaCELUGradFunctor : public BaseActivationFunctor<T> { ...@@ -1416,8 +1415,8 @@ struct CudaCELUGradFunctor : public BaseActivationFunctor<T> {
// dx = dout * (x/alpha).exp(), if alpha > 0 and x <= 0 // dx = dout * (x/alpha).exp(), if alpha > 0 and x <= 0
// dx = dout , if alpha < 0 and x > 0 // dx = dout , if alpha < 0 and x > 0
// dx = dout * (x/alpha).exp(), if alpha < 0 and x <=0 // dx = dout * (x/alpha).exp(), if alpha < 0 and x <=0
__device__ __forceinline__ T operator()(const T& arg_dout, __device__ __forceinline__ T operator()(const T arg_dout,
const T& arg_x) const { const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout); MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x); MPType x = static_cast<MPType>(arg_x);
MPType a = static_cast<MPType>(alpha); MPType a = static_cast<MPType>(alpha);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册