diff --git a/paddle/fluid/operators/elementwise/elementwise_functor.h b/paddle/fluid/operators/elementwise/elementwise_functor.h index a8c9640d479d3e92a64a5797b332621414048458..e80dfba325937796d5539022850356bd1addd3ca 100644 --- a/paddle/fluid/operators/elementwise/elementwise_functor.h +++ b/paddle/fluid/operators/elementwise/elementwise_functor.h @@ -54,7 +54,7 @@ using InverseDivFunctor = pten::funcs::InverseDivideFunctor; // Floor Divide template struct FloorDivFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { + inline HOSTDEVICE T operator()(const T a, const T b) const { PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO); return static_cast(std::trunc(a / b)); } @@ -62,7 +62,7 @@ struct FloorDivFunctor { template struct InverseFloorDivFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { + inline HOSTDEVICE T operator()(const T a, const T b) const { PADDLE_ENFORCE(a != 0, DIV_ERROR_INFO); return static_cast(std::trunc(b / a)); } @@ -73,7 +73,7 @@ struct InverseFloorDivFunctor { // Maximum template struct MaxFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { + inline HOSTDEVICE T operator()(const T a, const T b) const { return a > b ? a : b; } }; @@ -81,7 +81,7 @@ struct MaxFunctor { // Minmum template struct MinFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { + inline HOSTDEVICE T operator()(const T a, const T b) const { return a < b ? a : b; } }; @@ -119,14 +119,14 @@ struct DivGradXYFunctor, Complex> { // Float div grad template struct DivGradXFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a / b; } + inline HOSTDEVICE T operator()(const T a, const T b) const { return a / b; } }; // Complex div grad template struct DivGradXFunctor> { - inline HOSTDEVICE Complex operator()(const Complex& a, - const Complex& b) const { + inline HOSTDEVICE Complex operator()(const Complex a, + const Complex b) const { Complex b_conj(b.real, -b.imag); return a / b_conj; } @@ -135,7 +135,7 @@ struct DivGradXFunctor> { // Float mul and div template struct DivGradYFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b, const T& c) const { + inline HOSTDEVICE T operator()(const T a, const T b, const T c) const { return -a * b / c; } }; @@ -143,9 +143,9 @@ struct DivGradYFunctor { // Complex mul and div template struct DivGradYFunctor> { - inline HOSTDEVICE Complex operator()(const Complex& a, - const Complex& b, - const Complex& c) const { + inline HOSTDEVICE Complex operator()(const Complex a, + const Complex b, + const Complex c) const { Complex out_div_c_conj((b / c).real, -(b / c).imag); return -a * out_div_c_conj; } @@ -154,7 +154,7 @@ struct DivGradYFunctor> { // Fmax template struct FMaxFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { + inline HOSTDEVICE T operator()(const T a, const T b) const { return std::fmax(a, b); } }; @@ -162,8 +162,8 @@ struct FMaxFunctor { template <> struct FMaxFunctor { inline HOSTDEVICE paddle::platform::float16 operator()( - const paddle::platform::float16& a, - const paddle::platform::float16& b) const { + const paddle::platform::float16 a, + const paddle::platform::float16 b) const { float float_a = static_cast(a); float float_b = static_cast(b); auto result = std::fmax(float_a, float_b); @@ -173,7 +173,7 @@ struct FMaxFunctor { template <> struct FMaxFunctor { - inline HOSTDEVICE int operator()(const int& a, const int& b) const { + inline HOSTDEVICE int operator()(const int a, const int b) const { float float_a = static_cast(a); float float_b = static_cast(b); auto result = std::fmax(float_a, float_b); @@ -183,8 +183,7 @@ struct FMaxFunctor { template <> struct FMaxFunctor { - inline HOSTDEVICE int64_t operator()(const int64_t& a, - const int64_t& b) const { + inline HOSTDEVICE int64_t operator()(const int64_t a, const int64_t b) const { double double_a = static_cast(a); double double_b = static_cast(b); auto result = std::fmax(double_a, double_b); @@ -195,7 +194,7 @@ struct FMaxFunctor { // Fmin template struct FMinFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { + inline HOSTDEVICE T operator()(const T a, const T b) const { return std::fmin(a, b); } }; @@ -203,8 +202,8 @@ struct FMinFunctor { template <> struct FMinFunctor { inline HOSTDEVICE paddle::platform::float16 operator()( - const paddle::platform::float16& a, - const paddle::platform::float16& b) const { + const paddle::platform::float16 a, + const paddle::platform::float16 b) const { float float_a = static_cast(a); float float_b = static_cast(b); auto result = std::fmin(float_a, float_b); @@ -214,7 +213,7 @@ struct FMinFunctor { template <> struct FMinFunctor { - inline HOSTDEVICE int operator()(const int& a, const int& b) const { + inline HOSTDEVICE int operator()(const int a, const int b) const { float float_a = static_cast(a); float float_b = static_cast(b); auto result = std::fmin(float_a, float_b); @@ -224,8 +223,7 @@ struct FMinFunctor { template <> struct FMinFunctor { - inline HOSTDEVICE int64_t operator()(const int64_t& a, - const int64_t& b) const { + inline HOSTDEVICE int64_t operator()(const int64_t a, const int64_t b) const { double double_a = static_cast(a); double double_b = static_cast(b); auto result = std::fmin(double_a, double_b); @@ -261,12 +259,12 @@ struct MinGradXYFunctor { template struct MulGradFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; } + inline HOSTDEVICE T operator()(const T a, const T b) const { return a * b; } }; template struct MulGradFunctor> { - inline HOSTDEVICE Complex operator()(const Complex& a, - const Complex& b) const { + inline HOSTDEVICE Complex operator()(const Complex a, + const Complex b) const { Complex b_conj(b.real, -b.imag); return a * b_conj; } @@ -274,9 +272,9 @@ struct MulGradFunctor> { template struct MulGradXYFunctor { - inline HOSTDEVICE paddle::framework::Array operator()(const InT& a, - const InT& b, - const InT& c) { + inline HOSTDEVICE paddle::framework::Array operator()(const InT a, + const InT b, + const InT c) { paddle::framework::Array outs; // dx = dout * y outs[0] = a * b; @@ -289,7 +287,7 @@ struct MulGradXYFunctor { template struct MulGradXYFunctor, Complex> { inline HOSTDEVICE paddle::framework::Array, 2> operator()( - const Complex& a, const Complex& b, const Complex& c) { + const Complex a, const Complex b, const Complex c) { paddle::framework::Array, 2> outs; // dx = dout * y Complex b_conj(b.real, -b.imag); diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op.cu b/paddle/fluid/operators/elementwise/elementwise_mod_op.cu index d2106645a472791fe37bddda4deadd938920dd8d..4ef957c617870e67e880afcff022fbef73dc8e5b 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mod_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_mod_op.cu @@ -20,31 +20,6 @@ namespace plat = paddle::platform; namespace paddle { namespace operators { -template -struct CudaModFunctor { - inline HOSTDEVICE T operator()(const T* args) const { - T res = args[0] % args[1]; - - // Accoding to #PR26732: in dividen % divsor - // remainder shall have the same sign as divsor. - if ((res != 0) && ((args[1] ^ res) < 0)) res += args[1]; - return res; - } -}; - -template -struct CudaModFunctor< - T, typename std::enable_if_t::value>> { - inline HOSTDEVICE T operator()(const T* args) const { - T res = fmod(args[0], args[1]); - - // Accoding to #PR26732: in dividen % divsor - // remainder shall have the same sign as divsor. - if ((res != 0) && ((res < 0) != (args[1] < 0))) res += args[1]; - return res; - } -}; - template class ElementwiseModKernel : public framework::OpKernel { @@ -56,7 +31,7 @@ class ElementwiseModKernel ctx.template device_context(); int axis = PackTensorsIntoVector(ctx, &ins, &outs); LaunchElementwiseCudaKernel( - cuda_ctx, ins, &outs, axis, CudaModFunctor()); + cuda_ctx, ins, &outs, axis, ModFunctor()); } }; diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op.h b/paddle/fluid/operators/elementwise/elementwise_mod_op.h index 66c3e553c141fd925d64adb037525d016355dedd..bfb12e44b6b94cfcdbc0b2eceb03c73733ff7774 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mod_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_mod_op.h @@ -19,29 +19,36 @@ limitations under the License. */ namespace paddle { namespace operators { -template +template struct ModFunctor { - inline HOSTDEVICE T operator()(T a, T b) const { + inline HOSTDEVICE T operator()(const T a, const T b) const { T res = a % b; - if ((res != 0) && ((res < 0) != (b < 0))) res += b; + + // Accoding to #PR26732: in dividen % divsor + // remainder shall have the same sign as divsor. + if ((res != 0) && ((b ^ res) < 0)) res += b; return res; } }; template -struct InverseModFunctor { - inline HOSTDEVICE T operator()(T a, T b) const { - T res = b % a; - if ((res != 0) && ((res < 0) != (a < 0))) res += a; +struct ModFunctor::value>> { + inline HOSTDEVICE T operator()(const T a, const T b) const { + T res = fmod(a, b); + + // Accoding to #PR26732: in dividen % divsor + // remainder shall have the same sign as divsor. + if ((res != 0) && ((res < 0) != (b < 0))) res += b; return res; } }; template -struct ModFunctorFP { +struct InverseModFunctor { inline HOSTDEVICE T operator()(T a, T b) const { - T res = fmod(a, b); - if ((res != 0) && ((b < 0) != (res < 0))) res += b; + T res = b % a; + if ((res != 0) && ((res < 0) != (a < 0))) res += a; return res; } }; @@ -79,8 +86,8 @@ void elementwise_mod_fp(const framework::ExecutionContext &ctx, auto x_dims = x->dims(); auto y_dims = y->dims(); if (x_dims.size() >= y_dims.size()) { - ElementwiseComputeEx, DeviceContext, T>( - ctx, x, y, axis, ModFunctorFP(), z); + ElementwiseComputeEx, DeviceContext, T>(ctx, x, y, axis, + ModFunctor(), z); } else { ElementwiseComputeEx, DeviceContext, T>( ctx, x, y, axis, InverseModFunctorFP(), z); diff --git a/paddle/fluid/operators/elementwise/elementwise_pow_op.cu b/paddle/fluid/operators/elementwise/elementwise_pow_op.cu index 0f3aa8c3e1b9b152e8d941407841248b938bcfe3..722a53d188061b91d9c880fafac11bf70107bf6a 100644 --- a/paddle/fluid/operators/elementwise/elementwise_pow_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_pow_op.cu @@ -16,26 +16,6 @@ namespace ops = paddle::operators; namespace paddle { namespace operators { -template -struct CudaPowFunctor { - inline HOSTDEVICE T operator()(const T args[]) const { - return std::pow(args[0], args[1]); - } -}; - -template -struct CudaPowFunctor< - T, typename std::enable_if::value>::type> { - // On CUDAPlace, std::pow(3, 1) calls pow(float, float), and - // it will return a float number like 2.99... , which floor to 2 - // when cast to int by default and it is wrong. - // Use llrint to cast it to the nearest integer, which is 3. - inline HOSTDEVICE T operator()(const T args[]) const { - return std::llrint( - std::pow(static_cast(args[0]), static_cast(args[1]))); - } -}; - template class ElementwisePowKernel : public framework::OpKernel { @@ -48,7 +28,7 @@ class ElementwisePowKernel int axis = PackTensorsIntoVector(ctx, &ins, &outs); LaunchElementwiseCudaKernel( - cuda_ctx, ins, &outs, axis, CudaPowFunctor()); + cuda_ctx, ins, &outs, axis, PowFunctor()); } }; diff --git a/paddle/pten/kernels/funcs/elementwise_functor.h b/paddle/pten/kernels/funcs/elementwise_functor.h index 9b2519b0fd6b1418a9e73d3c3985bf6ce99dbbbd..7f33150739e1c50383c33d84445cff6eaa450983 100644 --- a/paddle/pten/kernels/funcs/elementwise_functor.h +++ b/paddle/pten/kernels/funcs/elementwise_functor.h @@ -26,31 +26,31 @@ namespace funcs { // Add template struct AddFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a + b; } + inline HOSTDEVICE T operator()(const T a, const T b) const { return a + b; } }; template struct InverseAddFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b + a; } + inline HOSTDEVICE T operator()(const T a, const T b) const { return b + a; } }; // Subtract template struct SubtractFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a - b; } + inline HOSTDEVICE T operator()(const T a, const T b) const { return a - b; } }; template struct InverseSubtractFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b - a; } + inline HOSTDEVICE T operator()(const T a, const T b) const { return b - a; } }; // Multiply template struct MultiplyFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; } + inline HOSTDEVICE T operator()(const T a, const T b) const { return a * b; } }; template struct InverseMultiplyFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b * a; } + inline HOSTDEVICE T operator()(const T a, const T b) const { return b * a; } }; // Divide @@ -60,14 +60,14 @@ struct InverseMultiplyFunctor { template struct DivideFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a / b; } + inline HOSTDEVICE T operator()(const T a, const T b) const { return a / b; } }; template struct DivideFunctor< T, typename std::enable_if::value>::type> { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { + inline HOSTDEVICE T operator()(const T a, const T b) const { // For int32/int64, need to check whether the divison is zero. PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO); return a / b; @@ -76,7 +76,7 @@ struct DivideFunctor< template struct InverseDivideFunctor { - inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b / a; } + inline HOSTDEVICE T operator()(const T a, const T b) const { return b / a; } }; } // namespace funcs