/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #pragma once #include "paddle/fluid/platform/complex.h" #include "paddle/pten/core/utils/array.h" #include "paddle/pten/kernels/funcs/elementwise_functor.h" namespace paddle { namespace operators { // Define the binary functors used in elementwise ops. // Note: InverseXxxFunctor is needed when calling ElementwiseComputeEx on CPU. // Add template using AddFunctor = pten::funcs::AddFunctor; template using InverseAddFunctor = pten::funcs::InverseAddFunctor; // Subtract template using SubFunctor = pten::funcs::SubtractFunctor; template using InverseSubFunctor = pten::funcs::InverseSubtractFunctor; // Multiply template using MulFunctor = pten::funcs::MultiplyFunctor; template using InverseMulFunctor = pten::funcs::InverseMultiplyFunctor; // Divide template using DivFunctor = pten::funcs::DivideFunctor; template using InverseDivFunctor = pten::funcs::InverseDivideFunctor; // Floor Divide template struct FloorDivFunctor { inline HOSTDEVICE T operator()(const T a, const T b) const { PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO); return static_cast(std::trunc(a / b)); } }; template struct InverseFloorDivFunctor { inline HOSTDEVICE T operator()(const T a, const T b) const { PADDLE_ENFORCE(a != 0, DIV_ERROR_INFO); return static_cast(std::trunc(b / a)); } }; #undef DIV_ERROR_INFO // Maximum template struct MaxFunctor { inline HOSTDEVICE T operator()(const T a, const T b) const { return a > b ? a : b; } }; // Minmum template struct MinFunctor { inline HOSTDEVICE T operator()(const T a, const T b) const { return a < b ? a : b; } }; template using Complex = paddle::platform::complex; template struct DivGradXYFunctor { inline HOSTDEVICE pten::framework::Array operator()(const InT a, const InT b, const InT c) { // dx = dout / y // dy = - dout * out / y pten::framework::Array outs; outs[0] = a / c; outs[1] = -a * b / c; return outs; } }; template struct DivGradXYFunctor, Complex> { inline HOSTDEVICE pten::framework::Array, 2> operator()( const Complex a, const Complex b, const Complex c) { pten::framework::Array, 2> outs; Complex c_conj(c.real, -c.imag); Complex out_div_c_conj((b / c).real, -(b / c).imag); outs[0] = a / c_conj; outs[1] = -a * out_div_c_conj; return outs; } }; // Float div grad template struct DivGradXFunctor { inline HOSTDEVICE T operator()(const T a, const T b) const { return a / b; } }; // Complex div grad template struct DivGradXFunctor> { inline HOSTDEVICE Complex operator()(const Complex a, const Complex b) const { Complex b_conj(b.real, -b.imag); return a / b_conj; } }; // Float mul and div template struct DivGradYFunctor { inline HOSTDEVICE T operator()(const T a, const T b, const T c) const { return -a * b / c; } }; // Complex mul and div template struct DivGradYFunctor> { inline HOSTDEVICE Complex operator()(const Complex a, const Complex b, const Complex c) const { Complex out_div_c_conj((b / c).real, -(b / c).imag); return -a * out_div_c_conj; } }; // Fmax template struct FMaxFunctor { inline HOSTDEVICE T operator()(const T a, const T b) const { return std::fmax(a, b); } }; template <> struct FMaxFunctor { inline HOSTDEVICE paddle::platform::float16 operator()( const paddle::platform::float16 a, const paddle::platform::float16 b) const { float float_a = static_cast(a); float float_b = static_cast(b); auto result = std::fmax(float_a, float_b); return static_cast(result); } }; template <> struct FMaxFunctor { inline HOSTDEVICE int operator()(const int a, const int b) const { float float_a = static_cast(a); float float_b = static_cast(b); auto result = std::fmax(float_a, float_b); return std::lrint(result); } }; template <> struct FMaxFunctor { inline HOSTDEVICE int64_t operator()(const int64_t a, const int64_t b) const { double double_a = static_cast(a); double double_b = static_cast(b); auto result = std::fmax(double_a, double_b); return std::llrint(result); } }; // Fmin template struct FMinFunctor { inline HOSTDEVICE T operator()(const T a, const T b) const { return std::fmin(a, b); } }; template <> struct FMinFunctor { inline HOSTDEVICE paddle::platform::float16 operator()( const paddle::platform::float16 a, const paddle::platform::float16 b) const { float float_a = static_cast(a); float float_b = static_cast(b); auto result = std::fmin(float_a, float_b); return static_cast(result); } }; template <> struct FMinFunctor { inline HOSTDEVICE int operator()(const int a, const int b) const { float float_a = static_cast(a); float float_b = static_cast(b); auto result = std::fmin(float_a, float_b); return std::lrint(result); } }; template <> struct FMinFunctor { inline HOSTDEVICE int64_t operator()(const int64_t a, const int64_t b) const { double double_a = static_cast(a); double double_b = static_cast(b); auto result = std::fmin(double_a, double_b); return std::llrint(result); } }; template struct MinGradXFunctor { inline HOSTDEVICE T operator()(const T& x, const T& y, const T& dout) const { return dout * static_cast(x < y); } }; template struct MinGradYFunctor { inline HOSTDEVICE T operator()(const T& x, const T& y, const T& dout) const { return dout * static_cast(x >= y); } }; template struct MinGradXYFunctor { inline HOSTDEVICE pten::framework::Array operator()( const InT& x, const InT& y, const InT& dout) { pten::framework::Array outs; // dx = dout * (x < y) outs[0] = static_cast(dout * static_cast(x < y)); // dy = dout * (x >= y) outs[1] = static_cast(dout * static_cast(x >= y)); return outs; } }; template struct MulGradFunctor { inline HOSTDEVICE T operator()(const T a, const T b) const { return a * b; } }; template struct MulGradFunctor> { inline HOSTDEVICE Complex operator()(const Complex a, const Complex b) const { Complex b_conj(b.real, -b.imag); return a * b_conj; } }; template struct MulGradXYFunctor { inline HOSTDEVICE pten::framework::Array operator()(const InT a, const InT b, const InT c) { pten::framework::Array outs; // dx = dout * y outs[0] = a * b; // dy = dout * x outs[1] = a * c; return outs; } }; template struct MulGradXYFunctor, Complex> { inline HOSTDEVICE pten::framework::Array, 2> operator()( const Complex a, const Complex b, const Complex c) { pten::framework::Array, 2> outs; // dx = dout * y Complex b_conj(b.real, -b.imag); outs[0] = a * b_conj; // dy = dout * x Complex c_conj(c.real, -c.imag); outs[1] = a * c_conj; return outs; } }; // Ternary compare template struct MaxGradXFunctor { inline HOSTDEVICE T operator()(const T& x, const T& y, const T& dout) const { return dout * static_cast(x > y); } }; template struct MaxGradYFunctor { inline HOSTDEVICE T operator()(const T& x, const T& y, const T& dout) const { return dout * static_cast(x <= y); } }; template struct MaxGradXYFunctor { inline HOSTDEVICE pten::framework::Array operator()( const InT& x, const InT& y, const InT& dout) { pten::framework::Array outs; // dx = dout * (x > y) outs[0] = static_cast(dout * static_cast(x > y)); // dy = dout * (x <= y) outs[1] = static_cast(dout * static_cast(x <= y)); return outs; } }; } // namespace operators } // namespace paddle