elementwise_functor.h 9.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

17
#include "paddle/fluid/platform/complex.h"
18
#include "paddle/pten/core/utils/array.h"
19
#include "paddle/pten/kernels/funcs/elementwise_functor.h"
20 21 22 23 24

namespace paddle {
namespace operators {

// Define the binary functors used in elementwise ops.
25
// Note: InverseXxxFunctor is needed when calling ElementwiseComputeEx on CPU.
26 27 28

// Add
template <typename T>
29 30
using AddFunctor = pten::funcs::AddFunctor<T>;

31
template <typename T>
32
using InverseAddFunctor = pten::funcs::InverseAddFunctor<T>;
33 34 35

// Subtract
template <typename T>
36 37
using SubFunctor = pten::funcs::SubtractFunctor<T>;

38
template <typename T>
39
using InverseSubFunctor = pten::funcs::InverseSubtractFunctor<T>;
40 41 42

// Multiply
template <typename T>
43 44
using MulFunctor = pten::funcs::MultiplyFunctor<T>;

45
template <typename T>
46
using InverseMulFunctor = pten::funcs::InverseMultiplyFunctor<T>;
47 48 49

// Divide
template <typename T>
50
using DivFunctor = pten::funcs::DivideFunctor<T>;
51

52 53
template <typename T>
using InverseDivFunctor = pten::funcs::InverseDivideFunctor<T>;
54 55 56 57

// Floor Divide
template <typename T>
struct FloorDivFunctor {
58
  inline HOSTDEVICE T operator()(const T a, const T b) const {
59 60 61 62 63 64 65
    PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
    return static_cast<T>(std::trunc(a / b));
  }
};

template <typename T>
struct InverseFloorDivFunctor {
66
  inline HOSTDEVICE T operator()(const T a, const T b) const {
67 68 69 70 71 72 73 74 75 76
    PADDLE_ENFORCE(a != 0, DIV_ERROR_INFO);
    return static_cast<T>(std::trunc(b / a));
  }
};

#undef DIV_ERROR_INFO

// Maximum
template <typename T>
struct MaxFunctor {
77
  inline HOSTDEVICE T operator()(const T a, const T b) const {
78 79 80 81 82 83 84
    return a > b ? a : b;
  }
};

// Minimum
template <typename T>
struct MinFunctor {
85
  inline HOSTDEVICE T operator()(const T a, const T b) const {
86 87 88 89
    return a < b ? a : b;
  }
};

90 91 92 93 94
template <typename T>
using Complex = paddle::platform::complex<T>;

template <typename InT, typename OutT>
struct DivGradXYFunctor {
95 96
  inline HOSTDEVICE pten::Array<OutT, 2> operator()(const InT a, const InT b,
                                                    const InT c) {
97 98
    // dx = dout / y
    // dy = - dout * out / y
99
    pten::Array<OutT, 2> outs;
100 101 102 103 104 105 106 107
    outs[0] = a / c;
    outs[1] = -a * b / c;
    return outs;
  }
};

// Complex specialization of the fused division gradient.
// Operands follow the primary template: a = dout, b = out, c = y.
// Returns a two-element array: [0] = dx, [1] = dy.
// operator() is const-qualified so the functor can be invoked through a const
// object or reference.
template <typename InT, typename OutT>
struct DivGradXYFunctor<Complex<InT>, Complex<OutT>> {
  inline HOSTDEVICE pten::Array<Complex<OutT>, 2> operator()(
      const Complex<InT> a, const Complex<InT> b, const Complex<InT> c) const {
    pten::Array<Complex<OutT>, 2> outs;
    // dx = dout / conj(y)
    const Complex<InT> c_conj(c.real, -c.imag);
    outs[0] = a / c_conj;
    // dy = - dout * conj(out / y); the quotient is evaluated once and reused
    // for both its real and imaginary parts.
    const auto out_div_c = b / c;
    const Complex<InT> out_div_c_conj(out_div_c.real, -out_div_c.imag);
    outs[1] = -a * out_div_c_conj;
    return outs;
  }
};

// Float div grad
template <typename T>
struct DivGradXFunctor {
122
  inline HOSTDEVICE T operator()(const T a, const T b) const { return a / b; }
123 124 125 126 127
};

// Complex div grad
// Complex specialization: divides `a` by the complex conjugate of `b`.
template <typename T>
struct DivGradXFunctor<Complex<T>> {
  inline HOSTDEVICE Complex<T> operator()(const Complex<T> a,
                                          const Complex<T> b) const {
    // conj(b) = (b.real, -b.imag)
    return a / Complex<T>(b.real, -b.imag);
  }
};

// Float mul and div
template <typename T>
struct DivGradYFunctor {
138
  inline HOSTDEVICE T operator()(const T a, const T b, const T c) const {
139 140 141 142 143 144 145
    return -a * b / c;
  }
};

// Complex mul and div
// Complex specialization: returns -a * conj(b / c).
template <typename T>
struct DivGradYFunctor<Complex<T>> {
  inline HOSTDEVICE Complex<T> operator()(const Complex<T> a,
                                          const Complex<T> b,
                                          const Complex<T> c) const {
    // Evaluate the complex quotient once and reuse it for both components
    // instead of repeating the division.
    const auto out_div_c = b / c;
    const Complex<T> out_div_c_conj(out_div_c.real, -out_div_c.imag);
    return -a * out_div_c_conj;
  }
};

L
LJQ❤️ 已提交
154 155 156
// Fmax
template <typename T>
struct FMaxFunctor {
157
  inline HOSTDEVICE T operator()(const T a, const T b) const {
L
LJQ❤️ 已提交
158 159 160 161 162 163 164
    return std::fmax(a, b);
  }
};

// float16 specialization: both operands are widened to float, std::fmax is
// applied on the float values, and the result is narrowed back to float16.
template <>
struct FMaxFunctor<paddle::platform::float16> {
  inline HOSTDEVICE paddle::platform::float16 operator()(
      const paddle::platform::float16 a,
      const paddle::platform::float16 b) const {
    const float lhs = static_cast<float>(a);
    const float rhs = static_cast<float>(b);
    return static_cast<paddle::platform::float16>(std::fmax(lhs, rhs));
  }
};

174 175
template <>
struct FMaxFunctor<int> {
176
  inline HOSTDEVICE int operator()(const int a, const int b) const {
177 178 179 180 181 182 183 184 185
    float float_a = static_cast<float>(a);
    float float_b = static_cast<float>(b);
    auto result = std::fmax(float_a, float_b);
    return std::lrint(result);
  }
};

template <>
struct FMaxFunctor<int64_t> {
186
  inline HOSTDEVICE int64_t operator()(const int64_t a, const int64_t b) const {
187 188 189 190 191 192 193
    double double_a = static_cast<double>(a);
    double double_b = static_cast<double>(b);
    auto result = std::fmax(double_a, double_b);
    return std::llrint(result);
  }
};

L
LJQ❤️ 已提交
194 195 196
// Fmin
template <typename T>
struct FMinFunctor {
197
  inline HOSTDEVICE T operator()(const T a, const T b) const {
L
LJQ❤️ 已提交
198 199 200 201 202 203 204
    return std::fmin(a, b);
  }
};

// float16 specialization: both operands are widened to float, std::fmin is
// applied on the float values, and the result is narrowed back to float16.
template <>
struct FMinFunctor<paddle::platform::float16> {
  inline HOSTDEVICE paddle::platform::float16 operator()(
      const paddle::platform::float16 a,
      const paddle::platform::float16 b) const {
    const float lhs = static_cast<float>(a);
    const float rhs = static_cast<float>(b);
    return static_cast<paddle::platform::float16>(std::fmin(lhs, rhs));
  }
};

214 215
template <>
struct FMinFunctor<int> {
216
  inline HOSTDEVICE int operator()(const int a, const int b) const {
217 218 219 220 221 222 223 224 225
    float float_a = static_cast<float>(a);
    float float_b = static_cast<float>(b);
    auto result = std::fmin(float_a, float_b);
    return std::lrint(result);
  }
};

template <>
struct FMinFunctor<int64_t> {
226
  inline HOSTDEVICE int64_t operator()(const int64_t a, const int64_t b) const {
227 228 229 230 231 232 233
    double double_a = static_cast<double>(a);
    double double_b = static_cast<double>(b);
    auto result = std::fmin(double_a, double_b);
    return std::llrint(result);
  }
};

234 235
template <typename T>
struct MinGradXFunctor {
236
  inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const {
237 238 239 240 241
    return dout * static_cast<T>(x < y);
  }
};
template <typename T>
struct MinGradYFunctor {
242
  inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const {
243 244 245 246 247 248
    return dout * static_cast<T>(x >= y);
  }
};

template <typename InT, typename OutT>
struct MinGradXYFunctor {
249 250 251
  inline HOSTDEVICE pten::Array<OutT, 2> operator()(const InT x, const InT y,
                                                    const InT dout) {
    pten::Array<OutT, 2> outs;
252 253 254 255 256 257 258 259
    // dx = dout * (x < y)
    outs[0] = static_cast<OutT>(dout * static_cast<InT>(x < y));
    // dy = dout * (x >= y)
    outs[1] = static_cast<OutT>(dout * static_cast<InT>(x >= y));
    return outs;
  }
};

260 261
template <typename T>
struct MulGradFunctor {
262
  inline HOSTDEVICE T operator()(const T a, const T b) const { return a * b; }
263 264 265
};
// Complex specialization: multiplies `a` by the complex conjugate of `b`.
template <typename T>
struct MulGradFunctor<Complex<T>> {
  inline HOSTDEVICE Complex<T> operator()(const Complex<T> a,
                                          const Complex<T> b) const {
    // conj(b) = (b.real, -b.imag)
    return a * Complex<T>(b.real, -b.imag);
  }
};

template <typename InT, typename OutT>
struct MulGradXYFunctor {
275 276 277
  inline HOSTDEVICE pten::Array<OutT, 2> operator()(const InT a, const InT b,
                                                    const InT c) {
    pten::Array<OutT, 2> outs;
278 279 280 281 282 283 284 285 286 287
    // dx = dout * y
    outs[0] = a * b;
    // dy = dout * x
    outs[1] = a * c;
    return outs;
  }
};

// Complex specialization of the fused multiplication gradient.
// Operands follow the primary template: a = dout, b = y, c = x.
// Returns a two-element array: [0] = dx, [1] = dy.
// operator() is const-qualified so the functor can be invoked through a const
// object or reference.
template <typename InT, typename OutT>
struct MulGradXYFunctor<Complex<InT>, Complex<OutT>> {
  inline HOSTDEVICE pten::Array<Complex<OutT>, 2> operator()(
      const Complex<InT> a, const Complex<InT> b, const Complex<InT> c) const {
    pten::Array<Complex<OutT>, 2> outs;
    // dx = dout * conj(y)
    const Complex<InT> b_conj(b.real, -b.imag);
    outs[0] = a * b_conj;
    // dy = dout * conj(x)
    const Complex<InT> c_conj(c.real, -c.imag);
    outs[1] = a * c_conj;
    return outs;
  }
};

301 302 303
// Ternary compare
template <typename T>
struct MaxGradXFunctor {
304
  inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const {
305 306 307 308 309
    return dout * static_cast<T>(x > y);
  }
};
template <typename T>
struct MaxGradYFunctor {
310
  inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const {
311 312 313 314 315 316
    return dout * static_cast<T>(x <= y);
  }
};

template <typename InT, typename OutT>
struct MaxGradXYFunctor {
317 318 319
  inline HOSTDEVICE pten::Array<OutT, 2> operator()(const InT x, const InT y,
                                                    const InT dout) {
    pten::Array<OutT, 2> outs;
320 321 322 323 324 325 326 327
    // dx = dout * (x > y)
    outs[0] = static_cast<OutT>(dout * static_cast<InT>(x > y));
    // dy = dout * (x <= y)
    outs[1] = static_cast<OutT>(dout * static_cast<InT>(x <= y));
    return outs;
  }
};

328 329
}  // namespace operators
}  // namespace paddle