/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. */

#pragma once
#include <cstdint>

#include "paddle/phi/core/macros.h"
#include "paddle/phi/kernels/funcs/jit/macro.h"

namespace phi {
namespace jit {
// Identifiers for every JIT kernel; each <Name>Tuple below is tagged with
// its matching k<Name> enumerator.
typedef enum {
  kNone = 0,
  // sort by alphabet
  kAdam = 1,
  kAdamW,
  kCRFDecoding,
  kEmbSeqPool,
  kGRUH1,
  kGRUHtPart1,
  kGRUHtPart2,
  kLSTMCtHt,
  kLSTMC1H1,
  kLayerNorm,
  kMatMul,
  kSeqPool,
  kVAdd,
  kVAddBias,
  kVAddRelu,
  kVBroadcast,
  kVCopy,
  kVExp,
  kVIdentity,
  kVMul,
  kVRelu,
  kVScal,
  kSgd,  // NOTE: out of alphabetical order; do not reorder, the implicit
         // enumerator values would change.
  kVSigmoid,
  kVSquare,
  kVSub,
  kVTanh,
} KernelType;

// Pooling modes used by the SeqPool / EmbSeqPool kernels.
typedef enum {
  kNonePoolType = 0,
  kSum = 1,
  kAvg,
  kSqrt,
} SeqPoolType;

// x, y, z, n
// Signature pack for kernels of the form op(x, y, z, n): two const inputs,
// one output, element count as the attribute.
template <typename T>
struct XYZNTuple {
  typedef T data_type;
  typedef int attr_type;
  typedef void (*func_type)(const T*, const T*, T*, int);
};

// a, x, y, n
// Same signature as XYZN (inherits it); the first pointer is a scalar-like
// coefficient `a` rather than a second vector.
template <typename T>
struct AXYNTuple : public XYZNTuple<T> {};

// a, x, y, n, stride
// Like AXYN but with a trailing stride attribute in the call signature.
template <typename T>
struct AXYNSTuple {
  typedef T data_type;
  typedef int attr_type;
  typedef void (*func_type)(const T*, const T*, T*, int, int);
};

// x, y, n
// Signature pack for unary kernels: one const input, one output, n elements.
template <typename T>
struct XYNTuple {
  typedef T data_type;
  typedef int attr_type;
  typedef void (*func_type)(const T*, T*, int);
};

// x, returned value, n, stride
// Signature pack for strided reductions: input, single output value,
// element count, stride.
template <typename T>
struct XRNSTuple {
  typedef T data_type;
  typedef int attr_type;
  typedef void (*func_type)(const T*, T*, int, int);
};

99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
#define DECLARE_KERNELTUPLE(kernel_tuple, type)        \
  template <typename T>                                \
  struct type##Tuple : public kernel_tuple<T> {        \
    static constexpr KernelType kernel_type = k##type; \
  }

// Tuple should be corresponding to the KernelType
DECLARE_KERNELTUPLE(XYZNTuple, VMul);
DECLARE_KERNELTUPLE(XYZNTuple, VAdd);
DECLARE_KERNELTUPLE(XYZNTuple, VAddRelu);
DECLARE_KERNELTUPLE(XYZNTuple, VSub);

DECLARE_KERNELTUPLE(AXYNTuple, VScal);
DECLARE_KERNELTUPLE(AXYNTuple, VAddBias);

DECLARE_KERNELTUPLE(XYNTuple, VRelu);
DECLARE_KERNELTUPLE(XYNTuple, VIdentity);
DECLARE_KERNELTUPLE(XYNTuple, VSquare);
DECLARE_KERNELTUPLE(XYNTuple, VExp);
DECLARE_KERNELTUPLE(XYNTuple, VSigmoid);
DECLARE_KERNELTUPLE(XYNTuple, VTanh);
DECLARE_KERNELTUPLE(XYNTuple, VCopy);

// Argument pack passed to the LSTM kernels (type-erased data pointers).
typedef struct {
  void* gates;  // gates: x_ch, x_ih, x_fh, x_oh
  const void* ct_1;
  void* ct;
  void* ht;
  /* weight_peephole and checked data are only used in peephole*/
  const void* wp{nullptr};  //  W_ic, W_fc, W_oc
  void* checked{nullptr};   // size: 2 * d
} lstm_t;

// Argument pack passed to the GRU kernels.
typedef struct {
  void* gates;  // gates: {x_update, x_reset; x_state}
  const void* ht_1;
  void* ht;
} gru_t;

struct rnn_attr_s {
  int d;
  KernelType act_gate, act_cand;
  rnn_attr_s() = default;
T
tensor-tang 已提交
142
  explicit rnn_attr_s(int _d, KernelType _act_gate, KernelType _act_cand)
T
tensor-tang 已提交
143 144 145 146 147 148 149
      : d(_d), act_gate(_act_gate), act_cand(_act_cand) {}
};

struct lstm_attr_s : public rnn_attr_s {
  bool use_peephole;
  KernelType act_cell;
  lstm_attr_s() = default;
150 151 152 153 154
  explicit lstm_attr_s(int _d,
                       KernelType _act_gate,
                       KernelType _act_cand,
                       KernelType _act_cell,
                       bool _use_peephole = false)
T
tensor-tang 已提交
155 156 157 158 159 160 161 162 163
      : rnn_attr_s(_d, _act_gate, _act_cand),
        use_peephole(_use_peephole),
        act_cell(_act_cell) {}
};

typedef struct rnn_attr_s gru_attr_t;
typedef struct lstm_attr_s lstm_attr_t;

template <typename T>
164
struct LSTMTuple {
T
tensor-tang 已提交
165 166 167 168 169
  typedef T data_type;
  typedef lstm_attr_t attr_type;
  typedef void (*func_type)(lstm_t*, const lstm_attr_t*);
};

170
template <typename T>
171
struct GRUTuple {
172 173 174 175 176
  typedef T data_type;
  typedef gru_attr_t attr_type;
  typedef void (*func_type)(gru_t*, const gru_attr_t*);
};

DECLARE_KERNELTUPLE(LSTMTuple, LSTMCtHt);
DECLARE_KERNELTUPLE(LSTMTuple, LSTMC1H1);

DECLARE_KERNELTUPLE(GRUTuple, GRUH1);
DECLARE_KERNELTUPLE(GRUTuple, GRUHtPart1);
DECLARE_KERNELTUPLE(GRUTuple, GRUHtPart2);

// The macro is only needed while declaring the tuples above.
#undef DECLARE_KERNELTUPLE

186
template <typename T>
187 188
struct VBroadcastTuple {
  static constexpr KernelType kernel_type = kVBroadcast;
189 190 191 192 193
  typedef T data_type;
  typedef int64_t attr_type;
  typedef void (*func_type)(const T*, T*, int64_t, int64_t);
};

194
typedef struct seq_pool_attr_s {
T
tensor-tang 已提交
195
  int h, w;  // h should always be the first one
T
tensor-tang 已提交
196
  SeqPoolType type;
197
  seq_pool_attr_s() = default;
T
tensor-tang 已提交
198
  explicit seq_pool_attr_s(int width, SeqPoolType pool_type, int height = 1)
199
      : h(height), w(width), type(pool_type) {}
T
tensor-tang 已提交
200 201 202
} seq_pool_attr_t;

template <typename T>
203 204
struct SeqPoolTuple {
  static constexpr KernelType kernel_type = kSeqPool;
T
tensor-tang 已提交
205 206 207 208 209
  typedef T data_type;
  typedef seq_pool_attr_t attr_type;
  typedef void (*func_type)(const T*, T*, const seq_pool_attr_t*);
};

210 211 212 213 214 215
typedef struct emb_seq_pool_attr_s {
  int64_t table_height, table_width;
  int64_t index_height, index_width;
  int64_t out_width;
  SeqPoolType pool_type;
  emb_seq_pool_attr_s() = default;
216 217 218 219
  explicit emb_seq_pool_attr_s(int64_t tbl_height,
                               int64_t tbl_width,
                               int64_t idx_height,
                               int64_t idx_width,
220 221 222 223 224 225 226 227 228 229 230
                               int64_t output_width,
                               SeqPoolType seqpool_type = SeqPoolType::kSum)
      : table_height(tbl_height),
        table_width(tbl_width),
        index_height(idx_height),
        index_width(idx_width),
        out_width(output_width),
        pool_type(seqpool_type) {}
} emb_seq_pool_attr_t;

template <typename T>
231 232
struct EmbSeqPoolTuple {
  static constexpr KernelType kernel_type = kEmbSeqPool;
233 234
  typedef T data_type;
  typedef emb_seq_pool_attr_t attr_type;
235 236 237
  typedef void (*func_type)(const T*,
                            const int64_t*,
                            T*,
238 239 240
                            const emb_seq_pool_attr_t*);
};

// Attributes of the Sgd kernel: parameter and gradient matrix shapes plus
// the number of selected rows to update.
typedef struct sgd_attr_s {
  int64_t param_height, param_width;
  int64_t grad_height, grad_width;
  int64_t selected_rows_size;
  sgd_attr_s() = default;
  explicit sgd_attr_s(int64_t param_h,
                      int64_t param_w,
                      int64_t grad_h,
                      int64_t grad_w,
                      int64_t selected_rows_sz)
      : param_height(param_h),
        param_width(param_w),
        grad_height(grad_h),
        grad_width(grad_w),
        selected_rows_size(selected_rows_sz) {}
} sgd_attr_t;

template <typename T>
259 260
struct SgdTuple {
  static constexpr KernelType kernel_type = kSgd;
261 262
  typedef T data_type;
  typedef sgd_attr_t attr_type;
263 264
  typedef void (*func_type)(
      const T*, const T*, const T*, const int64_t*, T*, const sgd_attr_t*);
265 266
};

// Adam hyper-parameters (exponential decay rates for the moment estimates).
typedef struct adam_attr_s {
  float beta1, beta2;
  adam_attr_s() = default;
  explicit adam_attr_s(float beta1, float beta2) : beta1(beta1), beta2(beta2) {}
} adam_attr_t;

template <typename T>
struct AdamTuple {
  static constexpr KernelType kernel_type = kAdam;
  typedef T data_type;
  typedef adam_attr_t attr_type;
278 279
  typedef void (*func_type)(
      T, T, T, T, int64_t, const T*, const T*, const T*, const T*, T*, T*, T*);
280 281
};

282 283 284 285 286
template <typename T>
struct AdamWTuple {
  static constexpr KernelType kernel_type = kAdamW;
  typedef T data_type;
  typedef int attr_type;
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
  typedef void (*func_type)(T,
                            T,
                            T,
                            T,
                            T,
                            T,
                            T,
                            int64_t,
                            const T*,
                            const T*,
                            const T*,
                            const T*,
                            T*,
                            T*,
                            T*);
302 303
};

// Attributes of the MatMul kernel: problem sizes m, n, k and an optional
// pre-packed weight buffer.
// NOTE(review): packed_weight is a raw pointer; ownership appears to lie
// with the caller — confirm before freeing here.
typedef struct matmul_attr_s {
  int m, n, k;
  void* packed_weight{nullptr};
  matmul_attr_s() = default;
  explicit matmul_attr_s(int m_, int n_, int k_, void* packed_weight_ = nullptr)
      : m(m_), n(n_), k(k_), packed_weight(packed_weight_) {}
} matmul_attr_t;

T
tensor-tang 已提交
312
template <typename T>
313 314
struct MatMulTuple {
  static constexpr KernelType kernel_type = kMatMul;
T
tensor-tang 已提交
315
  typedef T data_type;
316 317
  typedef matmul_attr_t attr_type;
  typedef void (*func_type)(const T*, const T*, T*, const matmul_attr_t*);
T
tensor-tang 已提交
318 319
};

320
template <typename T>
321 322
struct CRFDecodingTuple {
  static constexpr KernelType kernel_type = kCRFDecoding;
323 324 325 326 327 328
  typedef T data_type;
  typedef int attr_type;
  typedef void (*func_type)(const int, const T*, const T*, T*, int*, int);
};

template <typename T>
329 330
struct LayerNormTuple {
  static constexpr KernelType kernel_type = kLayerNorm;
331 332
  typedef T data_type;
  typedef int attr_type;
333 334
  typedef void (*func_type)(
      T*, T*, T*, T*, const T*, const T*, int, const float, int);
335 336
};

T
tensor-tang 已提交
337 338 339 340
// Just for adding to kernel pool without template
class Kernel {
 public:
  Kernel() = default;
T
tensor-tang 已提交
341
  virtual ~Kernel() = default;
342
  virtual const char* ImplType() const = 0;
T
tensor-tang 已提交
343 344 345
  DISABLE_COPY_AND_ASSIGN(Kernel);
};

346
template <typename KernelTuple>
T
tensor-tang 已提交
347
class KernelMore : public Kernel {
348
 public:
349 350 351
  using T = typename KernelTuple::data_type;
  using Func = typename KernelTuple::func_type;
  using Attr = typename KernelTuple::attr_type;
T
tensor-tang 已提交
352
  virtual Func GetFunc() const { return func; }
353 354
  // specify this kernel can be used, means it should not fail if use it.
  virtual bool CanBeUsed(const Attr& attr) const = 0;
T
tensor-tang 已提交
355 356 357 358 359

 protected:
  Func func{nullptr};
};

360 361
template <typename KernelTuple>
class ReferKernel : public KernelMore<KernelTuple> {
T
tensor-tang 已提交
362 363
 public:
  // Refer code can always be used
364
  bool CanBeUsed(const typename KernelTuple::attr_type& attr) const override {
T
tensor-tang 已提交
365 366
    return true;
  }
T
tensor-tang 已提交
367
  const char* ImplType() const override { return "Refer"; }
T
tensor-tang 已提交
368 369
};

}  // namespace jit
}  // namespace phi