/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. */

#pragma once

#include <cstring>
#include <iostream>
#include <map>
#include <memory>
#include <unordered_map>
#include <utility>  // for std::move
#include <vector>

#include "paddle/phi/common/place.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/kernels/funcs/jit/gen_base.h"
#include "paddle/phi/kernels/funcs/jit/kernel_base.h"
#include "paddle/phi/kernels/funcs/jit/kernel_key.h"
#include "paddle/phi/kernels/funcs/jit/kernel_pool.h"

namespace phi {
namespace jit {

class GenBase;

template <typename KernelTuple, typename PlaceType>
inline typename std::enable_if<
    std::is_same<typename KernelTuple::data_type, float>::value &&
        std::is_same<PlaceType, phi::CPUPlace>::value,
    const Kernel*>::type
GetJitCode(const typename KernelTuple::attr_type& attr) {
  using Attr = typename KernelTuple::attr_type;
  int64_t key = JitCodeKey<Attr>(attr);
  auto& codes = JitCodePool<KernelTuple::kernel_type>::Instance();
  if (codes.Has(key)) {
    return codes.AllKernels().at(key).get();
  }

  // The creator is not related to attr, so KernelKey can be used as the key.
  KernelKey kkey(KernelTuple::kernel_type, PlaceType());
  // pool: (KernelKey(type, place), vector<GenCreatorPtr>)
  auto& creator_map = JitCodeCreatorPool::Instance().AllCreators();
  auto iter = creator_map.find(kkey);
  if (iter != creator_map.end()) {
    auto& creators = iter->second;
    for (auto& cur : creators) {
      auto i = dynamic_cast<const JitCodeCreator<Attr>*>(cur.get());
      if (i && i->CanBeUsed(attr)) {
        auto p = i->CreateJitCode(attr);
        if (p) {
          auto res = p.get();
          codes.Insert(key, std::move(p));
          return res;
        }
      }
    }
  }
  return nullptr;
}
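
// A minimal usage sketch (illustrative only; it assumes a KernelTuple such as
// VMulTuple<float> from kernel_base.h, whose attr_type is the vector length):
//
//   const Kernel* jit = GetJitCode<VMulTuple<float>, phi::CPUPlace>(8);
//   // jit is nullptr when no registered creator can generate code for attr.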

template <typename KernelTuple, typename PlaceType>
inline typename std::enable_if<
    !std::is_same<typename KernelTuple::data_type, float>::value ||
        !std::is_same<PlaceType, phi::CPUPlace>::value,
    const Kernel*>::type
GetJitCode(const typename KernelTuple::attr_type& attr UNUSED) {
  return nullptr;
}

// Refer code does not depend on attr; the template argument is only used for
// the cast. Refer is always on CPUPlace.
template <typename KernelTuple>
inline const Kernel* GetReferKernel() {
  auto& ref_pool = ReferKernelPool::Instance().AllKernels();
  KernelKey kkey(KernelTuple::kernel_type, phi::CPUPlace());
  auto ref_iter = ref_pool.find(kkey);
  PADDLE_ENFORCE_NE(
      ref_iter,
      ref_pool.end(),
      phi::errors::PreconditionNotMet(
          "Every Refer kernel of jitcode should have a reference function."));
  auto& ref_impls = ref_iter->second;
  for (auto& impl : ref_impls) {
    auto i = dynamic_cast<const ReferKernel<KernelTuple>*>(impl.get());
    if (i) {
      return i;
    }
  }
  return nullptr;
}

template <typename KernelTuple>
inline typename KernelTuple::func_type GetReferFunc() {
  auto ker = GetReferKernel<KernelTuple>();
  auto p = dynamic_cast<const ReferKernel<KernelTuple>*>(ker);
  PADDLE_ENFORCE_NOT_NULL(
      p,
      phi::errors::InvalidArgument("Get the reference code of kernel in CPU "
                                   "failed. The Refer kernel should exsit."));
  return p->GetFunc();
}
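
// A minimal usage sketch (illustrative; assumes a VAddTuple<float> exists in
// kernel_base.h with func_type void(const float*, const float*, float*, int)):
//
//   auto ref = GetReferFunc<VAddTuple<float>>();
//   ref(x, y, z, n);  // plain C++ fallback, always available on CPUPlace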

// Return all Kernels that can be used
template <typename KernelTuple, typename PlaceType>
std::vector<const Kernel*> GetAllCandidateKernels(
    const typename KernelTuple::attr_type& attr) {
  // The search order should be: jitcode > more > refer.
  std::vector<const Kernel*> res;
  auto jitker = GetJitCode<KernelTuple, PlaceType>(attr);
  if (jitker) {
    res.emplace_back(jitker);
  }

  // more kernelpool: (KernelKey(type, place), vector<KernelPtr>)
  KernelKey kkey(KernelTuple::kernel_type, PlaceType());
  auto& pool = KernelPool::Instance().AllKernels();
  auto iter = pool.find(kkey);
  if (iter != pool.end()) {
    auto& impls = iter->second;
    for (auto& impl : impls) {
      auto i = dynamic_cast<const KernelMore<KernelTuple>*>(impl.get());
      if (i && i->CanBeUsed(attr)) {
        res.emplace_back(i);
      }
    }
  }

  // The last implementation should be the reference function on CPUPlace.
  auto ref = GetReferKernel<KernelTuple>();
  PADDLE_ENFORCE_NOT_NULL(
      ref,
      phi::errors::InvalidArgument("Get all candicate kernel in CPU failed. "
                                   "The Refer Kernel can not be empty."));
  res.emplace_back(ref);
  return res;
}

template <typename KernelTuple, typename PlaceType = phi::CPUPlace>
std::vector<std::pair<std::string, typename KernelTuple::func_type>>
GetAllCandidateFuncsWithTypes(const typename KernelTuple::attr_type& attr) {
  using Func = typename KernelTuple::func_type;
  auto kers = GetAllCandidateKernels<KernelTuple, PlaceType>(attr);
  std::vector<std::pair<std::string, Func>> res;
  for (auto k : kers) {
    std::string name = k->ImplType();
    if (name == "JitCode") {
      auto i = dynamic_cast<const GenBase*>(k);
      PADDLE_ENFORCE_NOT_NULL(i,
                              phi::errors::InvalidArgument(
                                  "Generate jitcode kernel (GenBase) failed."));
      res.emplace_back(std::make_pair(name, i->template getCode<Func>()));
    } else {
      auto i = dynamic_cast<const KernelMore<KernelTuple>*>(k);
      PADDLE_ENFORCE_NOT_NULL(
          i, phi::errors::InvalidArgument("Kernel cast (KernelMore) failed."));
      res.emplace_back(std::make_pair(name, i->GetFunc()));
    }
  }
  return res;
}
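
// A sketch of iterating all usable implementations, e.g. for benchmarking
// (VMulTuple<float> is illustrative; names come from each kernel's ImplType):
//
//   for (auto& impl : GetAllCandidateFuncsWithTypes<VMulTuple<float>>(n)) {
//     // impl.first is "JitCode", a KernelMore impl name, or "Refer";
//     // impl.second is the callable function.
//     impl.second(x, y, z, n);
//   }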

template <typename KernelTuple, typename PlaceType = phi::CPUPlace>
std::vector<typename KernelTuple::func_type> GetAllCandidateFuncs(
    const typename KernelTuple::attr_type& attr) {
  auto funcs = GetAllCandidateFuncsWithTypes<KernelTuple, PlaceType>(attr);
  std::vector<typename KernelTuple::func_type> res;
  for (auto& i : funcs) {
    res.emplace_back(i.second);
  }
  return res;
}

template <typename KernelTuple, typename PlaceType = phi::CPUPlace>
typename KernelTuple::func_type GetDefaultBestFunc(
    const typename KernelTuple::attr_type& attr) {
  auto funcs = GetAllCandidateFuncs<KernelTuple, PlaceType>(attr);
  PADDLE_ENFORCE_GE(funcs.size(),
                    1UL,
                    phi::errors::InvalidArgument(
                        "There should be at least one candidate jit kernel on CPU."));
  // Here one could run a runtime benchmark for this attr and return the best
  // candidate. For now, just return the first one, since the candidates are
  // searched in a fixed order that has been tuned offline.
  return funcs[0];
}
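
// A minimal sketch of picking a function directly (illustrative; this bypasses
// the KernelFuncs cache below, so each call repeats the search):
//
//   auto f = GetDefaultBestFunc<VMulTuple<float>, phi::CPUPlace>(n);
//   f(x, y, z, n);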

extern std::map<size_t, std::shared_ptr<void>>& GetFuncCacheMap();

template <typename KernelTuple, typename PlaceType>
class KernelFuncs {
 public:
  KernelFuncs() = default;
  static KernelFuncs& Cache() {
    auto& func_cache_map = GetFuncCacheMap();
    auto key = typeid(KernelFuncs<KernelTuple, PlaceType>).hash_code();
    auto iter = func_cache_map.find(key);
    if (iter != func_cache_map.end()) {
      return *static_cast<KernelFuncs<KernelTuple, PlaceType>*>(
          iter->second.get());
    } else {
      std::shared_ptr<void> cache =
          std::make_shared<KernelFuncs<KernelTuple, PlaceType>>();
      func_cache_map.emplace(key, cache);
      return *static_cast<KernelFuncs<KernelTuple, PlaceType>*>(cache.get());
    }
  }

  // The exposed interface to use.
  typename KernelTuple::func_type At(
      const typename KernelTuple::attr_type& attr) {
    // Maybe this is not good enough here, since not all kernels have jitcode.
    int64_t key = JitCodeKey<typename KernelTuple::attr_type>(attr);
    if (Has(key)) {
      return funcs_.at(key);
    }
    // If this attr is not in the cache, get the default best function.
    auto func = GetDefaultBestFunc<KernelTuple, PlaceType>(attr);
    Insert(key, func);
    return func;
  }

  typename KernelTuple::func_type operator[](
      const typename KernelTuple::attr_type& attr) {
    return At(attr);
  }

 protected:
  bool Has(int64_t key) const { return funcs_.find(key) != funcs_.end(); }
  void Insert(int64_t key, typename KernelTuple::func_type func) {
    funcs_.emplace(key, func);
  }

 private:
  std::unordered_map<int64_t, typename KernelTuple::func_type> funcs_;
  DISABLE_COPY_AND_ASSIGN(KernelFuncs);
};
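
// Typical cached usage (a sketch; VMulTuple<float> is illustrative):
//
//   auto f = KernelFuncs<VMulTuple<float>, phi::CPUPlace>::Cache().At(n);
//   f(x, y, z, n);
//
// The first call for a given attr pays the search (and possible codegen) cost;
// later calls with the same attr hit the funcs_ cache directly.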

const char* to_string(KernelType kt);
const char* to_string(SeqPoolType kt);

KernelType to_kerneltype(const std::string& act);

inline std::ostream& operator<<(std::ostream& os, const lstm_attr_t& attr) {
  os << "dim_size[" << attr.d << "],act_gate[" << to_string(attr.act_gate)
     << "],act_cand[" << to_string(attr.act_cand) << "],act_cell["
     << to_string(attr.act_cell) << "],use_peephole["
     << (attr.use_peephole ? "True" : "False") << "]";
  return os;
}

inline std::ostream& operator<<(std::ostream& os, const gru_attr_t& attr) {
  os << "dim_size[" << attr.d << "],act_gate[" << to_string(attr.act_gate)
     << "],act_cand[" << to_string(attr.act_cand) << "]";
  return os;
}

inline std::ostream& operator<<(std::ostream& os, const seq_pool_attr_t& attr) {
  os << "height_size[" << attr.h << "],width_size[" << attr.w << "],pool_type["
     << to_string(attr.type) << "]";
  return os;
}

inline std::ostream& operator<<(std::ostream& os,
                                const emb_seq_pool_attr_t& attr) {
  os << "table_height[" << attr.table_height << "],table_width["
     << attr.table_width << "],index_height[" << attr.index_height
     << "],index_width[" << attr.index_width << "],output_width["
     << attr.out_width << "],pool_type[" << to_string(attr.pool_type) << "]";
  return os;
}

inline std::ostream& operator<<(std::ostream& os, const adam_attr_t& attr) {
  os << "beta1[" << attr.beta1 << "],beta2[" << attr.beta2 << "]";
  return os;
}

inline std::ostream& operator<<(std::ostream& os, const sgd_attr_t& attr) {
  os << "param_height[" << attr.param_height << "],param_width["
     << attr.param_width << "],grad_height[" << attr.grad_height
     << "],grad_width[" << attr.grad_width << "],selected_rows_size["
     << attr.selected_rows_size << "]";
  return os;
}

inline std::ostream& operator<<(std::ostream& os, const matmul_attr_t& attr) {
  os << "M[" << attr.m << "],N[" << attr.n << "],K[" << attr.k << "]";
  return os;
}

// Expose the method to pack the matmul weights.
template <typename T>
void pack_weights(const T* src, T* dst, int n, int k);

}  // namespace jit
}  // namespace phi