// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/core/op_registry.h"
#include <list>
#include <memory>
#include <set>

namespace paddle {
namespace lite {

const std::map<std::string, std::string> &GetOp2PathDict() {
  return OpKernelInfoCollector::Global().GetOp2PathDict();
}

std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create(
    const std::string &op_type,
    TargetType target,
    PrecisionType precision,
    DataLayoutType layout) {
  Place place{target, precision, layout};
  VLOG(5) << "creating " << op_type << " kernel for " << place.DebugString();
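// CREATE_KERNEL1: with target and precision fixed by the enclosing macro,
// switch on the runtime layout and forward to the statically typed
// Create<Target, Precision, Layout>(op_type) overload.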
#define CREATE_KERNEL1(target__, precision__)                                \
  switch (layout) {                                                          \
    case DATALAYOUT(kNCHW):                                                  \
      return Create<TARGET(target__),                                        \
                    PRECISION(precision__),                                  \
                    DATALAYOUT(kNCHW)>(op_type);                             \
    case DATALAYOUT(kAny):                                                   \
      return Create<TARGET(target__),                                        \
                    PRECISION(precision__),                                  \
                    DATALAYOUT(kAny)>(op_type);                              \
    case DATALAYOUT(kNHWC):                                                  \
      return Create<TARGET(target__),                                        \
                    PRECISION(precision__),                                  \
                    DATALAYOUT(kNHWC)>(op_type);                             \
    case DATALAYOUT(kImageDefault):                                          \
      return Create<TARGET(target__),                                        \
                    PRECISION(precision__),                                  \
                    DATALAYOUT(kImageDefault)>(op_type);                     \
    case DATALAYOUT(kImageFolder):                                           \
      return Create<TARGET(target__),                                        \
                    PRECISION(precision__),                                  \
                    DATALAYOUT(kImageFolder)>(op_type);                      \
    case DATALAYOUT(kImageNW):                                               \
      return Create<TARGET(target__),                                        \
                    PRECISION(precision__),                                  \
                    DATALAYOUT(kImageNW)>(op_type);                          \
    default:                                                                 \
      LOG(FATAL) << "unsupported kernel layout " << DataLayoutToStr(layout); \
  }

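// CREATE_KERNEL: for a fixed target, switch on the runtime precision and
// delegate the layout dimension to CREATE_KERNEL1.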
#define CREATE_KERNEL(target__)                         \
  switch (precision) {                                  \
    case PRECISION(kFloat):                             \
      CREATE_KERNEL1(target__, kFloat);                 \
    case PRECISION(kInt8):                              \
      CREATE_KERNEL1(target__, kInt8);                  \
    case PRECISION(kFP16):                              \
      CREATE_KERNEL1(target__, kFP16);                  \
    case PRECISION(kAny):                               \
      CREATE_KERNEL1(target__, kAny);                   \
    case PRECISION(kInt32):                             \
      CREATE_KERNEL1(target__, kInt32);                 \
    case PRECISION(kInt64):                             \
      CREATE_KERNEL1(target__, kInt64);                 \
    default:                                            \
      CHECK(false) << "not supported kernel precision " \
                   << PrecisionToStr(precision);        \
  }

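  // Map the runtime (target, precision, layout) triple to one concrete
  // template instantiation: this switch covers the target, the macros above
  // cover precision and layout.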
  switch (target) {
    case TARGET(kHost): {
      CREATE_KERNEL(kHost);
    } break;
    case TARGET(kX86): {
      CREATE_KERNEL(kX86);
    } break;
    case TARGET(kCUDA): {
      CREATE_KERNEL(kCUDA);
    } break;
    case TARGET(kARM): {
      CREATE_KERNEL(kARM);
    } break;
    case TARGET(kOpenCL): {
      CREATE_KERNEL(kOpenCL);
    } break;
    case TARGET(kNPU): {
      CREATE_KERNEL(kNPU);
    } break;
    case TARGET(kAPU): {
      CREATE_KERNEL(kAPU);
    } break;
    case TARGET(kXPU): {
      CREATE_KERNEL(kXPU);
    } break;
    case TARGET(kFPGA): {
      CREATE_KERNEL(kFPGA);
    } break;
    case TARGET(kBM): {
      CREATE_KERNEL(kBM);
    } break;
    case TARGET(kMLU): {
      CREATE_KERNEL(kMLU);
    } break;
    case TARGET(kRKNPU): {
      CREATE_KERNEL(kRKNPU);
    } break;
    default:
      CHECK(false) << "not supported kernel target " << TargetToStr(target);
  }

#undef CREATE_KERNEL
#undef CREATE_KERNEL1
  return std::list<std::unique_ptr<KernelBase>>();
}

KernelRegistry::KernelRegistry()
    : registries_(static_cast<int>(TARGET(NUM)) *
                  static_cast<int>(PRECISION(NUM)) *
                  static_cast<int>(DATALAYOUT(NUM))) {
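// INIT_FOR binds the slot computed by GetKernelOffset<Target, Precision,
// Layout>() to the global KernelRegistryForTarget singleton for that
// (target, precision, layout) combination.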
#define INIT_FOR(target__, precision__, layout__)                      \
  registries_[KernelRegistry::GetKernelOffset<TARGET(target__),        \
                                              PRECISION(precision__),  \
                                              DATALAYOUT(layout__)>()] \
      .set<KernelRegistryForTarget<TARGET(target__),                   \
                                   PRECISION(precision__),             \
                                   DATALAYOUT(layout__)> *>(           \
          &KernelRegistryForTarget<TARGET(target__),                   \
                                   PRECISION(precision__),             \
                                   DATALAYOUT(layout__)>::Global());
  // Register the (target, precision, layout) combinations currently supported
  // by each backend.
  INIT_FOR(kCUDA, kFloat, kNCHW);
  INIT_FOR(kCUDA, kFloat, kNHWC);
  INIT_FOR(kCUDA, kInt8, kNCHW);
  INIT_FOR(kCUDA, kAny, kNCHW);
  INIT_FOR(kCUDA, kAny, kAny);
  INIT_FOR(kCUDA, kInt8, kNHWC);
  INIT_FOR(kCUDA, kInt64, kNCHW);
  INIT_FOR(kCUDA, kInt64, kNHWC);

  INIT_FOR(kMLU, kFloat, kNHWC);
  INIT_FOR(kMLU, kFloat, kNCHW);
  INIT_FOR(kMLU, kFP16, kNHWC);
  INIT_FOR(kMLU, kFP16, kNCHW);
  INIT_FOR(kMLU, kInt8, kNHWC);
  INIT_FOR(kMLU, kInt8, kNCHW);
  INIT_FOR(kMLU, kInt16, kNHWC);
  INIT_FOR(kMLU, kInt16, kNCHW);

  INIT_FOR(kHost, kAny, kNCHW);
  INIT_FOR(kHost, kAny, kNHWC);
  INIT_FOR(kHost, kAny, kAny);
  INIT_FOR(kHost, kBool, kNCHW);
  INIT_FOR(kHost, kBool, kNHWC);
  INIT_FOR(kHost, kBool, kAny);
  INIT_FOR(kHost, kFloat, kNCHW);
  INIT_FOR(kHost, kFloat, kNHWC);
  INIT_FOR(kHost, kFloat, kAny);
  INIT_FOR(kHost, kFP16, kNCHW);
  INIT_FOR(kHost, kFP16, kNHWC);
  INIT_FOR(kHost, kFP16, kAny);
  INIT_FOR(kHost, kInt8, kNCHW);
  INIT_FOR(kHost, kInt8, kNHWC);
  INIT_FOR(kHost, kInt8, kAny);
  INIT_FOR(kHost, kInt16, kNCHW);
  INIT_FOR(kHost, kInt16, kNHWC);
  INIT_FOR(kHost, kInt16, kAny);
  INIT_FOR(kHost, kInt32, kNCHW);
  INIT_FOR(kHost, kInt32, kNHWC);
  INIT_FOR(kHost, kInt32, kAny);
  INIT_FOR(kHost, kInt64, kNCHW);
  INIT_FOR(kHost, kInt64, kNHWC);
  INIT_FOR(kHost, kInt64, kAny);

  INIT_FOR(kX86, kFloat, kNCHW);
  INIT_FOR(kX86, kAny, kNCHW);
  INIT_FOR(kX86, kAny, kAny);
  INIT_FOR(kX86, kInt64, kNCHW);

  INIT_FOR(kARM, kFloat, kNCHW);
  INIT_FOR(kARM, kFloat, kNHWC);
  INIT_FOR(kARM, kInt8, kNCHW);
  INIT_FOR(kARM, kInt8, kNHWC);
  INIT_FOR(kARM, kAny, kNCHW);
  INIT_FOR(kARM, kAny, kAny);
  INIT_FOR(kARM, kInt32, kNCHW);
  INIT_FOR(kARM, kInt64, kNCHW);

  INIT_FOR(kOpenCL, kFloat, kNCHW);
  INIT_FOR(kOpenCL, kFloat, kNHWC);
  INIT_FOR(kOpenCL, kAny, kNCHW);
  INIT_FOR(kOpenCL, kAny, kNHWC);
  INIT_FOR(kOpenCL, kFloat, kAny);
  INIT_FOR(kOpenCL, kInt8, kNCHW);
  INIT_FOR(kOpenCL, kAny, kAny);
  INIT_FOR(kOpenCL, kFP16, kNCHW);
  INIT_FOR(kOpenCL, kFP16, kNHWC);
  INIT_FOR(kOpenCL, kFP16, kImageDefault);
  INIT_FOR(kOpenCL, kFP16, kImageFolder);
  INIT_FOR(kOpenCL, kFP16, kImageNW);
  INIT_FOR(kOpenCL, kFloat, kImageDefault);
  INIT_FOR(kOpenCL, kFloat, kImageFolder);
  INIT_FOR(kOpenCL, kFloat, kImageNW);
  INIT_FOR(kOpenCL, kAny, kImageDefault);
  INIT_FOR(kOpenCL, kAny, kImageFolder);
  INIT_FOR(kOpenCL, kAny, kImageNW);

  INIT_FOR(kNPU, kFloat, kNCHW);
  INIT_FOR(kNPU, kFloat, kNHWC);
  INIT_FOR(kNPU, kInt8, kNCHW);
  INIT_FOR(kNPU, kInt8, kNHWC);
  INIT_FOR(kNPU, kAny, kNCHW);
  INIT_FOR(kNPU, kAny, kNHWC);
  INIT_FOR(kNPU, kAny, kAny);

  INIT_FOR(kAPU, kInt8, kNCHW);
  INIT_FOR(kXPU, kFloat, kNCHW);
  INIT_FOR(kXPU, kInt8, kNCHW);
  INIT_FOR(kXPU, kAny, kNCHW);
  INIT_FOR(kXPU, kAny, kAny);

  INIT_FOR(kFPGA, kFP16, kNHWC);
  INIT_FOR(kFPGA, kFP16, kAny);
  INIT_FOR(kFPGA, kFloat, kNHWC);
  INIT_FOR(kFPGA, kAny, kNHWC);
  INIT_FOR(kFPGA, kAny, kAny);

  INIT_FOR(kBM, kFloat, kNCHW);
  INIT_FOR(kBM, kInt8, kNCHW);
  INIT_FOR(kBM, kAny, kNCHW);
  INIT_FOR(kBM, kAny, kAny);

  INIT_FOR(kRKNPU, kFloat, kNCHW);
  INIT_FOR(kRKNPU, kInt8, kNCHW);
  INIT_FOR(kRKNPU, kAny, kNCHW);
  INIT_FOR(kRKNPU, kAny, kAny);
#undef INIT_FOR
}

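// Process-wide registry singleton; the function-local static is initialized
// lazily and, since C++11, in a thread-safe manner.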
KernelRegistry &KernelRegistry::Global() {
  static auto x = std::unique_ptr<KernelRegistry>(new KernelRegistry);
  return *x;
}

}  // namespace lite
}  // namespace paddle