op_registry.cc 8.3 KB
Newer Older
Y
Yan Chunwei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/core/op_registry.h"
#include <list>
#include <set>

namespace paddle {
namespace lite {

22 23 24 25
// Free-function shortcut: forwards to GetOp2PathDict() on the global
// OpKernelInfoCollector and returns the reference it hands back.
const std::map<std::string, std::string> &GetOp2PathDict() {
  auto &&collector = OpKernelInfoCollector::Global();
  return collector.GetOp2PathDict();
}

Y
Yan Chunwei 已提交
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create(
    const std::string &op_type,
    TargetType target,
    PrecisionType precision,
    DataLayoutType layout) {
  Place place{target, precision, layout};
  VLOG(5) << "creating " << op_type << " kernel for " << place.DebugString();
#define CREATE_KERNEL1(target__, precision__)                                \
  switch (layout) {                                                          \
    case DATALAYOUT(kNCHW):                                                  \
      return Create<TARGET(target__),                                        \
                    PRECISION(precision__),                                  \
                    DATALAYOUT(kNCHW)>(op_type);                             \
    case DATALAYOUT(kAny):                                                   \
      return Create<TARGET(target__),                                        \
                    PRECISION(precision__),                                  \
                    DATALAYOUT(kAny)>(op_type);                              \
    case DATALAYOUT(kNHWC):                                                  \
      return Create<TARGET(target__),                                        \
                    PRECISION(precision__),                                  \
                    DATALAYOUT(kNHWC)>(op_type);                             \
47 48 49 50 51 52 53 54 55 56 57 58
    case DATALAYOUT(kImageDefault):                                          \
      return Create<TARGET(target__),                                        \
                    PRECISION(precision__),                                  \
                    DATALAYOUT(kImageDefault)>(op_type);                     \
    case DATALAYOUT(kImageFolder):                                           \
      return Create<TARGET(target__),                                        \
                    PRECISION(precision__),                                  \
                    DATALAYOUT(kImageFolder)>(op_type);                      \
    case DATALAYOUT(kImageNW):                                               \
      return Create<TARGET(target__),                                        \
                    PRECISION(precision__),                                  \
                    DATALAYOUT(kImageNW)>(op_type);                          \
Y
Yan Chunwei 已提交
59 60 61 62 63 64 65 66 67 68 69 70 71 72
    default:                                                                 \
      LOG(FATAL) << "unsupported kernel layout " << DataLayoutToStr(layout); \
  }

#define CREATE_KERNEL(target__)                         \
  switch (precision) {                                  \
    case PRECISION(kFloat):                             \
      CREATE_KERNEL1(target__, kFloat);                 \
    case PRECISION(kInt8):                              \
      CREATE_KERNEL1(target__, kInt8);                  \
    case PRECISION(kFP16):                              \
      CREATE_KERNEL1(target__, kFP16);                  \
    case PRECISION(kAny):                               \
      CREATE_KERNEL1(target__, kAny);                   \
J
juncaipeng 已提交
73 74
    case PRECISION(kInt32):                             \
      CREATE_KERNEL1(target__, kInt32);                 \
75 76
    case PRECISION(kInt64):                             \
      CREATE_KERNEL1(target__, kInt64);                 \
Y
Yan Chunwei 已提交
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
    default:                                            \
      CHECK(false) << "not supported kernel precision " \
                   << PrecisionToStr(precision);        \
  }

  switch (target) {
    case TARGET(kHost): {
      CREATE_KERNEL(kHost);
    } break;
    case TARGET(kX86): {
      CREATE_KERNEL(kX86);
    } break;
    case TARGET(kCUDA): {
      CREATE_KERNEL(kCUDA);
    } break;
    case TARGET(kARM): {
      CREATE_KERNEL(kARM);
    } break;
    case TARGET(kOpenCL): {
      CREATE_KERNEL(kOpenCL);
    } break;
    case TARGET(kNPU): {
      CREATE_KERNEL(kNPU);
    } break;
101 102 103
    case TARGET(kXPU): {
      CREATE_KERNEL(kXPU);
    } break;
Y
Yan Chunwei 已提交
104 105 106
    case TARGET(kFPGA): {
      CREATE_KERNEL(kFPGA);
    } break;
107 108 109
    case TARGET(kBM): {
      CREATE_KERNEL(kBM);
    } break;
Y
Yan Chunwei 已提交
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
    default:
      CHECK(false) << "not supported kernel target " << TargetToStr(target);
  }

#undef CREATE_KERNEL
  return std::list<std::unique_ptr<KernelBase>>();
}

// Builds the registry table.
//
// `registries_` is sized to TARGET(NUM) * PRECISION(NUM) * DATALAYOUT(NUM)
// slots. For every (target, precision, layout) combination listed below, the
// slot at GetKernelOffset<target, precision, layout>() is set to a pointer to
// the corresponding KernelRegistryForTarget<...>::Global() instance.
// Combinations that are not listed are left as constructed by the
// `registries_` initializer.
KernelRegistry::KernelRegistry()
    : registries_(static_cast<int>(TARGET(NUM)) *
                  static_cast<int>(PRECISION(NUM)) *
                  static_cast<int>(DATALAYOUT(NUM))) {
// Stores the address of the per-place global registry in its table slot.
#define INIT_FOR(target__, precision__, layout__)                      \
  registries_[KernelRegistry::GetKernelOffset<TARGET(target__),        \
                                              PRECISION(precision__),  \
                                              DATALAYOUT(layout__)>()] \
      .set<KernelRegistryForTarget<TARGET(target__),                   \
                                   PRECISION(precision__),             \
                                   DATALAYOUT(layout__)> *>(           \
          &KernelRegistryForTarget<TARGET(target__),                   \
                                   PRECISION(precision__),             \
                                   DATALAYOUT(layout__)>::Global());
  // Supported places, grouped by target. A place must be listed here to get
  // a slot in the table.
  INIT_FOR(kCUDA, kFloat, kNCHW);
  INIT_FOR(kCUDA, kFloat, kNHWC);
  INIT_FOR(kCUDA, kInt8, kNCHW);
  INIT_FOR(kCUDA, kAny, kNCHW);
  INIT_FOR(kCUDA, kAny, kAny);
  INIT_FOR(kCUDA, kInt8, kNHWC);
  INIT_FOR(kCUDA, kInt64, kNCHW);
  INIT_FOR(kCUDA, kInt64, kNHWC);

  INIT_FOR(kHost, kFloat, kNCHW);
  INIT_FOR(kHost, kAny, kNCHW);
  INIT_FOR(kHost, kFloat, kNHWC);
  INIT_FOR(kHost, kFloat, kAny);
  INIT_FOR(kHost, kAny, kNHWC);
  INIT_FOR(kHost, kAny, kAny);

  INIT_FOR(kX86, kFloat, kNCHW);
  INIT_FOR(kX86, kAny, kNCHW);
  INIT_FOR(kX86, kAny, kAny);
  INIT_FOR(kX86, kInt64, kNCHW);

  INIT_FOR(kARM, kFloat, kNCHW);
  INIT_FOR(kARM, kFloat, kNHWC);
  INIT_FOR(kARM, kInt8, kNCHW);
  INIT_FOR(kARM, kInt8, kNHWC);
  INIT_FOR(kARM, kAny, kNCHW);
  INIT_FOR(kARM, kAny, kAny);
  INIT_FOR(kARM, kInt32, kNCHW);
  INIT_FOR(kARM, kInt64, kNCHW);

  INIT_FOR(kOpenCL, kFloat, kNCHW);
  INIT_FOR(kOpenCL, kFloat, kNHWC);
  INIT_FOR(kOpenCL, kAny, kNCHW);
  INIT_FOR(kOpenCL, kAny, kNHWC);
  INIT_FOR(kOpenCL, kFloat, kAny);
  INIT_FOR(kOpenCL, kInt8, kNCHW);
  INIT_FOR(kOpenCL, kAny, kAny);
  INIT_FOR(kOpenCL, kFP16, kNCHW);
  INIT_FOR(kOpenCL, kFP16, kNHWC);
  INIT_FOR(kOpenCL, kFP16, kImageDefault);
  INIT_FOR(kOpenCL, kFP16, kImageFolder);
  INIT_FOR(kOpenCL, kFP16, kImageNW);
  INIT_FOR(kOpenCL, kFloat, kImageDefault);
  INIT_FOR(kOpenCL, kFloat, kImageFolder);
  INIT_FOR(kOpenCL, kFloat, kImageNW);
  INIT_FOR(kOpenCL, kAny, kImageDefault);
  INIT_FOR(kOpenCL, kAny, kImageFolder);
  INIT_FOR(kOpenCL, kAny, kImageNW);

  INIT_FOR(kNPU, kFloat, kNCHW);
  INIT_FOR(kNPU, kFloat, kNHWC);
  INIT_FOR(kNPU, kInt8, kNCHW);
  INIT_FOR(kNPU, kInt8, kNHWC);
  INIT_FOR(kNPU, kAny, kNCHW);
  INIT_FOR(kNPU, kAny, kNHWC);
  INIT_FOR(kNPU, kAny, kAny);

  INIT_FOR(kXPU, kFloat, kNCHW);
  INIT_FOR(kXPU, kInt8, kNCHW);
  INIT_FOR(kXPU, kAny, kNCHW);
  INIT_FOR(kXPU, kAny, kAny);

  INIT_FOR(kFPGA, kFP16, kNHWC);
  INIT_FOR(kFPGA, kFP16, kAny);
  INIT_FOR(kFPGA, kFloat, kNHWC);
  INIT_FOR(kFPGA, kAny, kNHWC);
  INIT_FOR(kFPGA, kAny, kAny);

  INIT_FOR(kBM, kFloat, kNCHW);
  INIT_FOR(kBM, kInt8, kNCHW);
  INIT_FOR(kBM, kAny, kNCHW);
  INIT_FOR(kBM, kAny, kAny);
#undef INIT_FOR
}

// Accessor for the process-wide KernelRegistry. The instance is allocated on
// first call and is never deleted by this function.
KernelRegistry &KernelRegistry::Global() {
  static KernelRegistry *const instance = new KernelRegistry();
  return *instance;
}

}  // namespace lite
}  // namespace paddle