/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once

#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "dnnl.hpp"  // NOLINT
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle {
#ifdef PADDLE_WITH_MKLDNN
using MKLDNNMemoryFormat = dnnl::memory::format_tag;
#endif
namespace platform {

using MKLDNNStream = dnnl::stream;
using MKLDNNEngine = dnnl::engine;
using MKLDNNMemory = dnnl::memory;
using MKLDNNMemoryDescriptor = dnnl::memory::desc;
using MKLDNNPrimitive = dnnl::primitive;
using MKLDNNPrimitiveDesc = dnnl::handle<dnnl_primitive_desc_t>;

typedef std::unique_ptr<MKLDNNStream> MKLDNNStreamPtr;
typedef std::unique_ptr<MKLDNNEngine> MKLDNNEnginePtr;
typedef std::unique_ptr<MKLDNNMemory> MKLDNNMemoryPtr;
typedef std::unique_ptr<MKLDNNPrimitive> MKLDNNPrimitivePtr;
typedef std::unique_ptr<MKLDNNPrimitiveDesc> MKLDNNPrimitiveDescPtr;

template <typename Type>
void* to_void_cast(const Type* t) {
  return static_cast<void*>(const_cast<Type*>(t));
}

template <typename Type>
void* to_void_reinterpret_cast(const Type* t) {
  return reinterpret_cast<void*>(const_cast<Type*>(t));
}

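// Illustrative usage sketch (not part of the original helpers): oneDNN memory
// objects take a non-const void* handle, so const tensor data must be cast
// first. Here `src_md`, `engine` and `input` are assumed to exist in the
// caller:
//
//   const float* src_data = input.data<float>();
//   dnnl::memory src_mem(src_md, engine, to_void_cast<float>(src_data));
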
template <class Type>
using tf_desc = typename Type::desc;

template <class Type>
using tf_pd = typename Type::primitive_desc;

template <typename Type, typename Engine, typename... Args>
std::shared_ptr<tf_pd<Type>> MKLDNNFwdPrimitiveDesc(const Engine& e,
                                                    Args&&... args) {
  auto desc = tf_desc<Type>(dnnl::prop_kind::forward, (args)...);
  auto pd = new tf_pd<Type>(desc, e);
  return std::shared_ptr<tf_pd<Type>>(pd);
}

template <typename Type, typename Engine, typename Primitive, typename... Args>
tf_pd<Type> MKLDNNBwdPrimitiveDesc(const Engine& e,
                                   const Primitive& p,
                                   Args&&... args) {
  auto desc = tf_desc<Type>(args...);
  return tf_pd<Type>(desc, e, p);
}

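// Hypothetical usage sketch of the forward helper above; the softmax memory
// descriptor `data_md`, the axis value and `engine` are assumptions made for
// illustration, and the exact desc arguments depend on the oneDNN version:
//
//   auto softmax_pd = MKLDNNFwdPrimitiveDesc<dnnl::softmax_forward>(
//       engine, data_md, /*softmax_axis=*/1);
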
inline void MatchShapeToLayout(phi::DenseTensor* tensor_in,
                               phi::DataLayout from,
                               phi::DataLayout to) {
  auto print_dims = [](const std::vector<int>& dims) {
    std::ostringstream oss;

    if (!dims.empty()) {
      oss << "[";
      // Convert all but the last element to avoid a trailing ","
      std::copy(
          dims.begin(), dims.end() - 1, std::ostream_iterator<int>(oss, ","));

      // Now add the last element with no delimiter
      oss << dims.back() << "]";
    }

    return oss.str();
  };

  // In these data layouts the channel dimension is either in the 2nd position
  // (nChw) or in the last position (nhwC), so for dim==2 the two layouts
  // coincide and nothing needs to be done. The same holds for dim==1, where
  // only one layout is possible.
  if (tensor_in->dims().size() < 3) {
    VLOG(3) << "Keeping kMKLDNN/kNHWC/kNDHWC output_shape"
            << print_dims(phi::vectorize<int>(tensor_in->dims()));
    return;
  }

  switch (from) {
    case phi::DataLayout::kMKLDNN:
      if ((to == phi::DataLayout::kNHWC) || (to == phi::DataLayout::kNDHWC)) {
        auto dims = phi::vectorize<int>(tensor_in->dims());
        std::rotate(dims.begin() + 1, dims.begin() + 2, dims.end());
        tensor_in->Resize(phi::make_ddim(dims));
        VLOG(3) << "Rotating Shape from: kMKLDNN to: kNHWC/kNDHWC output_shape"
                << print_dims(dims);
      }
      break;
    case phi::DataLayout::kNHWC:
    case phi::DataLayout::kNDHWC:
      if (to == phi::DataLayout::kMKLDNN) {
        auto dims = phi::vectorize<int>(tensor_in->dims());
        std::rotate(dims.begin() + 1, dims.end() - 1, dims.end());
        tensor_in->Resize(phi::make_ddim(dims));
        VLOG(3) << "Rotating Shape from: kNHWC/kNDHWC to: kMKLDNN output_shape"
                << print_dims(dims);
      }
      break;
    default:
      break;
  }
}

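// Example of the rotation performed above (illustrative values): a 4-D shape
// converted from kMKLDNN to kNHWC moves the channel dimension to the back,
// and the kNHWC/kNDHWC -> kMKLDNN branch applies the inverse rotation, e.g.
// [2, 3, 4, 5] -> [2, 4, 5, 3] -> [2, 3, 4, 5].
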
struct mkldnn_dummy_primitive {
  struct primitive_desc {};
  struct desc {};
};

inline dnnl::memory::desc MKLDNNMemDesc(const std::vector<int64_t>& dims,
                                        dnnl::memory::data_type data_type,
                                        MKLDNNMemoryFormat format) {
  return dnnl::memory::desc({dims}, data_type, format);
}

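// Minimal usage sketch of MKLDNNMemDesc (values are illustrative only):
//
//   std::vector<int64_t> dims = {8, 3, 224, 224};  // NCHW
//   auto md = MKLDNNMemDesc(dims,
//                           dnnl::memory::data_type::f32,
//                           MKLDNNMemoryFormat::nchw);
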
inline void ClearMKLDNNCache(const platform::Place& place,
                             void* ptr = nullptr) {
  // Clear the mkl-dnn cache for the given place
  if (platform::is_cpu_place(place)) {
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    platform::MKLDNNDeviceContext* dev_ctx =
        (platform::MKLDNNDeviceContext*)pool.Get(place);
    dev_ctx->ResetBlobMap(ptr);
  }
}

inline void DontClearMKLDNNCache(const platform::Place& place) {
  // Block the next clearing of the mkl-dnn cache for the given place
  if (platform::is_cpu_place(place)) {
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    platform::MKLDNNDeviceContext* dev_ctx =
        (platform::MKLDNNDeviceContext*)pool.Get(place);
    dev_ctx->BlockNextCacheClearing();
  }
}

template <typename Type>
dnnl::memory::data_type MKLDNNGetDataType() {
  return dnnl::memory::data_type::undef;
}

template <>
inline dnnl::memory::data_type MKLDNNGetDataType<float>() {
  return dnnl::memory::data_type::f32;
}
template <>
inline dnnl::memory::data_type MKLDNNGetDataType<int32_t>() {
  return dnnl::memory::data_type::s32;
}
template <>
inline dnnl::memory::data_type MKLDNNGetDataType<int8_t>() {
  return dnnl::memory::data_type::s8;
}
template <>
inline dnnl::memory::data_type MKLDNNGetDataType<uint8_t>() {
  return dnnl::memory::data_type::u8;
}

template <>
inline dnnl::memory::data_type MKLDNNGetDataType<paddle::platform::bfloat16>() {
  return dnnl::memory::data_type::bf16;
}

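// Usage sketch: the primary template returns undef for unsupported types,
// while the specializations map C++ types to oneDNN data types, e.g.
//
//   auto dt = MKLDNNGetDataType<float>();    // dnnl::memory::data_type::f32
//   auto qt = MKLDNNGetDataType<uint8_t>();  // dnnl::memory::data_type::u8
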
inline void Reorder(dnnl::memory src,
                    dnnl::memory dst,
                    const dnnl::engine& engine) {
  auto reorder_prim = dnnl::reorder(src, dst);
  auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
  platform::RecordEvent record_reorder("int_reorder",
                                       platform::TracerEventType::UserDefined,
                                       2,
                                       platform::EventRole::kUniqueOp);
  reorder_prim.execute(astream, src, dst);
  astream.wait();
}

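// Hypothetical usage sketch: reorder data between two differently laid out
// memory objects on the same engine (`src_md`, `dst_md`, `engine` and the
// data pointers are assumptions for illustration):
//
//   dnnl::memory src_mem(src_md, engine, to_void_cast<float>(src_ptr));
//   dnnl::memory dst_mem(dst_md, engine, dst_ptr);
//   Reorder(src_mem, dst_mem, engine);
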
inline dnnl::memory::format_tag GetPlainMKLDNNFormat(int tensor_rank) {
  switch (tensor_rank) {
    case 1:
      return dnnl::memory::format_tag::a;
    case 2:
      return dnnl::memory::format_tag::ab;
    case 3:
      return dnnl::memory::format_tag::abc;
    case 4:
      return dnnl::memory::format_tag::abcd;
    case 5:
      return dnnl::memory::format_tag::abcde;
    case 6:
      return dnnl::memory::format_tag::abcdef;
    case 7:
      return dnnl::memory::format_tag::abcdefg;
    case 8:
      return dnnl::memory::format_tag::abcdefgh;
    case 9:
      return dnnl::memory::format_tag::abcdefghi;
    default:
      PADDLE_THROW(platform::errors::Unimplemented(
          "Paddle supports tensors with rank in range <1, 9>, but received "
          "tensor with rank: %d",
          tensor_rank));
  }
}

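// Example: a rank-4 tensor maps to the plain, dense row-major layout (abcd,
// which matches nchw ordering), e.g.
//
//   auto tag = GetPlainMKLDNNFormat(4);  // dnnl::memory::format_tag::abcd
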
inline MKLDNNMemoryFormat MKLDNNFormatForSize(size_t dims_size,
                                              MKLDNNMemoryFormat data_format) {
  if (dims_size == 1) {
    return MKLDNNMemoryFormat::x;
  } else if (dims_size == 2) {
    return MKLDNNMemoryFormat::nc;
  } else if (dims_size == 3) {
    if (data_format == MKLDNNMemoryFormat::nchw) {
      return MKLDNNMemoryFormat::ncw;
    } else if (data_format == MKLDNNMemoryFormat::nhwc) {
      return MKLDNNMemoryFormat::nwc;
    }
  } else if (dims_size == 4) {
    if (data_format == MKLDNNMemoryFormat::goihw) {
      return MKLDNNMemoryFormat::oihw;
    }
  } else if (dims_size == 5) {
    if (data_format == MKLDNNMemoryFormat::goidhw) {
      return MKLDNNMemoryFormat::oidhw;
    }
    if (data_format == MKLDNNMemoryFormat::nchw) {
      return MKLDNNMemoryFormat::ncdhw;
    } else if (data_format == MKLDNNMemoryFormat::nhwc) {
      return MKLDNNMemoryFormat::ndhwc;
    }
  } else if (dims_size == 6) {
    if (data_format == MKLDNNMemoryFormat::nchw) {
      return MKLDNNMemoryFormat::abcdef;
    }
  }
  return data_format;
}

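// Example of the rank adjustment above (illustrative): a 4-D nchw hint used
// for a 5-D tensor becomes ncdhw, while unsupported combinations fall through
// and return the hint unchanged, e.g.
//
//   auto fmt = MKLDNNFormatForSize(5, MKLDNNMemoryFormat::nchw);  // ncdhw
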
inline MKLDNNMemoryFormat data_format_to_memory_format(
    const std::string& data_format) {
  switch (phi::StringToDataLayout(data_format)) {
    case phi::DataLayout::kNHWC:
      return MKLDNNMemoryFormat::nhwc;
    case phi::DataLayout::kNCHW:
      return MKLDNNMemoryFormat::nchw;
    default:
      return MKLDNNMemoryFormat::any;
  }
}

inline MKLDNNMemoryFormat StringToMKLDNNFormat(std::string* format) {
  std::transform(format->begin(), format->end(), format->begin(), ::tolower);

  if (!format->compare("nchw")) {
    return MKLDNNMemoryFormat::nchw;
  } else if (!format->compare("nchw16c")) {
    return MKLDNNMemoryFormat::nChw16c;
  } else if (!format->compare("nchw8c")) {
    return MKLDNNMemoryFormat::nChw8c;
  } else if (!format->compare("nhwc")) {
    return MKLDNNMemoryFormat::nhwc;
  } else {
    return MKLDNNMemoryFormat::any;
  }
}

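// Usage sketch for the two conversion helpers above (values illustrative):
//
//   auto fmt1 = data_format_to_memory_format("NHWC");  // nhwc
//   std::string s = "NCHW16C";
//   auto fmt2 = StringToMKLDNNFormat(&s);  // nChw16c (s is lowercased in place)
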
inline std::string ThreadIDasStr(void) {
  return std::to_string(
      std::hash<std::thread::id>()(std::this_thread::get_id()));
}

template <typename T>
inline void AppendKey(std::string* key, const T& num) {
  key->append(std::to_string(num));
}

template <>
inline void AppendKey(std::string* key,
                      const dnnl::memory::format_tag& format) {
  key->append(std::to_string(static_cast<int>(format)));
}

template <>
inline void AppendKey(std::string* key,
                      const dnnl::memory::data_type& data_type) {
  key->append(std::to_string(static_cast<int>(data_type)));
}

template <>
inline void AppendKey(std::string* key, const dnnl::algorithm& algorithm) {
  key->append(std::to_string(static_cast<int>(algorithm)));
}

template <>
inline void AppendKey(std::string* key,
                      const dnnl::normalization_flags& flags) {
  key->append(std::to_string(static_cast<int>(flags)));
}

inline void AppendKey(std::string* key, const std::string& str) {
  key->append(str);
}

inline void AppendKey(std::string* key, const char* str) { key->append(str); }

template <typename T>
inline void AppendKey(std::string* key, const std::vector<T>& dims) {
  for (size_t i = 0; i < dims.size(); i++) {
    AppendKey(key, std::to_string(dims[i]));
  }
}

// For MKLDNN builds on a CPU place, register an executor-specific key suffix
// in the DeviceContext
inline void AttachPointerHashToMKLDNNKey(void* ptr,
                                         const platform::Place& place) {
  if (platform::is_cpu_place(place)) {
    // Static variables remember the first executor and its thread, so both
    // must be initialized by the same thread inside a critical section
    static std::mutex static_vars_barrier;
    static_vars_barrier.lock();
    static auto first_exec = ptr;
    static auto first_thread = ThreadIDasStr();
    static_vars_barrier.unlock();

    if (first_exec != ptr) {
      paddle::platform::MKLDNNDeviceContext::tls().set_key_suffix(
          "E" + std::to_string(reinterpret_cast<uintptr_t>(ptr)));
    }
    // Let's register the address of the current executor
    paddle::platform::MKLDNNDeviceContext::tls().set_curr_exec(ptr);

    // The first thread does not need the thread id in its keys
    if (first_thread == ThreadIDasStr()) {
      paddle::platform::MKLDNNDeviceContext::tls().disable_tid_in_key();
    }
  }
}

template <typename... ArgTypes>
inline std::string CreateKey(const platform::MKLDNNDeviceContext& dev_ctx,
                             ArgTypes&&... args) {
  std::string key;
  key.reserve(64);
  using expand_type = int[];
  expand_type{0, (AppendKey(&key, std::forward<ArgTypes>(args)), 0)...};
  key += paddle::platform::MKLDNNDeviceContext::tls().get_key_suffix();
  return key;
}

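// Hypothetical key-composition sketch: every argument is serialized through
// an AppendKey overload and concatenated, then the executor suffix is added
// (`dev_ctx`, `dims` and the literal "conv2d" are illustrative only):
//
//   std::vector<int64_t> dims = {8, 3, 224, 224};
//   auto key = CreateKey(dev_ctx, "conv2d", dims,
//                        dnnl::memory::data_type::f32);
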
inline std::string ExtendKeyWithThreadInfoIfNeeded(
    const platform::MKLDNNDeviceContext& dev_ctx, const std::string& key) {
  return (paddle::platform::MKLDNNDeviceContext::tls().is_tid_used_in_key() ==
          true)
             ? key + "-t:" + ThreadIDasStr()
             : key;
}

inline std::vector<std::vector<int64_t>> ToMkldnnPadding(
    const std::vector<int64_t>& paddings) {
  if (paddings.size() == 6) {
    int padding_front = paddings[0];
    int padding_back = paddings[1];
    int padding_top = paddings[2];
    int padding_bottom = paddings[3];
    int padding_left = paddings[4];
    int padding_right = paddings[5];

    return {{padding_front, padding_top, padding_left},
            {padding_back, padding_bottom, padding_right}};
  } else {
    int padding_top = paddings[0];
    int padding_bottom = paddings[1];
    int padding_left = paddings[2];
    int padding_right = paddings[3];

    return {{padding_top, padding_left}, {padding_bottom, padding_right}};
  }
}

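// Worked example (illustrative): a 2-D convolution passes 4 paddings in
// [top, bottom, left, right] order, which are regrouped per spatial end:
//
//   ToMkldnnPadding({1, 2, 3, 4});  // returns {{1, 3}, {2, 4}}
//
// A 3-D convolution passes 6 values and gets {{front, top, left},
// {back, bottom, right}} instead.
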
// The function adjusts the vector of weight dimensions for group convolutions
inline void GetGroupConvWeightsTz(std::vector<int64_t>& weights_tz,  // NOLINT
                                  const int groups) {
  if (groups > 1) {
    // if (is_conv3d) [o, i, d, h, w]->[g, o/g, i, d, h, w]
    // else [o, i, h, w] -> [g, o/g, i, h, w]
    weights_tz.push_back(0);
    std::rotate(weights_tz.begin(), weights_tz.end() - 1, weights_tz.end());
    weights_tz[0] = groups;
    weights_tz[1] = weights_tz[1] / groups;
  }
}

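// Worked example (illustrative values): with groups = 2, 4-D conv weights
// [o, i, h, w] = [64, 32, 3, 3] are reshaped to [g, o/g, i, h, w]:
//
//   std::vector<int64_t> weights_tz = {64, 32, 3, 3};
//   GetGroupConvWeightsTz(weights_tz, 2);  // weights_tz == {2, 32, 32, 3, 3}
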
inline void RegisterModelLayout(
    std::vector<std::unique_ptr<framework::OperatorBase>>& ops,  // NOLINT
    const platform::Place& place) {
  if (platform::is_cpu_place(place)) {
    // If NHWC is already registered, quit this call so that analysis of an
    // internal "while" op block does not overwrite the setting
    if (platform::MKLDNNDeviceContext::tls().get_cur_paddle_data_layout() ==
        phi::DataLayout::kNHWC)
      return;

    VLOG(4) << "RegisterModelLayout for mkldnn";
    auto check_attrib = [](std::unique_ptr<framework::OperatorBase>& op,
                           const std::string& attrib_name) -> bool {
      if (op->HasAttr(attrib_name)) {
        auto data_format = op->Attr<std::string>(attrib_name);
        platform::MKLDNNDeviceContext::tls().set_cur_paddle_data_layout(
            data_format.compare("NHWC") == 0 ? phi::DataLayout::kNHWC
                                             : phi::DataLayout::kNCHW);
        return true;
      } else {
        return false;
      }
    };

    for (auto& op : ops) {
      if (check_attrib(op, std::string("data_format"))) {
        return;
      }
      if (check_attrib(op, std::string("data_layout"))) {
        return;
      }
    }
  }
}

inline bool HasOpINT8DataType(const paddle::framework::OpDesc* op) {
  return (op->GetAttrIfExists<std::string>("mkldnn_data_type") == "int8" ||
          op->GetAttrIfExists<bool>("use_quantizer"));
}

inline bool HasOpBFLOAT16DataType(const paddle::framework::OpDesc* op) {
  return op->GetAttrIfExists<std::string>("mkldnn_data_type") == "bfloat16";
}

inline bool HasOpFLOAT32DataType(const paddle::framework::OpDesc* op) {
  return op->GetAttrIfExists<std::string>("mkldnn_data_type") == "float32";
}

enum class RNNReorderType { PP_NTC, PP_TNC, NTC_PP, TNC_PP };

template <typename T>
bool constexpr is_int8() {
  return std::is_same<T, int8_t>::value || std::is_same<T, uint8_t>::value;
}

}  // namespace platform

inline std::string FindInputNameByVarName(framework::OpDesc* op,
                                          const std::string& searched_name) {
  std::string ret;
  for (const auto& name : op->InputNames())
    for (const auto& input_name : op->Input(name))
      if (input_name == searched_name) ret = name;
  return ret;
}

inline std::string FindOutputNameByVarName(framework::OpDesc* op,
                                           const std::string& searched_name) {
  std::string ret;
  for (const auto& name : op->OutputNames())
    for (const auto& output_name : op->Output(name))
      if (output_name == searched_name) ret = name;
  return ret;
}
}  // namespace paddle