/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once

#include <algorithm>
#include <functional>
#include <iostream>
#include <memory>
#include <mutex>
#include <sstream>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include "dnnl.hpp"  // NOLINT
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/phi/backends/onednn/onednn_helper.h"
namespace paddle {
#ifdef PADDLE_WITH_MKLDNN
using OneDNNMemoryFormat = dnnl::memory::format_tag;
#endif
namespace platform {

template <class Type>
using tf_desc = typename Type::desc;

template <class Type>
using tf_pd = typename Type::primitive_desc;

inline void ClearMKLDNNCache(const platform::Place& place,
                             void* ptr = nullptr) {
  // Clear the mkl-dnn primitive cache.
  if (platform::is_cpu_place(place)) {
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    platform::MKLDNNDeviceContext* dev_ctx =
        (platform::MKLDNNDeviceContext*)pool.Get(place);
    dev_ctx->ResetBlobMap(ptr);
  }
}

inline void DontClearMKLDNNCache(const platform::Place& place) {
  // Block the next clearing of the mkl-dnn cache.
  if (platform::is_cpu_place(place)) {
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    platform::MKLDNNDeviceContext* dev_ctx =
        (platform::MKLDNNDeviceContext*)pool.Get(place);
    dev_ctx->BlockNextCacheClearing();
  }
}
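
// A minimal illustration of the two helpers above; `place` and `exec_ptr` are
// assumed to come from the caller and are not defined in this header:
//   platform::ClearMKLDNNCache(place, exec_ptr);   // request clearing of cached blobs
//   platform::DontClearMKLDNNCache(place);         // block the next cache clearing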

inline void Reorder(dnnl::memory src,
                    dnnl::memory dst,
                    const dnnl::engine& engine) {
  auto reorder_prim = dnnl::reorder(src, dst);
  auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
  platform::RecordEvent record_reorder("int_reorder",
                                       platform::TracerEventType::UserDefined,
                                       2,
                                       platform::EventRole::kUniqueOp);
  reorder_prim.execute(astream, src, dst);
  astream.wait();
}
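
// Sketch of how Reorder might be used; the memory descriptors, engine and data
// pointers below are assumptions made for the example:
//   dnnl::memory src_mem(src_md, engine, src_data);
//   dnnl::memory dst_mem(dst_md, engine, dst_data);
//   platform::Reorder(src_mem, dst_mem, engine);  // copy src into dst's layout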

inline std::string ThreadIDasStr(void) {
  return std::to_string(
      std::hash<std::thread::id>()(std::this_thread::get_id()));
}

template <typename T>
inline void AppendKey(std::string* key, const T& num) {
  key->append(std::to_string(num));
}

template <>
inline void AppendKey(std::string* key,
                      const dnnl::memory::format_tag& format) {
  key->append(std::to_string(static_cast<int>(format)));
}

template <>
inline void AppendKey(std::string* key,
                      const dnnl::memory::data_type& data_type) {
  key->append(std::to_string(static_cast<int>(data_type)));
}

template <>
inline void AppendKey(std::string* key, const dnnl::algorithm& algorithm) {
  key->append(std::to_string(static_cast<int>(algorithm)));
}

template <>
inline void AppendKey(std::string* key,
                      const dnnl::normalization_flags& flags) {
  key->append(std::to_string(static_cast<int>(flags)));
}

inline void AppendKey(std::string* key, const std::string& str) {
  key->append(str);
}

inline void AppendKey(std::string* key, const char* str) { key->append(str); }

template <typename T>
inline void AppendKey(std::string* key, const std::vector<T>& dims) {
  for (size_t i = 0; i < dims.size(); i++) {
    AppendKey(key, std::to_string(dims[i]));
  }
}

// If this is an MKLDNN build and the place is a CPU place, register an
// executor-based key suffix in the DeviceContext.
inline void AttachPointerHashToMKLDNNKey(void* ptr,
                                         const platform::Place& place) {
  if (platform::is_cpu_place(place)) {
    // Static vars remember the first executor and its thread; both must be
    // initialized by the same thread, hence the critical section.
    static std::mutex static_vars_barrier;
    static_vars_barrier.lock();
    static auto first_exec = ptr;
    static auto first_thread = ThreadIDasStr();
    static_vars_barrier.unlock();

    if (first_exec != ptr) {
      paddle::platform::MKLDNNDeviceContext::tls().set_key_suffix(
          "E" + std::to_string(reinterpret_cast<uintptr_t>(ptr)));
    }
    // Register the address of the current executor
    paddle::platform::MKLDNNDeviceContext::tls().set_curr_exec(ptr);

    // For the first thread, drop the thread id from the cache key
    if (first_thread == ThreadIDasStr()) {
      paddle::platform::MKLDNNDeviceContext::tls().disable_tid_in_key();
    }
  }
}

template <typename... ArgTypes>
inline std::string CreateKey(const platform::MKLDNNDeviceContext& dev_ctx,
                             ArgTypes&&... args) {
  std::string key;
  key.reserve(64);
  using expand_type = int[];
  expand_type{0, (AppendKey(&key, std::forward<ArgTypes>(args)), 0)...};
  key += paddle::platform::MKLDNNDeviceContext::tls().get_key_suffix();
  return key;
}
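
// Illustrative key construction (the dims, tag and context below are example
// values, not a fixed contract):
//   std::vector<int64_t> src_tz = {8, 3, 224, 224};
//   std::string key = platform::CreateKey(
//       dev_ctx, src_tz, "conv2d", dnnl::memory::data_type::f32);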

inline std::string ExtendKeyWithThreadInfoIfNeeded(
    const platform::MKLDNNDeviceContext& dev_ctx, const std::string& key) {
  return (paddle::platform::MKLDNNDeviceContext::tls().is_tid_used_in_key() ==
          true)
             ? key + "-t:" + ThreadIDasStr()
             : key;
}
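
// A key built with CreateKey can be passed through the helper above to append
// the thread id only when thread-aware caching is enabled, e.g.:
//   key = platform::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key);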

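// Scans the program's operators on CPU and records the model data layout
// (kNHWC vs. kNCHW) in thread-local state, based on the first operator that
// carries a "data_format" or "data_layout" attribute.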
inline void RegisterModelLayout(
    std::vector<std::unique_ptr<framework::OperatorBase>>& ops,  // NOLINT
    const platform::Place& place) {
  if (platform::is_cpu_place(place)) {
    // If NHWC is already registered, quit this call so the setting is not
    // overwritten by the analysis of an internal "while" op block
    if (platform::MKLDNNDeviceContext::tls().get_cur_paddle_data_layout() ==
        phi::DataLayout::kNHWC)
      return;

    VLOG(4) << "RegisterModelLayout for mkldnn";
    auto check_attrib = [](std::unique_ptr<framework::OperatorBase>& op,
                           const std::string& attrib_name) -> bool {
      if (op->HasAttr(attrib_name)) {
        auto data_format = op->Attr<std::string>(attrib_name);
        platform::MKLDNNDeviceContext::tls().set_cur_paddle_data_layout(
            data_format.compare("NHWC") == 0 ? phi::DataLayout::kNHWC
                                             : phi::DataLayout::kNCHW);
        return true;
      } else {
        return false;
      }
    };

    for (auto& op : ops) {
      if (check_attrib(op, std::string("data_format"))) {
        return;
      }
      if (check_attrib(op, std::string("data_layout"))) {
        return;
      }
    }
  }
}

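// True when the operator was marked for INT8 execution, either through the
// "mkldnn_data_type" attribute or the "use_quantizer" flag.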
inline bool HasOpINT8DataType(const paddle::framework::OpDesc* op) {
  return (op->GetAttrIfExists<std::string>("mkldnn_data_type") == "int8" ||
          op->GetAttrIfExists<bool>("use_quantizer"));
}

inline bool HasOpBFLOAT16DataType(const paddle::framework::OpDesc* op) {
  return op->GetAttrIfExists<std::string>("mkldnn_data_type") == "bfloat16";
}

enum class RNNReorderType { PP_NTC, PP_TNC, NTC_PP, TNC_PP };

}  // namespace platform

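// The two helpers below map a variable name back to the operator input/output
// slot it is bound to, returning an empty string when no slot matches.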
inline std::string FindInputNameByVarName(framework::OpDesc* op,
                                          const std::string& searched_name) {
  std::string ret;
  for (const auto& name : op->InputNames())
    for (const auto& input_name : op->Input(name))
      if (input_name == searched_name) ret = name;
  return ret;
}

inline std::string FindOutputNameByVarName(framework::OpDesc* op,
                                           const std::string& searched_name) {
  std::string ret;
  for (const auto& name : op->OutputNames())
    for (const auto& output_name : op->Output(name))
      if (output_name == searched_name) ret = name;
  return ret;
}
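
// Illustrative lookup (the op and variable names are assumptions):
//   std::string slot = FindInputNameByVarName(op_desc, "conv2d_0.w_0");
//   // slot is e.g. "Filter" when the variable feeds that input, "" otherwise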
}  // namespace paddle