// cache.h
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <algorithm>
#include <functional>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "glog/logging.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"

inline void HashCombine(std::size_t* seed) {}

// combine hash value
// https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x
template <typename T, typename... Rest>
inline void HashCombine(std::size_t* seed, const T& v, Rest... rest) {
  std::hash<T> hasher;
  *seed ^= hasher(v) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2);
  HashCombine(seed, rest...);
}

// custom specialization of std::hash can be injected in namespace std
// ref: https://en.cppreference.com/w/cpp/utility/hash
namespace std {
template <typename T>
struct hash<std::vector<T>> {
  std::size_t operator()(std::vector<T> const& vec) const noexcept {
    std::size_t seed = 0;
    for (auto val : vec) {
      HashCombine(&seed, val);
    }
    return seed;
  }
};
}  // namespace std

namespace phi {
namespace autotune {

54 55 56 57 58 59 60 61 62 63 64 65 66
template <typename... Args>
size_t GetKey(Args&&... args) {
  size_t seed = 0;
  HashCombine(&seed, std::forward<Args>(args)...);
  return seed;
}

// Define the cache key of operator
size_t ConvKey(const std::vector<int64_t>& x_dims,
               const std::vector<int64_t>& w_dims,
               const std::vector<int>& strides,
               const std::vector<int>& paddings,
               const std::vector<int>& dilations,
67
               phi::DataType dtype);
68

69 70 71
template <typename AlgorithmT>
class AlgorithmsCache {
 public:
72
  AlgorithmsCache() : cache_mutex_(new std::mutex()) { hash_.clear(); }
73 74

  AlgorithmT Get(size_t key) {
75
    std::lock_guard<std::mutex> lock(*cache_mutex_);
76 77 78 79 80 81 82 83 84
    PADDLE_ENFORCE_NE(
        hash_.find(key),
        hash_.end(),
        phi::errors::PreconditionNotMet("The key does not exist."));
    return hash_[key];
  }

  bool Find(size_t key) {
    bool ret = false;
85
    std::lock_guard<std::mutex> lock(*cache_mutex_);
86 87 88 89 90 91 92 93 94 95
    if (hash_.find(key) != hash_.end()) {
      cache_hits_++;
      ret = true;
    } else {
      cache_misses_++;
    }
    return ret;
  }

  void Set(size_t key, AlgorithmT algo) {
96
    std::lock_guard<std::mutex> lock(*cache_mutex_);
97 98 99
    hash_[key] = algo;
  }

100 101 102 103
  int64_t CacheMisses() const { return cache_misses_; }

  int64_t CacheHits() const { return cache_hits_; }

104 105
  float CacheHitRate() const {
    int64_t num_accesses = cache_hits_ + cache_misses_;
106 107 108 109 110
    float cache_hit_rate = 0.;
    if (num_accesses != 0) {
      cache_hit_rate =
          static_cast<float>(cache_hits_) / static_cast<float>(num_accesses);
    }
111 112 113
    return cache_hit_rate;
  }

114
  int64_t Size() const { return hash_.size(); }
115 116 117

 private:
  std::unordered_map<size_t, AlgorithmT> hash_;
118
  std::shared_ptr<std::mutex> cache_mutex_;
119 120 121 122
  int64_t cache_hits_ = 0;
  int64_t cache_misses_ = 0;
};

123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
// AlgorithmsConfigKey -> AlgorithmsID
using AlgorithmsConfigKeyMap = AlgorithmsCache<int64_t>;
// AlgorithmsType -> AlgorithmsCache
using AlgorithmsTypeMap =
    std::unordered_map<std::string, AlgorithmsConfigKeyMap>;

class AutoTuneCache {
 public:
  static AutoTuneCache& Instance() {
    static AutoTuneCache autotune_cache;
    return autotune_cache;
  }

  AlgorithmsConfigKeyMap& RegisterOrGet(const std::string& algo_type) {
    std::lock_guard<std::mutex> lock(*autotune_cache_mutex_);
    if (auto_tune_map_.find(algo_type) == auto_tune_map_.end()) {
      AlgorithmsConfigKeyMap cache;
      auto_tune_map_[algo_type] = cache;
    }
    return auto_tune_map_[algo_type];
  }

145 146 147 148 149 150 151 152 153 154 155 156 157
  void Clean(float miss_rate) {
    std::lock_guard<std::mutex> lock(*autotune_cache_mutex_);
    // Set a small tolerance to avoid performance degradation
    // due to large cache size under dynamic shape.
    if (miss_rate > 0.01) {
      auto_tune_map_.clear();
    }
  }

  void UpdateStatus() {
    int64_t size = 0;
    int64_t cache_hits = 0;
    int64_t cache_misses = 0;
158
    for (auto& v : auto_tune_map_) {
159 160 161 162 163 164 165
      VLOG(4) << "AlgoType: " << v.first << " Cache Size: " << v.second.Size()
              << " Hits: " << v.second.CacheHits()
              << " Misses: " << v.second.CacheMisses()
              << " Hit Rate: " << v.second.CacheHitRate();
      size += v.second.Size();
      cache_hits += v.second.CacheHits();
      cache_misses += v.second.CacheMisses();
166
    }
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
    total_size_ = size;
    total_cache_hits_ = cache_hits;
    total_cache_misses_ = cache_misses;
  }

  // The number of total config cached
  int64_t Size() const { return total_size_; }

  int64_t CacheHits() const { return total_cache_hits_; }

  int64_t CacheMisses() const { return total_cache_misses_; }

  float CacheHitRate() const {
    float total_cache_hit_rate = 0.;
    int64_t total_num_accesses = total_cache_hits_ + total_cache_misses_;
    if (total_num_accesses != 0) {
      total_cache_hit_rate = static_cast<float>(total_cache_hits_) /
                             static_cast<float>(total_num_accesses);
    }

    return total_cache_hit_rate;
188 189 190 191 192 193
  }

 private:
  AutoTuneCache() : autotune_cache_mutex_(new std::mutex()) {}
  AlgorithmsTypeMap auto_tune_map_;
  std::shared_ptr<std::mutex> autotune_cache_mutex_;
194 195 196
  int64_t total_cache_hits_ = 0;
  int64_t total_cache_misses_ = 0;
  int64_t total_size_ = 0;
197 198
};

199 200
}  // namespace autotune
}  // namespace phi