Unverified commit 8758a338, authored by Yiqun Liu, committed via GitHub.

Simplify the autotune cache codes. (#47667)

Parent commit: 6cdc18af
...@@ -59,7 +59,7 @@ void AutoTuneCache::UpdateStatus() { ...@@ -59,7 +59,7 @@ void AutoTuneCache::UpdateStatus() {
cache_misses += v.second.CacheMisses(); cache_misses += v.second.CacheMisses();
} }
for (auto& v : cudnn_auto_tune_map_) { for (auto& v : conv_auto_tune_map_) {
VLOG(4) << "AlgoType: " << std::setfill(' ') << std::setw(name_width) VLOG(4) << "AlgoType: " << std::setfill(' ') << std::setw(name_width)
<< AlgorithmTypeString(v.first) << AlgorithmTypeString(v.first)
<< " Cache Size: " << v.second.Size() << " Cache Size: " << v.second.Size()
......
...@@ -15,43 +15,10 @@ ...@@ -15,43 +15,10 @@
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <mutex>
#include <numeric> #include <numeric>
#include <unordered_map>
#include <vector>
#include "paddle/phi/common/data_type.h" #include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/autotune/cache_base.h"
#include "paddle/phi/core/errors.h"
DECLARE_int32(search_cache_max_number);
inline void HashCombine(std::size_t* seed) {}
// combine hash value
// https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x
template <typename T, typename... Rest>
inline void HashCombine(std::size_t* seed, const T& v, Rest... rest) {
std::hash<T> hasher;
*seed ^= hasher(v) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2);
*seed *= 0x00000100000001B3;
HashCombine(seed, rest...);
}
// custom specialization of std::hash can be injected in namespace std
// ref: https://en.cppreference.com/w/cpp/utility/hash
namespace std {
template <typename T>
struct hash<std::vector<T>> {
std::size_t operator()(std::vector<T> const& vec) const noexcept {
std::size_t seed = 0xcbf29ce484222325;
for (auto val : vec) {
HashCombine(&seed, val);
}
return seed;
}
};
} // namespace std
namespace phi { namespace phi {
namespace autotune { namespace autotune {
...@@ -66,208 +33,10 @@ struct ConvAutoTuneResult { ...@@ -66,208 +33,10 @@ struct ConvAutoTuneResult {
bool exhaustive_search = false; bool exhaustive_search = false;
}; };
template <typename... Args>
size_t GetKey(Args&&... args) {
size_t seed = 0;
HashCombine(&seed, std::forward<Args>(args)...);
return seed;
}
struct ConvCacheKey {
ConvCacheKey() {}
ConvCacheKey(const std::vector<int64_t>& arg_x_dims,
const std::vector<int64_t>& arg_w_dims,
const std::vector<int>& arg_strides,
const std::vector<int>& arg_paddings,
const std::vector<int>& arg_dilations,
phi::DataType arg_dtype,
int arg_groups,
int64_t arg_data_layout)
: x_dims(arg_x_dims),
w_dims(arg_w_dims),
strides(arg_strides),
paddings(arg_paddings),
dilations(arg_dilations),
dtype(arg_dtype),
groups(arg_groups),
data_layout(arg_data_layout) {}
size_t hash_value() const {
return GetKey(x_dims,
w_dims,
strides,
paddings,
dilations,
static_cast<int64_t>(dtype),
groups,
data_layout);
}
std::vector<int64_t> x_dims;
std::vector<int64_t> w_dims;
std::vector<int> strides;
std::vector<int> paddings;
std::vector<int> dilations;
phi::DataType dtype;
int groups;
int64_t data_layout;
};
struct ConvCacheKeyHash {
size_t operator()(const ConvCacheKey& cache) const {
return cache.hash_value();
}
};
struct ConvCacheKeyEqual {
size_t operator()(const ConvCacheKey& first,
const ConvCacheKey& second) const {
if (first.x_dims != second.x_dims) return false;
if (first.w_dims != second.w_dims) return false;
if (first.strides != second.strides) return false;
if (first.paddings != second.paddings) return false;
if (first.dilations != second.dilations) return false;
if (first.dtype != second.dtype) return false;
if (first.groups != second.groups) return false;
if (first.data_layout != second.data_layout) return false;
return true;
}
};
class CudnnAlgorithmsCacheMap {
public:
CudnnAlgorithmsCacheMap() : cache_mutex_(new std::mutex()) { hash_.clear(); }
ConvAutoTuneResult Get(const ConvCacheKey& key) {
std::lock_guard<std::mutex> lock(*cache_mutex_);
PADDLE_ENFORCE_NE(
hash_.find(key),
hash_.end(),
phi::errors::PreconditionNotMet("The key does not exist."));
return hash_[key];
}
bool Find(const ConvCacheKey& key) {
bool ret = false;
std::lock_guard<std::mutex> lock(*cache_mutex_);
if (hash_.find(key) != hash_.end()) {
cache_hits_++;
ret = true;
} else {
cache_misses_++;
}
return ret;
}
void Clean() {
std::lock_guard<std::mutex> lock(*cache_mutex_);
hash_.clear();
cache_hits_ = 0;
cache_misses_ = 0;
}
void Set(const ConvCacheKey& key, ConvAutoTuneResult algo) {
std::lock_guard<std::mutex> lock(*cache_mutex_);
if (hash_.size() > static_cast<size_t>(FLAGS_search_cache_max_number)) {
hash_.clear();
}
hash_[key] = algo;
}
int64_t CacheMisses() const { return cache_misses_; }
int64_t CacheHits() const { return cache_hits_; }
float CacheHitRate() const {
int64_t num_accesses = cache_hits_ + cache_misses_;
float cache_hit_rate = 0.;
if (num_accesses != 0) {
cache_hit_rate =
static_cast<float>(cache_hits_) / static_cast<float>(num_accesses);
}
return cache_hit_rate;
}
int64_t Size() const { return hash_.size(); }
private:
std::unordered_map<ConvCacheKey,
ConvAutoTuneResult,
ConvCacheKeyHash,
ConvCacheKeyEqual>
hash_;
std::shared_ptr<std::mutex> cache_mutex_;
int64_t cache_hits_{0};
int64_t cache_misses_{0};
};
size_t TransposeKey(const std::vector<int64_t>& x_dims, size_t TransposeKey(const std::vector<int64_t>& x_dims,
const std::vector<int32_t>& perm, const std::vector<int32_t>& perm,
phi::DataType dtype); phi::DataType dtype);
template <typename AlgorithmT>
class AlgorithmsCache {
public:
AlgorithmsCache() : cache_mutex_(new std::mutex()) { hash_.clear(); }
AlgorithmT Get(const size_t& key) {
std::lock_guard<std::mutex> lock(*cache_mutex_);
PADDLE_ENFORCE_NE(
hash_.find(key),
hash_.end(),
phi::errors::PreconditionNotMet("The key does not exist."));
return hash_[key];
}
bool Find(const size_t& key) {
bool ret = false;
std::lock_guard<std::mutex> lock(*cache_mutex_);
if (hash_.find(key) != hash_.end()) {
cache_hits_++;
ret = true;
} else {
cache_misses_++;
}
return ret;
}
void Clean() {
std::lock_guard<std::mutex> lock(*cache_mutex_);
hash_.clear();
cache_hits_ = 0;
cache_misses_ = 0;
}
void Set(const size_t& key, AlgorithmT algo) {
std::lock_guard<std::mutex> lock(*cache_mutex_);
hash_[key] = algo;
}
int64_t CacheMisses() const { return cache_misses_; }
int64_t CacheHits() const { return cache_hits_; }
float CacheHitRate() const {
int64_t num_accesses = cache_hits_ + cache_misses_;
float cache_hit_rate = 0.;
if (num_accesses != 0) {
cache_hit_rate =
static_cast<float>(cache_hits_) / static_cast<float>(num_accesses);
}
return cache_hit_rate;
}
int64_t Size() const { return hash_.size(); }
private:
std::unordered_map<size_t, AlgorithmT> hash_;
std::shared_ptr<std::mutex> cache_mutex_;
int64_t cache_hits_{0};
int64_t cache_misses_{0};
};
enum class AlgorithmType { enum class AlgorithmType {
kConvForward = 1, kConvForward = 1,
kConvBackwardData = 2, kConvBackwardData = 2,
...@@ -278,11 +47,12 @@ enum class AlgorithmType { ...@@ -278,11 +47,12 @@ enum class AlgorithmType {
// AlgorithmsConfigKey -> AlgorithmsID // AlgorithmsConfigKey -> AlgorithmsID
// (todo. hong) use cudnnConvolutionFwdAlgo_t // (todo. hong) use cudnnConvolutionFwdAlgo_t
using AlgorithmsCacheMap = AlgorithmsCache<int64_t>; using AlgorithmsCacheMap = AlgorithmsCache<size_t, int64_t>;
// AlgorithmType -> AlgorithmsCache // AlgorithmType -> AlgorithmsCache
using AlgorithmsTypeMap = std::unordered_map<int64_t, AlgorithmsCacheMap>; using AlgorithmsTypeMap = std::unordered_map<int64_t, AlgorithmsCacheMap>;
using CudnnAlgorithmsTypeMap = using ConvAlgorithmsCacheMap = ConvAlgorithmsCache<ConvAutoTuneResult>;
std::unordered_map<int64_t, CudnnAlgorithmsCacheMap>; using ConvAlgorithmsTypeMap =
std::unordered_map<int64_t, ConvAlgorithmsCacheMap>;
class AutoTuneCache { class AutoTuneCache {
public: public:
...@@ -295,8 +65,8 @@ class AutoTuneCache { ...@@ -295,8 +65,8 @@ class AutoTuneCache {
return auto_tune_map_[static_cast<int64_t>(algo_type)]; return auto_tune_map_[static_cast<int64_t>(algo_type)];
} }
CudnnAlgorithmsCacheMap& GetConv(const AlgorithmType& algo_type) { ConvAlgorithmsCacheMap& GetConv(const AlgorithmType& algo_type) {
return cudnn_auto_tune_map_[static_cast<int64_t>(algo_type)]; return conv_auto_tune_map_[static_cast<int64_t>(algo_type)];
} }
AlgorithmsCacheMap& GetTranspose() { return Get(AlgorithmType::kTranspose); } AlgorithmsCacheMap& GetTranspose() { return Get(AlgorithmType::kTranspose); }
...@@ -306,7 +76,7 @@ class AutoTuneCache { ...@@ -306,7 +76,7 @@ class AutoTuneCache {
v.second.Clean(); v.second.Clean();
} }
for (auto& v : cudnn_auto_tune_map_) { for (auto& v : conv_auto_tune_map_) {
v.second.Clean(); v.second.Clean();
} }
} }
...@@ -344,8 +114,8 @@ class AutoTuneCache { ...@@ -344,8 +114,8 @@ class AutoTuneCache {
algo_type == AlgorithmType::kConvBackwardFilter) { algo_type == AlgorithmType::kConvBackwardFilter) {
int64_t key = static_cast<int64_t>(algo_type); int64_t key = static_cast<int64_t>(algo_type);
if (auto_tune_map_.find(key) == auto_tune_map_.end()) { if (auto_tune_map_.find(key) == auto_tune_map_.end()) {
CudnnAlgorithmsCacheMap cache; ConvAlgorithmsCacheMap cache;
cudnn_auto_tune_map_[key] = cache; conv_auto_tune_map_[key] = cache;
} }
} else { } else {
int64_t key = static_cast<int64_t>(algo_type); int64_t key = static_cast<int64_t>(algo_type);
...@@ -357,7 +127,7 @@ class AutoTuneCache { ...@@ -357,7 +127,7 @@ class AutoTuneCache {
} }
AlgorithmsTypeMap auto_tune_map_; AlgorithmsTypeMap auto_tune_map_;
CudnnAlgorithmsTypeMap cudnn_auto_tune_map_; ConvAlgorithmsTypeMap conv_auto_tune_map_;
std::shared_ptr<std::mutex> autotune_cache_mutex_; std::shared_ptr<std::mutex> autotune_cache_mutex_;
int64_t total_cache_hits_{0}; int64_t total_cache_hits_{0};
int64_t total_cache_misses_{0}; int64_t total_cache_misses_{0};
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <mutex>
#include <unordered_map>
#include <utility>
#include <vector>

#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"
DECLARE_int32(search_cache_max_number);
// Recursion terminator: no values left to mix into the seed.
inline void HashCombine(std::size_t* seed) {}

// Mixes the hash of each argument into *seed, one value at a time.
// Mixing scheme adapted from boost::hash_combine, see
// https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x
template <typename T, typename... Rest>
inline void HashCombine(std::size_t* seed, const T& v, Rest... rest) {
  const std::size_t value_hash = std::hash<T>{}(v);
  *seed ^= value_hash + 0x9e3779b9 + (*seed << 6) + (*seed >> 2);
  *seed *= 0x00000100000001B3;
  HashCombine(seed, rest...);
}
// Custom specialization of std::hash injected into namespace std so that
// std::vector<T> can be used directly as a hash-map key.
// ref: https://en.cppreference.com/w/cpp/utility/hash
// NOTE(review): the standard only sanctions std::hash specializations that
// depend on a program-defined type; std::vector<int64_t> is not such a type,
// so a dedicated hasher functor would be strictly conforming -- confirm
// whether callers rely on this specialization before changing it.
namespace std {
template <typename T>
struct hash<std::vector<T>> {
  std::size_t operator()(std::vector<T> const& vec) const noexcept {
    // 0xcbf29ce484222325 is the 64-bit FNV-1a offset basis.
    std::size_t seed = 0xcbf29ce484222325;
    // Iterate by const reference: the original copied every element
    // (`for (auto val : vec)`), which is wasteful for non-trivial T.
    for (const auto& val : vec) {
      HashCombine(&seed, val);
    }
    return seed;
  }
};
}  // namespace std
namespace phi {
namespace autotune {
// Combines the hashes of all arguments into a single cache-key value.
template <typename... Args>
size_t GetKey(Args&&... args) {
  size_t key = 0;
  HashCombine(&key, std::forward<Args>(args)...);
  return key;
}
// Cache key identifying one convolution configuration; every field below
// participates in both hashing (hash_value) and equality (ConvCacheKeyEqual).
struct ConvCacheKey {
  ConvCacheKey() {}

  ConvCacheKey(const std::vector<int64_t>& arg_x_dims,
               const std::vector<int64_t>& arg_w_dims,
               const std::vector<int>& arg_strides,
               const std::vector<int>& arg_paddings,
               const std::vector<int>& arg_dilations,
               phi::DataType arg_dtype,
               int arg_groups,
               int64_t arg_data_layout)
      : x_dims(arg_x_dims),
        w_dims(arg_w_dims),
        strides(arg_strides),
        paddings(arg_paddings),
        dilations(arg_dilations),
        dtype(arg_dtype),
        groups(arg_groups),
        data_layout(arg_data_layout) {}

  // Folds every field into one hash value; dtype is widened to int64_t so it
  // goes through the generic integer hashing path.
  size_t hash_value() const {
    return GetKey(x_dims,
                  w_dims,
                  strides,
                  paddings,
                  dilations,
                  static_cast<int64_t>(dtype),
                  groups,
                  data_layout);
  }

  std::vector<int64_t> x_dims;
  std::vector<int64_t> w_dims;
  std::vector<int> strides;
  std::vector<int> paddings;
  std::vector<int> dilations;
  // Scalars carry in-class initializers: previously a default-constructed key
  // left them indeterminate, so hash_value()/equality read uninitialized
  // memory (UB). Zero-initialization keeps default keys well-defined.
  phi::DataType dtype{};
  int groups{0};
  int64_t data_layout{0};
};
struct ConvCacheKeyHash {
size_t operator()(const ConvCacheKey& cache) const {
return cache.hash_value();
}
};
struct ConvCacheKeyEqual {
size_t operator()(const ConvCacheKey& first,
const ConvCacheKey& second) const {
if (first.x_dims != second.x_dims) return false;
if (first.w_dims != second.w_dims) return false;
if (first.strides != second.strides) return false;
if (first.paddings != second.paddings) return false;
if (first.dilations != second.dilations) return false;
if (first.dtype != second.dtype) return false;
if (first.groups != second.groups) return false;
if (first.data_layout != second.data_layout) return false;
return true;
}
};
// Generic thread-safe cache mapping KeyT -> AlgorithmT with hit/miss
// statistics. Every public method acquires cache_mutex_, so concurrent use
// from multiple threads is safe.
template <typename KeyT,
          typename AlgorithmT,
          typename HashT = std::hash<KeyT>,
          typename KeyEqualT = std::equal_to<KeyT>>
class AlgorithmsCache {
 public:
  AlgorithmsCache() : cache_mutex_(std::make_shared<std::mutex>()) {}

  // Returns the cached algorithm for `key`. The key must already be present
  // (enforced); call Find() first to check.
  AlgorithmT Get(const KeyT& key) {
    std::lock_guard<std::mutex> lock(*cache_mutex_);
    // Single lookup: the original did find() followed by operator[],
    // searching the map twice.
    auto iter = hash_.find(key);
    PADDLE_ENFORCE_NE(
        iter,
        hash_.end(),
        phi::errors::PreconditionNotMet("The key does not exist."));
    return iter->second;
  }

  // Checks whether `key` is cached and updates the hit/miss counters.
  bool Find(const KeyT& key) {
    bool ret = false;
    std::lock_guard<std::mutex> lock(*cache_mutex_);
    if (hash_.find(key) != hash_.end()) {
      cache_hits_++;
      ret = true;
    } else {
      cache_misses_++;
    }
    return ret;
  }

  // Drops all cached entries and resets the statistics.
  void Clean() {
    std::lock_guard<std::mutex> lock(*cache_mutex_);
    hash_.clear();
    cache_hits_ = 0;
    cache_misses_ = 0;
  }

  // Inserts or overwrites the entry for `key`.
  void Set(const KeyT& key, AlgorithmT algo) {
    std::lock_guard<std::mutex> lock(*cache_mutex_);
    // `algo` is a by-value sink parameter: move it into the map instead of
    // copying (the original copied).
    hash_[key] = std::move(algo);
  }

  int64_t CacheMisses() const { return cache_misses_; }

  int64_t CacheHits() const { return cache_hits_; }

  // Fraction of Find() calls that hit, in [0, 1]; 0 when never queried.
  float CacheHitRate() const {
    int64_t num_accesses = cache_hits_ + cache_misses_;
    float cache_hit_rate = 0.;
    if (num_accesses != 0) {
      cache_hit_rate =
          static_cast<float>(cache_hits_) / static_cast<float>(num_accesses);
    }
    return cache_hit_rate;
  }

  int64_t Size() const { return hash_.size(); }

 protected:
  std::unordered_map<KeyT, AlgorithmT, HashT, KeyEqualT> hash_;
  std::shared_ptr<std::mutex> cache_mutex_;
  int64_t cache_hits_{0};
  int64_t cache_misses_{0};
};
// Convolution-specific cache keyed by ConvCacheKey. On top of
// AlgorithmsCache it bounds memory: once the entry count exceeds
// FLAGS_search_cache_max_number, the whole cache is flushed before inserting.
template <typename AlgorithmT>
class ConvAlgorithmsCache : public AlgorithmsCache<ConvCacheKey,
                                                   AlgorithmT,
                                                   ConvCacheKeyHash,
                                                   ConvCacheKeyEqual> {
 public:
  using AlgorithmsCacheBase = AlgorithmsCache<ConvCacheKey,
                                              AlgorithmT,
                                              ConvCacheKeyHash,
                                              ConvCacheKeyEqual>;

  // The defaulted constructor default-constructs the base, identical to the
  // original's explicit (and verbose) base-constructor call.
  ConvAlgorithmsCache() = default;

  // Inserts or overwrites the entry for `key`, clearing the whole cache
  // first if it has grown past the configured limit.
  void Set(const ConvCacheKey& key, AlgorithmT algo) {
    std::lock_guard<std::mutex> lock(*AlgorithmsCacheBase::cache_mutex_);
    if (AlgorithmsCacheBase::hash_.size() >
        static_cast<size_t>(FLAGS_search_cache_max_number)) {
      AlgorithmsCacheBase::hash_.clear();
    }
    // `algo` is a by-value sink parameter: move it into the map instead of
    // copying (the original copied).
    AlgorithmsCacheBase::hash_[key] = std::move(algo);
  }
};
} // namespace autotune
} // namespace phi
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册