diff --git a/paddle/phi/kernels/autotune/cache.cc b/paddle/phi/kernels/autotune/cache.cc index ad7a2b134a20c1d767f64ba5ec36cdd9ac5e361a..d72790f6341281d91ae0c29eb0ecbec22ce7b338 100644 --- a/paddle/phi/kernels/autotune/cache.cc +++ b/paddle/phi/kernels/autotune/cache.cc @@ -59,7 +59,7 @@ void AutoTuneCache::UpdateStatus() { cache_misses += v.second.CacheMisses(); } - for (auto& v : cudnn_auto_tune_map_) { + for (auto& v : conv_auto_tune_map_) { VLOG(4) << "AlgoType: " << std::setfill(' ') << std::setw(name_width) << AlgorithmTypeString(v.first) << " Cache Size: " << v.second.Size() diff --git a/paddle/phi/kernels/autotune/cache.h b/paddle/phi/kernels/autotune/cache.h index dc639e9f21ecfa7126b4a136acb2e38d5156f9fb..027ce58fd2cd4e5d3d299b74414d3bd9a0d77018 100644 --- a/paddle/phi/kernels/autotune/cache.h +++ b/paddle/phi/kernels/autotune/cache.h @@ -15,43 +15,10 @@ #pragma once #include -#include #include -#include -#include #include "paddle/phi/common/data_type.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" - -DECLARE_int32(search_cache_max_number); - -inline void HashCombine(std::size_t* seed) {} - -// combine hash value -// https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x -template -inline void HashCombine(std::size_t* seed, const T& v, Rest... rest) { - std::hash hasher; - *seed ^= hasher(v) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2); - *seed *= 0x00000100000001B3; - HashCombine(seed, rest...); -} - -// custom specialization of std::hash can be injected in namespace std -// ref: https://en.cppreference.com/w/cpp/utility/hash -namespace std { -template -struct hash> { - std::size_t operator()(std::vector const& vec) const noexcept { - std::size_t seed = 0xcbf29ce484222325; - for (auto val : vec) { - HashCombine(&seed, val); - } - return seed; - } -}; -} // namespace std +#include "paddle/phi/kernels/autotune/cache_base.h" namespace phi { namespace autotune { @@ -66,208 +33,10 @@ struct ConvAutoTuneResult { bool exhaustive_search = false; }; -template -size_t GetKey(Args&&... args) { - size_t seed = 0; - HashCombine(&seed, std::forward(args)...); - return seed; -} - -struct ConvCacheKey { - ConvCacheKey() {} - ConvCacheKey(const std::vector& arg_x_dims, - const std::vector& arg_w_dims, - const std::vector& arg_strides, - const std::vector& arg_paddings, - const std::vector& arg_dilations, - phi::DataType arg_dtype, - int arg_groups, - int64_t arg_data_layout) - : x_dims(arg_x_dims), - w_dims(arg_w_dims), - strides(arg_strides), - paddings(arg_paddings), - dilations(arg_dilations), - dtype(arg_dtype), - groups(arg_groups), - data_layout(arg_data_layout) {} - size_t hash_value() const { - return GetKey(x_dims, - w_dims, - strides, - paddings, - dilations, - static_cast(dtype), - groups, - data_layout); - } - - std::vector x_dims; - std::vector w_dims; - std::vector strides; - std::vector paddings; - std::vector dilations; - phi::DataType dtype; - int groups; - int64_t data_layout; -}; - -struct ConvCacheKeyHash { - size_t operator()(const ConvCacheKey& cache) const { - return cache.hash_value(); - } -}; - -struct ConvCacheKeyEqual { - size_t operator()(const ConvCacheKey& first, - const ConvCacheKey& second) const { - if (first.x_dims != second.x_dims) return false; - if (first.w_dims != second.w_dims) return false; - if (first.strides != second.strides) return false; - if (first.paddings != second.paddings) return false; - if (first.dilations != second.dilations) return false; - if (first.dtype != second.dtype) return false; - if (first.groups != second.groups) return false; - if (first.data_layout != second.data_layout) return false; - - return true; - } -}; - -class CudnnAlgorithmsCacheMap { - public: - CudnnAlgorithmsCacheMap() : cache_mutex_(new std::mutex()) { hash_.clear(); } - - ConvAutoTuneResult Get(const ConvCacheKey& key) { - std::lock_guard lock(*cache_mutex_); - PADDLE_ENFORCE_NE( - hash_.find(key), - hash_.end(), - phi::errors::PreconditionNotMet("The key does not exist.")); - return hash_[key]; - } - - bool Find(const ConvCacheKey& key) { - bool ret = false; - std::lock_guard lock(*cache_mutex_); - if (hash_.find(key) != hash_.end()) { - cache_hits_++; - ret = true; - } else { - cache_misses_++; - } - return ret; - } - - void Clean() { - std::lock_guard lock(*cache_mutex_); - hash_.clear(); - cache_hits_ = 0; - cache_misses_ = 0; - } - - void Set(const ConvCacheKey& key, ConvAutoTuneResult algo) { - std::lock_guard lock(*cache_mutex_); - if (hash_.size() > static_cast(FLAGS_search_cache_max_number)) { - hash_.clear(); - } - hash_[key] = algo; - } - - int64_t CacheMisses() const { return cache_misses_; } - - int64_t CacheHits() const { return cache_hits_; } - - float CacheHitRate() const { - int64_t num_accesses = cache_hits_ + cache_misses_; - float cache_hit_rate = 0.; - if (num_accesses != 0) { - cache_hit_rate = - static_cast(cache_hits_) / static_cast(num_accesses); - } - return cache_hit_rate; - } - - int64_t Size() const { return hash_.size(); } - - private: - std::unordered_map - hash_; - std::shared_ptr cache_mutex_; - - int64_t cache_hits_{0}; - int64_t cache_misses_{0}; -}; - size_t TransposeKey(const std::vector& x_dims, const std::vector& perm, phi::DataType dtype); -template -class AlgorithmsCache { - public: - AlgorithmsCache() : cache_mutex_(new std::mutex()) { hash_.clear(); } - - AlgorithmT Get(const size_t& key) { - std::lock_guard lock(*cache_mutex_); - PADDLE_ENFORCE_NE( - hash_.find(key), - hash_.end(), - phi::errors::PreconditionNotMet("The key does not exist.")); - return hash_[key]; - } - - bool Find(const size_t& key) { - bool ret = false; - std::lock_guard lock(*cache_mutex_); - if (hash_.find(key) != hash_.end()) { - cache_hits_++; - ret = true; - } else { - cache_misses_++; - } - return ret; - } - - void Clean() { - std::lock_guard lock(*cache_mutex_); - hash_.clear(); - cache_hits_ = 0; - cache_misses_ = 0; - } - - void Set(const size_t& key, AlgorithmT algo) { - std::lock_guard lock(*cache_mutex_); - hash_[key] = algo; - } - - int64_t CacheMisses() const { return cache_misses_; } - - int64_t CacheHits() const { return cache_hits_; } - - float CacheHitRate() const { - int64_t num_accesses = cache_hits_ + cache_misses_; - float cache_hit_rate = 0.; - if (num_accesses != 0) { - cache_hit_rate = - static_cast(cache_hits_) / static_cast(num_accesses); - } - return cache_hit_rate; - } - - int64_t Size() const { return hash_.size(); } - - private: - std::unordered_map hash_; - std::shared_ptr cache_mutex_; - - int64_t cache_hits_{0}; - int64_t cache_misses_{0}; -}; - enum class AlgorithmType { kConvForward = 1, kConvBackwardData = 2, @@ -278,11 +47,12 @@ enum class AlgorithmType { // AlgorithmsConfigKey -> AlgorithmsID // (todo. hong) use cudnnConvolutionFwdAlgo_t -using AlgorithmsCacheMap = AlgorithmsCache; +using AlgorithmsCacheMap = AlgorithmsCache; // AlgorithmType -> AlgorithmsCache using AlgorithmsTypeMap = std::unordered_map; -using CudnnAlgorithmsTypeMap = - std::unordered_map; +using ConvAlgorithmsCacheMap = ConvAlgorithmsCache; +using ConvAlgorithmsTypeMap = + std::unordered_map; class AutoTuneCache { public: @@ -295,8 +65,8 @@ class AutoTuneCache { return auto_tune_map_[static_cast(algo_type)]; } - CudnnAlgorithmsCacheMap& GetConv(const AlgorithmType& algo_type) { - return cudnn_auto_tune_map_[static_cast(algo_type)]; + ConvAlgorithmsCacheMap& GetConv(const AlgorithmType& algo_type) { + return conv_auto_tune_map_[static_cast(algo_type)]; } AlgorithmsCacheMap& GetTranspose() { return Get(AlgorithmType::kTranspose); } @@ -306,7 +76,7 @@ class AutoTuneCache { v.second.Clean(); } - for (auto& v : cudnn_auto_tune_map_) { + for (auto& v : conv_auto_tune_map_) { v.second.Clean(); } } @@ -344,8 +114,8 @@ class AutoTuneCache { algo_type == AlgorithmType::kConvBackwardFilter) { int64_t key = static_cast(algo_type); if (auto_tune_map_.find(key) == auto_tune_map_.end()) { - CudnnAlgorithmsCacheMap cache; - cudnn_auto_tune_map_[key] = cache; + ConvAlgorithmsCacheMap cache; + conv_auto_tune_map_[key] = cache; } } else { int64_t key = static_cast(algo_type); @@ -357,7 +127,7 @@ class AutoTuneCache { } AlgorithmsTypeMap auto_tune_map_; - CudnnAlgorithmsTypeMap cudnn_auto_tune_map_; + ConvAlgorithmsTypeMap conv_auto_tune_map_; std::shared_ptr autotune_cache_mutex_; int64_t total_cache_hits_{0}; int64_t total_cache_misses_{0}; diff --git a/paddle/phi/kernels/autotune/cache_base.h b/paddle/phi/kernels/autotune/cache_base.h new file mode 100644 index 0000000000000000000000000000000000000000..b5bfe365cd2eb678ca176c2d1d36a4aab8cfaf97 --- /dev/null +++ b/paddle/phi/kernels/autotune/cache_base.h @@ -0,0 +1,217 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +#include "paddle/phi/core/enforce.h" +#include "paddle/phi/core/errors.h" + +DECLARE_int32(search_cache_max_number); + +inline void HashCombine(std::size_t* seed) {} + +// combine hash value +// https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x +template +inline void HashCombine(std::size_t* seed, const T& v, Rest... rest) { + std::hash hasher; + *seed ^= hasher(v) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2); + *seed *= 0x00000100000001B3; + HashCombine(seed, rest...); +} + +// custom specialization of std::hash can be injected in namespace std +// ref: https://en.cppreference.com/w/cpp/utility/hash +namespace std { +template +struct hash> { + std::size_t operator()(std::vector const& vec) const noexcept { + std::size_t seed = 0xcbf29ce484222325; + for (auto val : vec) { + HashCombine(&seed, val); + } + return seed; + } +}; +} // namespace std + +namespace phi { +namespace autotune { + +template +size_t GetKey(Args&&... args) { + size_t seed = 0; + HashCombine(&seed, std::forward(args)...); + return seed; +} + +struct ConvCacheKey { + ConvCacheKey() {} + ConvCacheKey(const std::vector& arg_x_dims, + const std::vector& arg_w_dims, + const std::vector& arg_strides, + const std::vector& arg_paddings, + const std::vector& arg_dilations, + phi::DataType arg_dtype, + int arg_groups, + int64_t arg_data_layout) + : x_dims(arg_x_dims), + w_dims(arg_w_dims), + strides(arg_strides), + paddings(arg_paddings), + dilations(arg_dilations), + dtype(arg_dtype), + groups(arg_groups), + data_layout(arg_data_layout) {} + size_t hash_value() const { + return GetKey(x_dims, + w_dims, + strides, + paddings, + dilations, + static_cast(dtype), + groups, + data_layout); + } + + std::vector x_dims; + std::vector w_dims; + std::vector strides; + std::vector paddings; + std::vector dilations; + phi::DataType dtype; + int groups; + int64_t data_layout; +}; + +struct ConvCacheKeyHash { + size_t operator()(const ConvCacheKey& cache) const { + return cache.hash_value(); + } +}; + +struct ConvCacheKeyEqual { + size_t operator()(const ConvCacheKey& first, + const ConvCacheKey& second) const { + if (first.x_dims != second.x_dims) return false; + if (first.w_dims != second.w_dims) return false; + if (first.strides != second.strides) return false; + if (first.paddings != second.paddings) return false; + if (first.dilations != second.dilations) return false; + if (first.dtype != second.dtype) return false; + if (first.groups != second.groups) return false; + if (first.data_layout != second.data_layout) return false; + + return true; + } +}; + +template , + typename KeyEqualT = std::equal_to> +class AlgorithmsCache { + public: + AlgorithmsCache() : cache_mutex_(new std::mutex()) {} + + AlgorithmT Get(const KeyT& key) { + std::lock_guard lock(*cache_mutex_); + PADDLE_ENFORCE_NE( + hash_.find(key), + hash_.end(), + phi::errors::PreconditionNotMet("The key does not exist.")); + return hash_[key]; + } + + bool Find(const KeyT& key) { + bool ret = false; + std::lock_guard lock(*cache_mutex_); + if (hash_.find(key) != hash_.end()) { + cache_hits_++; + ret = true; + } else { + cache_misses_++; + } + return ret; + } + + void Clean() { + std::lock_guard lock(*cache_mutex_); + hash_.clear(); + cache_hits_ = 0; + cache_misses_ = 0; + } + + void Set(const KeyT& key, AlgorithmT algo) { + std::lock_guard lock(*cache_mutex_); + hash_[key] = algo; + } + + int64_t CacheMisses() const { return cache_misses_; } + + int64_t CacheHits() const { return cache_hits_; } + + float CacheHitRate() const { + int64_t num_accesses = cache_hits_ + cache_misses_; + float cache_hit_rate = 0.; + if (num_accesses != 0) { + cache_hit_rate = + static_cast(cache_hits_) / static_cast(num_accesses); + } + return cache_hit_rate; + } + + int64_t Size() const { return hash_.size(); } + + protected: + std::unordered_map hash_; + std::shared_ptr cache_mutex_; + + int64_t cache_hits_{0}; + int64_t cache_misses_{0}; +}; + +template +class ConvAlgorithmsCache : public AlgorithmsCache { + public: + using AlgorithmsCacheBase = AlgorithmsCache; + + ConvAlgorithmsCache() + : AlgorithmsCache() {} + + void Set(const ConvCacheKey& key, AlgorithmT algo) { + std::lock_guard lock(*AlgorithmsCacheBase::cache_mutex_); + if (AlgorithmsCacheBase::hash_.size() > + static_cast(FLAGS_search_cache_max_number)) { + AlgorithmsCacheBase::hash_.clear(); + } + AlgorithmsCacheBase::hash_[key] = algo; + } +}; + +} // namespace autotune +} // namespace phi