From 7dfd38460457770ff4c178060dac43fc92e6470e Mon Sep 17 00:00:00 2001 From: Zhang Ting Date: Thu, 31 Mar 2022 11:46:15 +0800 Subject: [PATCH] Implement AutotuneCache class for Kernel AutoTune (#41169) --- paddle/phi/kernels/autotune/cache.h | 92 ++++++++++++++++------- paddle/phi/kernels/autotune/cache_test.cc | 22 ++++-- 2 files changed, 81 insertions(+), 33 deletions(-) diff --git a/paddle/phi/kernels/autotune/cache.h b/paddle/phi/kernels/autotune/cache.h index c5b068c2899..990843e58f7 100644 --- a/paddle/phi/kernels/autotune/cache.h +++ b/paddle/phi/kernels/autotune/cache.h @@ -51,20 +51,35 @@ struct hash> { namespace phi { namespace autotune { +template +size_t GetKey(Args&&... args) { + size_t seed = 0; + HashCombine(&seed, std::forward(args)...); + return seed; +} + +// Define the cache key of operator +size_t ConvKey(const std::vector& x_dims, + const std::vector& w_dims, + const std::vector& strides, + const std::vector& paddings, + const std::vector& dilations, + phi::DataType dtype) { + return GetKey(x_dims, + w_dims, + strides, + paddings, + dilations, + static_cast(dtype)); +} + template class AlgorithmsCache { public: - AlgorithmsCache() { hash_.clear(); } - - template - size_t GetKey(Args&&... args) { - size_t seed = 0; - HashCombine(&seed, std::forward(args)...); - return seed; - } + AlgorithmsCache() : cache_mutex_(new std::mutex()) { hash_.clear(); } AlgorithmT Get(size_t key) { - std::lock_guard lock(cache_mutex_); + std::lock_guard lock(*cache_mutex_); PADDLE_ENFORCE_NE( hash_.find(key), hash_.end(), @@ -74,7 +89,7 @@ class AlgorithmsCache { bool Find(size_t key) { bool ret = false; - std::lock_guard lock(cache_mutex_); + std::lock_guard lock(*cache_mutex_); if (hash_.find(key) != hash_.end()) { cache_hits_++; ret = true; @@ -85,7 +100,7 @@ class AlgorithmsCache { } void Set(size_t key, AlgorithmT algo) { - std::lock_guard lock(cache_mutex_); + std::lock_guard lock(*cache_mutex_); hash_[key] = algo; } @@ -96,27 +111,52 @@ class AlgorithmsCache { return cache_hit_rate; } - // Define the cache key of operator - size_t ConvKey(const std::vector& x_dims, - const std::vector& w_dims, - const std::vector& strides, - const std::vector& paddings, - const std::vector& dilations, - phi::DataType dtype) { - return GetKey(x_dims, - w_dims, - strides, - paddings, - dilations, - static_cast(dtype)); - } + int64_t Size() { return hash_.size(); } private: std::unordered_map hash_; - std::mutex cache_mutex_; + std::shared_ptr cache_mutex_; int64_t cache_hits_ = 0; int64_t cache_misses_ = 0; }; +// AlgorithmsConfigKey -> AlgorithmsID +using AlgorithmsConfigKeyMap = AlgorithmsCache; +// AlgorithmsType -> AlgorithmsCache +using AlgorithmsTypeMap = + std::unordered_map; + +class AutoTuneCache { + public: + static AutoTuneCache& Instance() { + static AutoTuneCache autotune_cache; + return autotune_cache; + } + + AlgorithmsConfigKeyMap& RegisterOrGet(const std::string& algo_type) { + std::lock_guard lock(*autotune_cache_mutex_); + if (auto_tune_map_.find(algo_type) == auto_tune_map_.end()) { + AlgorithmsConfigKeyMap cache; + auto_tune_map_[algo_type] = cache; + } + return auto_tune_map_[algo_type]; + } + + // The number of total config cached + int64_t Size() { + int64_t total = 0; + for (auto& v : auto_tune_map_) { + VLOG(3) << v.first << " " << v.second.Size(); + total += v.second.Size(); + } + return total; + } + + private: + AutoTuneCache() : autotune_cache_mutex_(new std::mutex()) {} + AlgorithmsTypeMap auto_tune_map_; + std::shared_ptr autotune_cache_mutex_; +}; + } // namespace autotune } // namespace phi diff --git a/paddle/phi/kernels/autotune/cache_test.cc b/paddle/phi/kernels/autotune/cache_test.cc index b08a6cfc14a..9fcd9b796d0 100644 --- a/paddle/phi/kernels/autotune/cache_test.cc +++ b/paddle/phi/kernels/autotune/cache_test.cc @@ -18,10 +18,12 @@ #include #include "glog/logging.h" -void Algo() { VLOG(3) << "algo test"; } +enum ConvAlgos { GEMMKernel = 0, CuDNNKernel_1 = 1, CuDNNKernel_2 = 2 }; TEST(AlgosCache, AlgosCache) { - phi::autotune::AlgorithmsCache> cache; + auto autotune_cache = phi::autotune::AutoTuneCache::Instance(); + auto& cache = autotune_cache.RegisterOrGet("conv_fw"); + std::vector x_shape = {4, 224, 224, 3}; std::vector w_shape = {32, 3, 3, 3}; std::vector paddings = {0, 0}; @@ -29,17 +31,23 @@ TEST(AlgosCache, AlgosCache) { std::vector dilations = {1, 1}; phi::DataType dtype = paddle::experimental::CppTypeToDataType::Type(); - auto key = - cache.ConvKey(x_shape, w_shape, paddings, strides, dilations, dtype); + auto key = phi::autotune::ConvKey( + x_shape, w_shape, paddings, strides, dilations, dtype); EXPECT_EQ(cache.Find(key), false); - cache.Set(key, Algo); + cache.Set(key, ConvAlgos::GEMMKernel); + EXPECT_EQ(cache.Size(), 1); EXPECT_EQ(cache.Find(key), true); auto algo = cache.Get(key); - algo(); + EXPECT_EQ(algo, ConvAlgos::GEMMKernel); x_shape = {4, 128, 128, 3}; - key = cache.ConvKey(x_shape, w_shape, paddings, strides, dilations, dtype); + key = phi::autotune::ConvKey( + x_shape, w_shape, paddings, strides, dilations, dtype); EXPECT_EQ(cache.Find(key), false); + cache.Set(key, ConvAlgos::CuDNNKernel_1); + EXPECT_EQ(cache.Size(), 2); + EXPECT_EQ(autotune_cache.Size(), 2); + float cache_hit_rate = static_cast(1) / static_cast(3); EXPECT_LT(std::abs(cache_hit_rate - cache.CacheHitRate()), 1e-5); } -- GitLab