From f4ead7885285dcf10817028c448d65a288bddfca Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Tue, 26 May 2020 20:28:29 +0800 Subject: [PATCH] feat(mgb): static allocation with given padding GitOrigin-RevId: fdf2de8ad6f767bf3d0c4f3ae9287bbd77b70c16 --- src/core/impl/comp_node/comp_node.cpp | 4 +++ .../graph/var_node_mem_mgr/seq_mem_opt.cpp | 5 ++- .../graph/var_node_mem_mgr/static_mem_alloc.h | 9 +++++ .../static_mem_alloc/impl.cpp | 2 +- .../var_node_mem_mgr/static_mem_alloc/impl.h | 7 +++- src/core/include/megbrain/comp_node.h | 12 +++++++ src/core/test/static_mem_alloc.cpp | 33 ++++++++++++------- 7 files changed, 57 insertions(+), 15 deletions(-) diff --git a/src/core/impl/comp_node/comp_node.cpp b/src/core/impl/comp_node/comp_node.cpp index ae0401907..c87c5ab80 100644 --- a/src/core/impl/comp_node/comp_node.cpp +++ b/src/core/impl/comp_node/comp_node.cpp @@ -552,6 +552,10 @@ std::unique_ptr CompNode::ImplBase::create_seq_recorder( return {}; } +size_t CompNode::ImplBase::get_mem_padding() { + return 0; +} + void CompNode::ImplBase::add_callback(megdnn::thin_function&&) { mgb_throw(MegBrainError, "Unsupported add callback to " diff --git a/src/core/impl/graph/var_node_mem_mgr/seq_mem_opt.cpp b/src/core/impl/graph/var_node_mem_mgr/seq_mem_opt.cpp index 2c8ac503f..acf245c29 100644 --- a/src/core/impl/graph/var_node_mem_mgr/seq_mem_opt.cpp +++ b/src/core/impl/graph/var_node_mem_mgr/seq_mem_opt.cpp @@ -160,7 +160,9 @@ bool SeqMemOptimizer::plan_chunk_allocation() { if (chunk->owner_var == var) { size_t& usage = cn2usage[var->comp_node()]; size_t offset = usage; - usage += chunk->size(); + usage += get_aligned_power2( + chunk->size() + var->comp_node().get_mem_padding(), + var->comp_node().get_mem_addr_alignment()); chunk->mem_alloc_status.set_static_offset(offset); } } @@ -299,6 +301,7 @@ bool SeqMemOptimizer::run_static_mem_alloc_on_comp_node( auto allocator = StaticMemAlloc::make( StaticMemAlloc::AllocatorAlgo::PUSHDOWN); allocator->alignment(comp_node.get_mem_addr_alignment()); + allocator->padding(comp_node.get_mem_padding()); #if MGB_ENABLE_DEBUG_UTIL allocator->dbg_key2varnode = [](StaticMemAlloc::UserKeyType key) { return static_cast(key)->chunk->owner_var; diff --git a/src/core/impl/graph/var_node_mem_mgr/static_mem_alloc.h b/src/core/impl/graph/var_node_mem_mgr/static_mem_alloc.h index be907e08c..8c98d0f43 100644 --- a/src/core/impl/graph/var_node_mem_mgr/static_mem_alloc.h +++ b/src/core/impl/graph/var_node_mem_mgr/static_mem_alloc.h @@ -89,6 +89,15 @@ class StaticMemAlloc { */ virtual StaticMemAlloc& alignment(size_t alignment) = 0; + /*! + * \brief set interval padding at the end(except for overwritters) + * + * Must be called before calling add() + * + * \param padding interval padding + */ + virtual StaticMemAlloc& padding(size_t padding) = 0; + #if MGB_ENABLE_DEBUG_UTIL //! set by the caller to convert key to VarNode* for debug logging VarNode* (*dbg_key2varnode)(UserKeyType) = nullptr; diff --git a/src/core/impl/graph/var_node_mem_mgr/static_mem_alloc/impl.cpp b/src/core/impl/graph/var_node_mem_mgr/static_mem_alloc/impl.cpp index 37cfa0f8e..2ddc2bcd5 100644 --- a/src/core/impl/graph/var_node_mem_mgr/static_mem_alloc/impl.cpp +++ b/src/core/impl/graph/var_node_mem_mgr/static_mem_alloc/impl.cpp @@ -84,7 +84,7 @@ size_t StaticMemAllocImplHelper::add(size_t begin, size_t end, size_t size, mgb_assert(begin < end); auto id = m_interval_storage.size(); - m_interval_storage.push_back({begin, end, size, key, id}); + m_interval_storage.push_back({begin, end, size + m_padding, key, id}); return id; } diff --git a/src/core/impl/graph/var_node_mem_mgr/static_mem_alloc/impl.h b/src/core/impl/graph/var_node_mem_mgr/static_mem_alloc/impl.h index cba6c4166..b25ecfbb1 100644 --- a/src/core/impl/graph/var_node_mem_mgr/static_mem_alloc/impl.h +++ b/src/core/impl/graph/var_node_mem_mgr/static_mem_alloc/impl.h @@ -45,6 +45,11 @@ class StaticMemAllocImplHelper: public StaticMemAlloc { return *this; } + StaticMemAlloc& padding(size_t padding) override final { + m_padding = padding; + return *this; + } + size_t tot_alloc_lower_bound() const override final { return m_peak_lower_bound; } @@ -69,7 +74,7 @@ class StaticMemAllocImplHelper: public StaticMemAlloc { } private: - size_t m_alignment = 1, m_peak_lower_bound = 0; + size_t m_alignment = 1, m_padding = 0, m_peak_lower_bound = 0; //! original interval storage std::vector m_interval_storage; diff --git a/src/core/include/megbrain/comp_node.h b/src/core/include/megbrain/comp_node.h index e1574416f..e31dc5393 100644 --- a/src/core/include/megbrain/comp_node.h +++ b/src/core/include/megbrain/comp_node.h @@ -288,6 +288,17 @@ class CompNode { return m_impl->get_mem_addr_alignment(); } + /*! + * \brief get the size of the paddings which must be reserved at the + * end of memory chunk; guaranteed to be power of 2 + */ + size_t get_mem_padding() const { + size_t padding = m_impl->get_mem_padding(); + mgb_assert(!(padding & (padding - 1)), + "mem padding should be power of 2"); + return padding; + } + /*! * \brief release consecutive free chunks on all devices to defragment; * see DevMemAlloc::try_coalesce_free @@ -510,6 +521,7 @@ class CompNode { const void *src, size_t size) = 0; virtual size_t get_mem_addr_alignment() = 0; + virtual size_t get_mem_padding(); virtual std::unique_ptr create_event(size_t flags) = 0; diff --git a/src/core/test/static_mem_alloc.cpp b/src/core/test/static_mem_alloc.cpp index 4b72aa37e..1224207aa 100644 --- a/src/core/test/static_mem_alloc.cpp +++ b/src/core/test/static_mem_alloc.cpp @@ -34,10 +34,11 @@ struct TestParam { using Algo = StaticMemAlloc::AllocatorAlgo; Algo algo; - size_t align, nr_rand_opr, rng_seed; + size_t align, padding, nr_rand_opr, rng_seed; static decltype(auto) make_values( const std::vector &aligns, + const std::vector &paddings, const std::vector &nr_rand_opr) { std::vector data; std::mt19937_64 rng(next_rand_seed()); @@ -46,9 +47,11 @@ struct TestParam { for (auto nr: nr_rand_opr) { size_t seed = rng(); for (auto align: aligns) { -#define itcb(algo) data.push_back({Algo::algo, align, nr, seed}); - ITER_ALGO(itcb) + for (auto padding: paddings) { +#define itcb(algo) data.push_back({Algo::algo, align, padding, nr, seed}); + ITER_ALGO(itcb) #undef itcb + } } } return ::testing::ValuesIn(data); @@ -65,7 +68,7 @@ std::ostream& operator << (std::ostream &ostr, const TestParam &p) { ITER_ALGO(itcb); #undef itcb - ostr << "algo=" << algo << " align=" << p.align; + ostr << "algo=" << algo << " align=" << p.align << " padding=" << p.padding; if (p.nr_rand_opr != 1) ostr << " nr_rand_opr=" << p.nr_rand_opr << " rng_seed=" << p.rng_seed; return ostr; @@ -75,6 +78,10 @@ class BasicCorrectness: public ::testing::TestWithParam { protected: std::unique_ptr m_allocator; + size_t padding() const { + return GetParam().padding; + } + size_t align(size_t addr) const { return get_aligned_power2(addr, GetParam().align); } @@ -84,6 +91,7 @@ class BasicCorrectness: public ::testing::TestWithParam { void SetUp() override { m_allocator = StaticMemAlloc::make(GetParam().algo); m_allocator->alignment(GetParam().align); + m_allocator->padding(GetParam().padding); } }; @@ -102,8 +110,9 @@ TEST_P(BasicCorrectness, Alloc) { allocator->add(0, 1, 1, makeuk(1)); allocator->add(1, 2, 2, makeuk(2)); allocator->solve(); - ASSERT_EQ(std::max(align(2), 2 * align(1)), allocator->tot_alloc()); - ASSERT_EQ(std::max(align(2), 2 * align(1)), + ASSERT_EQ(std::max(align(2 + padding()), 2 * align(1 + padding())), + allocator->tot_alloc()); + ASSERT_EQ(std::max(align(2 + padding()), 2 * align(1 + padding())), allocator->tot_alloc_lower_bound()); } @@ -116,8 +125,8 @@ TEST_P(BasicCorrectness, Overwrite) { allocator->add_overwrite_spec(id2, id1, 0); allocator->solve(); - ASSERT_EQ(align(3), allocator->tot_alloc()); - ASSERT_EQ(align(3), allocator->tot_alloc_lower_bound()); + ASSERT_EQ(align(3 + padding()), allocator->tot_alloc()); + ASSERT_EQ(align(3 + padding()), allocator->tot_alloc_lower_bound()); } TEST_P(BasicCorrectness, OverwriteSameEnd) { @@ -127,12 +136,12 @@ TEST_P(BasicCorrectness, OverwriteSameEnd) { allocator->add_overwrite_spec(id1, id0, 0); allocator->solve(); - ASSERT_EQ(align(1), allocator->tot_alloc()); - ASSERT_EQ(align(1), allocator->tot_alloc_lower_bound()); + ASSERT_EQ(align(1 + padding()), allocator->tot_alloc()); + ASSERT_EQ(align(1 + padding()), allocator->tot_alloc_lower_bound()); } INSTANTIATE_TEST_CASE_P(TestStaticMemAllocAlgo, - BasicCorrectness, TestParam::make_values({1, 2}, {1})); + BasicCorrectness, TestParam::make_values({1, 2}, {1, 2}, {1})); #ifdef __OPTIMIZE__ @@ -220,7 +229,7 @@ TEST_P(RandomOpr, Main) { } INSTANTIATE_TEST_CASE_P(TestStaticMemAllocAlgo, - RandomOpr, TestParam::make_values({1, 256}, { + RandomOpr, TestParam::make_values({1, 256}, {1, 32}, { 10, INTERVAL_MOVE_MAX_SIZE, 1000, 10000})); TEST(TestStaticMemAllocAlgo, PushdownChain) { -- GitLab