Commit 2676fb73 authored by Megvii Engine Team

refactor(imperative): remove enable_defrag interface as it is enabled by default

GitOrigin-RevId: de9e7d7f16e4d17e1e283159bef35f3a86509dc6
Parent d909950f
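This commit makes the defragment-and-retry fallback unconditional: an allocation is attempted once, and only if it throws MemAllocError does the BlobManager defragment the comp node and retry, instead of gating that behavior behind set_enable. The sketch below illustrates that control flow in isolation; it is not MegEngine code — FakeDevicePool, try_alloc, and defragment are hypothetical stand-ins, and plain try/catch replaces the MGB_TRY/MGB_CATCH macros used in the real implementation.

// Minimal, self-contained sketch of the alloc -> defrag -> retry pattern.
// All names here are hypothetical; the real logic lives in BlobManagerImpl.
#include <cstddef>
#include <iostream>
#include <new>

struct FakeDevicePool {
    bool fragmented = true;  // pretend free memory is split into small chunks

    void* try_alloc(std::size_t size) {
        if (fragmented && size > 1024)
            throw std::bad_alloc();  // no single free block is large enough
        return ::operator new(size);
    }

    void defragment() {
        // the real BlobManager moves live blobs so that free space coalesces
        fragmented = false;
    }
};

void* alloc_with_defrag(FakeDevicePool& pool, std::size_t size) {
    try {
        return pool.try_alloc(size);   // first attempt
    } catch (const std::bad_alloc&) {
        std::cerr << "allocation failed; defragmenting and retrying\n";
        pool.defragment();             // compact free memory
        return pool.try_alloc(size);   // retry; may still throw
    }
}

int main() {
    FakeDevicePool pool;
    void* p = alloc_with_defrag(pool, 4096);  // succeeds only after defrag
    std::cout << "allocated " << p << "\n";
    ::operator delete(p);
}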
@@ -10,7 +10,6 @@ import re
 from typing import Union

 from ..core._imperative_rt.core2 import set_option as _set_option
-from ..core._imperative_rt.utils import _set_defrag

 _eviction_threshold = 0
 _evictee_minimum_size = 1024 ** 2
...
@@ -216,9 +216,6 @@ void init_utils(py::module m) {
 #endif

     // Debug code, internal only
-    m.def("_set_defrag", [](bool enable) {
-        mgb::imperative::BlobManager::inst()->set_enable(enable);
-    });
     m.def("_defrag", [](const mgb::CompNode& cn) {
         mgb::imperative::BlobManager::inst()->defrag(cn);
     });
...
@@ -41,22 +41,14 @@ void BlobManagerImpl::unregister_blob(Blob* blob) {
 }

 void BlobManagerImpl::alloc_with_defrag(Blob* blob, size_t size) {
-    if (!m_enable) {
-        alloc_direct(blob, size);
-    } else {
-        // // debug
-        // defrag(blob->m_comp_node);
-        // alloc_direct(blob, storage, size);
-
-        // try alloc
-        MGB_TRY { alloc_direct(blob, size); }
-        // if fail, try defrag, alloc again
-        MGB_CATCH(MemAllocError&, {
-            mgb_log_warn("memory allocation failed for blob; try defragmenting");
-            defrag(blob->m_comp_node);
-            alloc_direct(blob, size);
-        });
-    }
+    // try alloc
+    MGB_TRY { alloc_direct(blob, size); }
+    // if fail, try defrag, alloc again
+    MGB_CATCH(MemAllocError&, {
+        mgb_log_warn("memory allocation failed for blob; try defragmenting");
+        defrag(blob->m_comp_node);
+        alloc_direct(blob, size);
+    });
 }

 void BlobManagerImpl::alloc_direct(Blob* blob, size_t size) {
@@ -69,16 +61,12 @@ void BlobManagerImpl::alloc_direct(Blob* blob, size_t size) {
 DeviceTensorND BlobManagerImpl::alloc_workspace_with_defrag(
         CompNode cn, TensorLayout layout) {
     DeviceTensorND dev_tensor;
-    if (!m_enable) {
-        dev_tensor = alloc_workspace(cn, layout);
-    } else {
-        MGB_TRY { dev_tensor = alloc_workspace(cn, layout); }
-        MGB_CATCH(MemAllocError&, {
-            mgb_log_warn("memory allocation failed for workspace; try defragmenting");
-            defrag(cn);
-            dev_tensor = alloc_workspace(cn, layout);
-        });
-    }
+    MGB_TRY { dev_tensor = alloc_workspace(cn, layout); }
+    MGB_CATCH(MemAllocError&, {
+        mgb_log_warn("memory allocation failed for workspace; try defragmenting");
+        defrag(cn);
+        dev_tensor = alloc_workspace(cn, layout);
+    });
     return dev_tensor;
 };
@@ -154,10 +142,6 @@ void BlobManagerImpl::defrag(const CompNode& cn) {
     cn.sync();
 }

-void BlobManagerImpl::set_enable(bool flag) {
-    m_enable = flag;
-}
-
 struct BlobManagerStub : BlobManager {
     void alloc_direct(Blob* blob, size_t size) {
         mgb_assert(0, "prohibited after global variable destruction");
@@ -172,9 +156,6 @@ struct BlobManagerStub : BlobManager {
         mgb_assert(0, "prohibited after global variable destruction");
     };
     void unregister_blob(Blob* blob){};
-    void set_enable(bool flag) {
-        mgb_assert(0, "prohibited after global variable destruction");
-    };
     void defrag(const CompNode& cn) {
         mgb_assert(0, "prohibited after global variable destruction");
     };
...
@@ -38,7 +38,6 @@ class BlobManagerImpl final : public BlobManager {
     std::mutex m_mtx;
     CompNode::UnorderedMap<BlobSetWithMux> m_comp2blobs_map;
-    bool m_enable = true;

     void defrag(const CompNode& cn) override;
@@ -57,8 +56,6 @@ public:
     void register_blob(Blob* blob) override;
     void unregister_blob(Blob* blob) override;

-    void set_enable(bool flag) override;
-
 };

 } // namespace imperative
...
@@ -33,8 +33,6 @@ public:
     virtual void unregister_blob(Blob* blob) = 0;

-    virtual void set_enable(bool flag) = 0;
-
     virtual void defrag(const CompNode& cn) = 0;
 };
...
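For reference, here is a rough header-style sketch of what the BlobManager interface looks like after the removal, reconstructed only from the signatures visible in the hunks above; the actual header may declare or order members differently, and the forward declarations merely stand in for the real MegBrain types.

// Sketch only: reconstructed from this diff, not copied from the real header.
#include <cstddef>

namespace mgb {
class CompNode;
class DeviceTensorND;
class TensorLayout;

namespace imperative {
class Blob;

class BlobManager {
public:
    virtual ~BlobManager() = default;

    virtual void alloc_direct(Blob* blob, std::size_t size) = 0;
    virtual void alloc_with_defrag(Blob* blob, std::size_t size) = 0;
    virtual DeviceTensorND alloc_workspace_with_defrag(
            CompNode cn, TensorLayout layout) = 0;

    virtual void register_blob(Blob* blob) = 0;
    virtual void unregister_blob(Blob* blob) = 0;

    // explicit defragmentation stays; only the set_enable(bool) toggle is gone,
    // because the defrag-on-failure fallback is now always active
    virtual void defrag(const CompNode& cn) = 0;

    static BlobManager* inst();  // singleton accessor, as used in the diff
};

}  // namespace imperative
}  // namespace mgb

Callers that previously switched the flag off simply drop that call; BlobManager::inst()->defrag(cn) remains available for forcing a defragmentation explicitly, as the retained _defrag binding in the utils.cpp hunk shows.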
@@ -94,15 +94,13 @@ TEST(TestImperative, Split) {
 }

 #if MGB_CUDA && MGB_ENABLE_EXCEPTION
-void run_graph(size_t mem_reserved, bool enable_defrag) {
+void run_graph(size_t mem_reserved) {
     CompNode::try_coalesce_all_free_memory();
     CompNode::finalize();
     auto cn = CompNode::load("gpux");
     cn.sync(); // wait for async init to finish

-    BlobManager::inst()->set_enable(enable_defrag);
-
     HostTensorGenerator<> gen;

     using TensorPtr = std::shared_ptr<Tensor>;
     TensorPtr ptr_a[100];
@@ -159,10 +157,6 @@ TEST(TestImperative, Defragment) {
     }
     auto reserve_setting = ssprintf("b:%zu", reserve);
-    auto do_run = [reserve]() {
-        ASSERT_THROW(run_graph(reserve, false), MemAllocError);
-        run_graph(reserve, true);
-    };
-
+    auto do_run = [reserve]() { run_graph(reserve); };

     // reserve memory explicitly to avoid uncontrollable factors
     constexpr const char* KEY = "MGB_CUDA_RESERVE_MEMORY";
...
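The Defragment test apparently still pins the CUDA memory reservation through the MGB_CUDA_RESERVE_MEMORY environment variable before invoking do_run, so that the large allocation reliably fails first and the now-automatic defragmentation path is exercised. The helper it uses for that lies outside this hunk; the snippet below is only a generic illustration of running a callable under a temporary environment setting. with_env is a hypothetical name, and setenv/unsetenv are POSIX calls, not MegEngine APIs.

// Hypothetical helper, not MegEngine code: run `fn` with `key` temporarily
// set to `value`, then restore the previous environment.
#include <cstdlib>
#include <functional>
#include <string>

void with_env(const char* key, const std::string& value,
              const std::function<void()>& fn) {
    const char* old = std::getenv(key);
    const bool had_old = old != nullptr;
    const std::string saved = had_old ? old : "";

    setenv(key, value.c_str(), /*overwrite=*/1);  // POSIX; not available on MSVC
    fn();

    if (had_old)
        setenv(key, saved.c_str(), 1);  // restore the previous value
    else
        unsetenv(key);                  // drop the temporary setting entirely
}

// Usage in the spirit of the test above (names taken from the diff):
//     with_env(KEY, reserve_setting, do_run);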