diff --git a/imperative/python/megengine/device.py b/imperative/python/megengine/device.py
index 6fdc05a034f23f635d65229178af60f42606516f..95538320dfcfcd985fb920ac361874e9a8fda452 100644
--- a/imperative/python/megengine/device.py
+++ b/imperative/python/megengine/device.py
@@ -9,6 +9,7 @@
 import os
 
 from .core._imperative_rt.common import CompNode, DeviceType
+from .core._imperative_rt.common import set_prealloc_config as _set_prealloc_config
 
 __all__ = [
     "is_cuda_available",
@@ -16,6 +17,7 @@ __all__ = [
     "get_default_device",
     "set_default_device",
     "set_prealloc_config",
+    "DeviceType",
 ]
 
 
@@ -94,15 +96,15 @@ def set_prealloc_config(
     alignment: int = 1,
     min_req: int = 32 * 1024 * 1024,
     max_overhead: int = 0,
-    growth_factor: float = 2.0,
-    device_type: str = "gpu",
+    growth_factor=2.0,
+    device_type=DeviceType.CUDA,
 ):
-    """specifies how to pre-allocate from raw device allocator
+    """specifies how to pre-allocate from raw dev allocator
 
-    :param alignment: specifies the alignment in byte
-    :param min_req: min request size in byte
-    :param max_overhead: max overhead above required size in byte
-    :growth_factor: request size = growth_factor * current allocated size
+    :param alignment: specifies the alignment in bytes.
+    :param min_req: min request size in bytes.
+    :param max_overhead: max overhead above required size in bytes.
+    :growth_factor: ratio of the request size to the current allocated size.
     :device_type: the device type
 
     """
@@ -110,5 +112,4 @@ def set_prealloc_config(
     assert min_req > 0
     assert max_overhead >= 0
    assert growth_factor >= 1
-    t = _str2device_type(device_type)
-    _set_prealloc_config(alignment, min_req, max_overhead, growth_factor, t)
+    _set_prealloc_config(alignment, min_req, max_overhead, growth_factor, device_type)
diff --git a/imperative/python/src/common.cpp b/imperative/python/src/common.cpp
index cdaaf81988e1bdb23118b8ec16c968309a60b5ba..aeb1f9e9ee6512575c9774629f3c923eb3a12dcf 100644
--- a/imperative/python/src/common.cpp
+++ b/imperative/python/src/common.cpp
@@ -165,6 +165,9 @@ void init_common(py::module m) {
             .value("MULTITHREAD", CompNode::DeviceType::MULTITHREAD)
             .value("MAX_DEVICE_ID", CompNode::DeviceType::MAX_DEVICE_ID);
 
+    m.def("set_prealloc_config", &CompNode::set_prealloc_config,
+          "specifies how to pre-allocate from raw dev allocator");
+
     init_npy_num_bfloat16(m);
     init_npy_num_intbx(m);
 }
diff --git a/src/core/impl/comp_node/comp_node.cpp b/src/core/impl/comp_node/comp_node.cpp
index d5eb31342b902b9c8642e8d00d5b2f9e531575dd..34a3ee8c0b6d96dfa6ab96d343c416d31c40051d 100644
--- a/src/core/impl/comp_node/comp_node.cpp
+++ b/src/core/impl/comp_node/comp_node.cpp
@@ -12,6 +12,8 @@
 #include "megbrain/comp_node.h"
 #include "megbrain/comp_node_env.h"
 #include "megbrain/graph/exc_extra_info.h"
+#include "megbrain/common.h"
+#include "megbrain/comp_node/alloc.h"
 
 #include "./cuda/comp_node.h"
 #include "./cpu/comp_node.h"
@@ -420,6 +422,21 @@ void CompNode::activate() const {
     static_cast<Impl*>(m_impl)->env().activate();
 }
 
+void CompNode::set_prealloc_config(
+        size_t alignment,
+        size_t min_req,
+        size_t max_overhead,
+        double growth_factor,
+        DeviceType device_type) {
+    switch (device_type) {
+        case DeviceType::CUDA:
+            CudaCompNode::set_prealloc_config(alignment, min_req, max_overhead, growth_factor);
+            break;
+        default:
+            mgb_log_warn("unsupported device type for set_prealloc_config");
+    }
+}
+
 void* CompNode::alloc_device(size_t size) const {
     auto ret = m_impl->alloc_device(size);
     static_cast<Impl*>(m_impl)->env().on_mem_event(size, true, ret);
diff --git a/src/core/impl/comp_node/cuda/comp_node.cpp b/src/core/impl/comp_node/cuda/comp_node.cpp
index b313f87ff149e265b7c6479b0298d6337a8d1200..6fb157f488be45f301084b156ff3d40dbef1f04f 100644
--- a/src/core/impl/comp_node/cuda/comp_node.cpp
+++ b/src/core/impl/comp_node/cuda/comp_node.cpp
@@ -825,15 +825,16 @@ void CudaCompNode::set_prealloc_config(size_t alignment, size_t min_req,
         using T = CudaCompNodeImpl::StaticData;
         static std::aligned_storage_t<sizeof(T), alignof(T)> storage;
         sdptr = new (&storage) T;
-        MGB_LOCK_GUARD(sdptr->mtx);
         sdptr->prealloc_config.alignment = alignment;
         sdptr->prealloc_config.min_req = min_req;
         sdptr->prealloc_config.growth_factor = growth_factor;
         sdptr->prealloc_config.max_overhead = max_overhead;
     } else {
         mgb_log_warn(
-                "failed to invoke set_prealloc_config; fallback to default configuration; "
-                "prealloc_config should be specified before any invocation of load_cuda");
+                "invalid call to set_prealloc_config, will fall back to "
+                "default config; "
+                "prealloc_config should be specified before any CUDA "
+                "memory allocation");
         }
     }
 }
@@ -858,6 +859,10 @@ CudaCompNode::Impl* CudaCompNode::load_cuda(const Locator&, const Locator&) {
 
 void CudaCompNode::sync_all() {
 }
+void CudaCompNode::set_prealloc_config(size_t alignment, size_t min_req,
+                                       size_t max_overhead,
+                                       double growth_factor) {}
+
 #undef err
 
 #endif // MGB_CUDA
diff --git a/src/core/impl/comp_node/cuda/comp_node.h b/src/core/impl/comp_node/cuda/comp_node.h
index afb30ba5e8badc148d9ba85ef3bf5fe1c3a4e40b..f394e5603068077c115be43bcb02460e1a002145 100644
--- a/src/core/impl/comp_node/cuda/comp_node.h
+++ b/src/core/impl/comp_node/cuda/comp_node.h
@@ -32,9 +32,10 @@ namespace mgb {
         static Impl* load_cuda(
                 const Locator &locator, const Locator &locator_logical);
         static void sync_all();
+
+        static void set_prealloc_config(size_t alignment, size_t min_req,
+                                        size_t max_overhead, double growth_factor);
 };
 }
 
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
-
-
diff --git a/src/core/include/megbrain/comp_node.h b/src/core/include/megbrain/comp_node.h
index b9fcee3fc09c51ef83bc42a2e9e0e58448f8ba3e..9cf376a9418cee5e4b6ea917cfa39e13925d2531 100644
--- a/src/core/include/megbrain/comp_node.h
+++ b/src/core/include/megbrain/comp_node.h
@@ -308,6 +308,14 @@
          */
        static void try_coalesce_all_free_memory();
 
+        /*!
+         * \brief specifies how to pre-allocate from raw dev allocator
+         *
+         */
+        static void set_prealloc_config(size_t alignment, size_t min_req,
+                                        size_t max_overhead, double growth_factor,
+                                        DeviceType device_type);
+
         /* =================== synchronization ======================== */
 
         class Event;
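
Usage note: a minimal sketch (not part of the patch) of how the Python-level API added above is expected to be called. It uses only names exported by this diff (`set_prealloc_config` and `DeviceType` from `megengine.device`); the concrete argument values are illustrative, and per the warning added in cuda/comp_node.cpp the call must happen before any CUDA memory allocation.

    # Illustrative usage of the API introduced in this patch.
    from megengine.device import DeviceType, set_prealloc_config

    # Request at least 64 MiB per pre-allocation, aligned to 256 bytes,
    # growing each request to 2x the currently allocated size, with no
    # overhead allowed above the required size.
    set_prealloc_config(
        alignment=256,
        min_req=64 * 1024 * 1024,
        max_overhead=0,
        growth_factor=2.0,
        device_type=DeviceType.CUDA,  # only CUDA is dispatched; others log a warning
    )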