feat(mgb/comp_node): add set_prealloc_config

GitOrigin-RevId: e725e7efdd78e4e8ae85ac963988aef55ee5f9c4

feat(mgb/comp_node): add set_prealloc_config
GitOrigin-RevId: e725e7efdd78e4e8ae85ac963988aef55ee5f9c4
a7b9ece4 · Megvii Engine Team · 066da0bf · a7b9ece4 · a7b9ece4 · a7b9ece4
6 changed files
--- a/imperative/python/megengine/device.py
+++ b/imperative/python/megengine/device.py
@@ -9,6 +9,7 @@
 import os

 from .core._imperative_rt.common import CompNode, DeviceType
+from .core._imperative_rt.common import set_prealloc_config as _set_prealloc_config

 __all__ = [
    "is_cuda_available",
@@ -16,6 +17,7 @@ __all__ = [
    "get_default_device",
    "set_default_device",
    "set_prealloc_config",
+    "DeviceType",
 ]


@@ -94,15 +96,15 @@ def set_prealloc_config(
    alignment: int = 1,
    min_req: int = 32 * 1024 * 1024,
    max_overhead: int = 0,
-    growth_factor: float = 2.0,
-    device_type: str = "gpu",
+    growth_factor=2.0,
+    device_type=DeviceType.CUDA,
 ):
-    """specifies how to pre-allocate from raw device allocator
+    """specifies how to pre-allocate from raw dev allocator

-    :param alignment: specifies the alignment in byte
-    :param min_req: min request size in byte
-    :param max_overhead: max overhead above required size in byte
-    :growth_factor: request size = growth_factor * current allocated size
+    :param alignment: specifies the alignment in bytes.
+    :param min_req: min request size in bytes.
+    :param max_overhead: max overhead above required size in bytes.
+    :param growth_factor: ratio of request size to current allocated size
    :device_type: the device type

    """
@@ -110,5 +112,4 @@ def set_prealloc_config(
    assert min_req > 0
    assert max_overhead >= 0
    assert growth_factor >= 1
-    t = _str2device_type(device_type)
-    _set_prealloc_config(alignment, min_req, max_overhead, growth_factor, t)
+    _set_prealloc_config(alignment, min_req, max_overhead, growth_factor, device_type)
--- a/imperative/python/src/common.cpp
+++ b/imperative/python/src/common.cpp
@@ -165,6 +165,9 @@ void init_common(py::module m) {
            .value("MULTITHREAD", CompNode::DeviceType::MULTITHREAD)
            .value("MAX_DEVICE_ID", CompNode::DeviceType::MAX_DEVICE_ID);

+    m.def("set_prealloc_config", &CompNode::set_prealloc_config, 
+        "specifies how to pre-allocate from raw dev allocator");
+
    init_npy_num_bfloat16(m);
    init_npy_num_intbx(m);
 }
--- a/src/core/impl/comp_node/comp_node.cpp
+++ b/src/core/impl/comp_node/comp_node.cpp
@@ -12,6 +12,8 @@
 #include "megbrain/comp_node.h"
 #include "megbrain/comp_node_env.h"
 #include "megbrain/graph/exc_extra_info.h"
+#include "megbrain/common.h"
+#include "megbrain/comp_node/alloc.h"

 #include "./cuda/comp_node.h"
 #include "./cpu/comp_node.h"
@@ -420,6 +422,21 @@ void CompNode::activate() const {
    static_cast<Impl*>(m_impl)->env().activate();
 }

+void CompNode::set_prealloc_config(  // forwards the config to the backend selected by device_type
+    size_t alignment, 
+    size_t min_req, 
+    size_t max_overhead, 
+    double growth_factor, 
+    DeviceType device_type) {
+    switch (device_type) {
+        case DeviceType::CUDA:  // the only device type forwarded to a backend here
+            CudaCompNode::set_prealloc_config(alignment, min_req, max_overhead, growth_factor);
+            break;
+        default:  // any other device type: log a warning, nothing is configured
+            mgb_log_warn("unsupported device type for set_prealloc_config");
+    };
+}
+
 void* CompNode::alloc_device(size_t size) const {
    auto ret = m_impl->alloc_device(size);
    static_cast<Impl*>(m_impl)->env().on_mem_event(size, true, ret);

--- a/src/core/impl/comp_node/cuda/comp_node.cpp
+++ b/src/core/impl/comp_node/cuda/comp_node.cpp
@@ -825,15 +825,16 @@ void CudaCompNode::set_prealloc_config(size_t alignment, size_t min_req,
            using T = CudaCompNodeImpl::StaticData;
            static std::aligned_storage_t<sizeof(T), alignof(T)> storage;
            sdptr = new(&storage)T;
-            MGB_LOCK_GUARD(sdptr->mtx);
            sdptr->prealloc_config.alignment = alignment;
            sdptr->prealloc_config.min_req = min_req;
            sdptr->prealloc_config.growth_factor = growth_factor;
            sdptr->prealloc_config.max_overhead = max_overhead;
        } else {
            mgb_log_warn(
-                "failed to invoke set_prealloc_config; fallback to default configuration; "
-                "prealloc_config should be specified before any invocation of load_cuda");
+                "invalid call to set_prealloc_config, will fallback to "
+                "default config; "
+                "prealloc_config should be specified before any CUDA "
+                "memory allocation");
        }
    }
 }
@@ -858,6 +859,10 @@ CudaCompNode::Impl* CudaCompNode::load_cuda(const Locator&, const Locator&) {
 void CudaCompNode::sync_all() {
 }

+void CudaCompNode::set_prealloc_config(size_t alignment, size_t min_req, 
+                                       size_t max_overhead,
+                                       double growth_factor) {}  // no-op: arguments ignored (presumably the non-CUDA stub; confirm)
+
 #undef err

 #endif // MGB_CUDA

--- a/src/core/impl/comp_node/cuda/comp_node.h
+++ b/src/core/impl/comp_node/cuda/comp_node.h
@@ -32,9 +32,10 @@ namespace mgb {
            static Impl* load_cuda(
                    const Locator &locator, const Locator &locator_logical);
            static void sync_all();
+            //! set the pre-allocation config; same parameters as CompNode::set_prealloc_config minus device_type
+            static void set_prealloc_config(size_t alignment, size_t min_req,
+                                            size_t max_overhead, double growth_factor);
    };
 }

 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
-
-
--- a/src/core/include/megbrain/comp_node.h
+++ b/src/core/include/megbrain/comp_node.h
@@ -308,6 +308,14 @@ class CompNode {
         */
        static void try_coalesce_all_free_memory();

+        /*!
+         * \brief specifies how to pre-allocate from raw dev allocator
+         * \param device_type selects which device backend receives the config
+         */
+        static void set_prealloc_config(size_t alignment, size_t min_req,
+                                        size_t max_overhead, double growth_factor,
+                                        DeviceType device_type);
+
        /* =================== synchronization ======================== */

        class Event;