diff --git a/imperative/python/megengine/device.py b/imperative/python/megengine/device.py
index 6fdc05a034f23f635d65229178af60f42606516f..95538320dfcfcd985fb920ac361874e9a8fda452 100644
--- a/imperative/python/megengine/device.py
+++ b/imperative/python/megengine/device.py
@@ -9,6 +9,7 @@
 import os
 
 from .core._imperative_rt.common import CompNode, DeviceType
+from .core._imperative_rt.common import set_prealloc_config as _set_prealloc_config
 
 __all__ = [
     "is_cuda_available",
@@ -16,6 +17,7 @@ __all__ = [
     "get_default_device",
     "set_default_device",
     "set_prealloc_config",
+    "DeviceType",
 ]
 
 
@@ -94,15 +96,15 @@ def set_prealloc_config(
     alignment: int = 1,
     min_req: int = 32 * 1024 * 1024,
     max_overhead: int = 0,
-    growth_factor: float = 2.0,
-    device_type: str = "gpu",
+    growth_factor=2.0,
+    device_type=DeviceType.CUDA,
 ):
-    """specifies how to pre-allocate from raw device allocator
+    """specifies how to pre-allocate from raw dev allocator
 
-    :param alignment: specifies the alignment in byte
-    :param min_req: min request size in byte
-    :param max_overhead: max overhead above required size in byte
-    :growth_factor: request size = growth_factor * current allocated size
+    :param alignment: specifies the alignment in bytes.
+    :param min_req: min request size in bytes.
+    :param max_overhead: max overhead above required size in bytes.
+    :growth_factor: ratio of the request size to the current allocated size.
     :device_type: the device type
 
     """
@@ -110,5 +112,4 @@ def set_prealloc_config(
     assert min_req > 0
     assert max_overhead >= 0
    assert growth_factor >= 1
-    t = _str2device_type(device_type)
-    _set_prealloc_config(alignment, min_req, max_overhead, growth_factor, t)
+    _set_prealloc_config(alignment, min_req, max_overhead, growth_factor, device_type)
diff --git a/imperative/python/src/common.cpp b/imperative/python/src/common.cpp
index cdaaf81988e1bdb23118b8ec16c968309a60b5ba..aeb1f9e9ee6512575c9774629f3c923eb3a12dcf 100644
--- a/imperative/python/src/common.cpp
+++ b/imperative/python/src/common.cpp
@@ -165,6 +165,9 @@ void init_common(py::module m) {
             .value("MULTITHREAD", CompNode::DeviceType::MULTITHREAD)
             .value("MAX_DEVICE_ID", CompNode::DeviceType::MAX_DEVICE_ID);
 
+    m.def("set_prealloc_config", &CompNode::set_prealloc_config,
+          "specifies how to pre-allocate from raw dev allocator");
+
     init_npy_num_bfloat16(m);
     init_npy_num_intbx(m);
 }
diff --git a/src/core/impl/comp_node/comp_node.cpp b/src/core/impl/comp_node/comp_node.cpp
index d5eb31342b902b9c8642e8d00d5b2f9e531575dd..34a3ee8c0b6d96dfa6ab96d343c416d31c40051d 100644
--- a/src/core/impl/comp_node/comp_node.cpp
+++ b/src/core/impl/comp_node/comp_node.cpp
@@ -12,6 +12,8 @@
 #include "megbrain/comp_node.h"
 #include "megbrain/comp_node_env.h"
 #include "megbrain/graph/exc_extra_info.h"
+#include "megbrain/common.h"
+#include "megbrain/comp_node/alloc.h"
 
 #include "./cuda/comp_node.h"
 #include "./cpu/comp_node.h"
@@ -420,6 +422,21 @@ void CompNode::activate() const {
     static_cast<Impl*>(m_impl)->env().activate();
 }
 
+void CompNode::set_prealloc_config(
+        size_t alignment,
+        size_t min_req,
+        size_t max_overhead,
+        double growth_factor,
+        DeviceType device_type) {
+    switch (device_type) {
+        case DeviceType::CUDA:
+            CudaCompNode::set_prealloc_config(alignment, min_req, max_overhead, growth_factor);
+            break;
+        default:
+            mgb_log_warn("unsupported device type for set_prealloc_config");
+    }
+}
+
 void* CompNode::alloc_device(size_t size) const {
     auto ret = m_impl->alloc_device(size);
     static_cast<Impl*>(m_impl)->env().on_mem_event(size, true, ret);
diff --git a/src/core/impl/comp_node/cuda/comp_node.cpp b/src/core/impl/comp_node/cuda/comp_node.cpp
index b313f87ff149e265b7c6479b0298d6337a8d1200..6fb157f488be45f301084b156ff3d40dbef1f04f 100644
--- a/src/core/impl/comp_node/cuda/comp_node.cpp
+++ b/src/core/impl/comp_node/cuda/comp_node.cpp
@@ -825,15 +825,16 @@ void CudaCompNode::set_prealloc_config(size_t alignment, size_t min_req,
         using T = CudaCompNodeImpl::StaticData;
         static std::aligned_storage_t<sizeof(T), alignof(T)> storage;
         sdptr = new (&storage) T;
-        MGB_LOCK_GUARD(sdptr->mtx);
         sdptr->prealloc_config.alignment = alignment;
         sdptr->prealloc_config.min_req = min_req;
         sdptr->prealloc_config.growth_factor = growth_factor;
         sdptr->prealloc_config.max_overhead = max_overhead;
     } else {
         mgb_log_warn(
-                "failed to invoke set_prealloc_config; fallback to default configuration; "
-                "prealloc_config should be specified before any invocation of load_cuda");
+                "invalid call to set_prealloc_config, will fall back to "
+                "default config; "
+                "prealloc_config should be specified before any CUDA "
+                "memory allocation");
         }
     }
 }
@@ -858,6 +859,10 @@ CudaCompNode::Impl* CudaCompNode::load_cuda(const Locator&, const Locator&) {
 
 void CudaCompNode::sync_all() {
 }
+void CudaCompNode::set_prealloc_config(size_t alignment, size_t min_req,
+                                       size_t max_overhead,
+                                       double growth_factor) {}
+
 #undef err
 
 #endif // MGB_CUDA
diff --git a/src/core/impl/comp_node/cuda/comp_node.h b/src/core/impl/comp_node/cuda/comp_node.h
index afb30ba5e8badc148d9ba85ef3bf5fe1c3a4e40b..f394e5603068077c115be43bcb02460e1a002145 100644
--- a/src/core/impl/comp_node/cuda/comp_node.h
+++ b/src/core/impl/comp_node/cuda/comp_node.h
@@ -32,9 +32,10 @@ namespace mgb {
         static Impl* load_cuda(
                 const Locator &locator, const Locator &locator_logical);
         static void sync_all();
+
+        static void set_prealloc_config(size_t alignment, size_t min_req,
+                                        size_t max_overhead, double growth_factor);
 };
 }
 
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
-
-
diff --git a/src/core/include/megbrain/comp_node.h b/src/core/include/megbrain/comp_node.h
index b9fcee3fc09c51ef83bc42a2e9e0e58448f8ba3e..9cf376a9418cee5e4b6ea917cfa39e13925d2531 100644
--- a/src/core/include/megbrain/comp_node.h
+++ b/src/core/include/megbrain/comp_node.h
@@ -308,6 +308,14 @@
          */
        static void try_coalesce_all_free_memory();
 
+        /*!
+         * \brief specifies how to pre-allocate from raw dev allocator
+         *
+         */
+        static void set_prealloc_config(size_t alignment, size_t min_req,
+                                        size_t max_overhead, double growth_factor,
+                                        DeviceType device_type);
+
         /* =================== synchronization ======================== */
 
         class Event;
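
Usage note: a minimal sketch (not part of the patch) of how the Python-level API added above is expected to be called. It uses only names exported by this diff (`set_prealloc_config` and `DeviceType` from `megengine.device`); the concrete argument values are illustrative, and per the warning added in cuda/comp_node.cpp the call must happen before any CUDA memory allocation.

    # Illustrative usage of the API introduced in this patch.
    from megengine.device import DeviceType, set_prealloc_config

    # Request at least 64 MiB per pre-allocation, aligned to 256 bytes,
    # growing each request to 2x the currently allocated size, with no
    # overhead allowed above the required size.
    set_prealloc_config(
        alignment=256,
        min_req=64 * 1024 * 1024,
        max_overhead=0,
        growth_factor=2.0,
        device_type=DeviceType.CUDA,  # only CUDA is dispatched; others log a warning
    )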