diff --git a/imperative/python/megengine/dtr/dtr.py b/imperative/python/megengine/dtr/dtr.py
index 5f043d3cef009b9eff6444769884cc76353d91b4..e17a9d1a2f7ec2f9437571f8fddf279445f8d7f2 100644
--- a/imperative/python/megengine/dtr/dtr.py
+++ b/imperative/python/megengine/dtr/dtr.py
@@ -118,7 +118,6 @@ def enable_sqrt_sampling(mod, value: bool):
 
 def enable():
     r"""Enable to record computing path of tensors and to perform DTR policy."""
-    _set_defrag(True)
     _set_option("enable_dtr_auto_drop", 1)
     _set_option("enable_drop", 1)
     _set_option("buffer_length", 0)
@@ -127,7 +126,6 @@ def enable():
 
 def disable():
     r"""Stop recording computing path of tensors and performing DTR policy."""
-    _set_defrag(False)
     _set_option("enable_dtr_auto_drop", 0)
     _set_option("enable_drop", 0)
     _set_option("record_computing_path", 0)
diff --git a/imperative/python/src/tensor.cpp b/imperative/python/src/tensor.cpp
index 45dc0f5f313c385b19def123f2b4113cf9f0948e..6a90f3c829e917404d28b9c9ce1bcf755d7e11d7 100644
--- a/imperative/python/src/tensor.cpp
+++ b/imperative/python/src/tensor.cpp
@@ -605,14 +605,6 @@ PyObject* TensorWrapper::_dev_tensor() {
     return py::cast(dev_tensor).release().ptr();
 }
 
-void TensorWrapper::_swap_out() {
-    interpreter_for_py->swap_out(m_tensor->m_handle.get());
-}
-
-void TensorWrapper::_swap_in() {
-    interpreter_for_py->swap_in(m_tensor->m_handle.get());
-}
-
 void TensorWrapper::_drop() {
     interpreter_for_py->drop(m_tensor->m_handle.get());
 }
@@ -931,8 +923,6 @@ void init_tensor(py::module m) {
             .def<&TensorWrapper::unsetscalar>("_unsetscalar")
             .def<&TensorWrapper::detach>("detach")
             .def<&TensorWrapper::_dev_tensor>("_dev_tensor")
-            .def<&TensorWrapper::_swap_out>("_swap_out")
-            .def<&TensorWrapper::_swap_in>("_swap_in")
             .def<&TensorWrapper::_drop>("_drop")
             .def<&TensorWrapper::reset_varnode>("_reset_varnode")
             .def<&TensorWrapper::_use_cnt>("_use_cnt")
@@ -1032,8 +1022,6 @@ void init_tensor(py::module m) {
     });
     m.def("get_option",
          [](std::string name) { return interpreter_for_py->get_option(name); });
-    m.def("_set_swap_flag",
-          [](bool flag) { interpreter_for_py->set_option("enable_swap", flag); });
     m.def("_set_drop_flag",
          [](bool flag) { interpreter_for_py->set_option("enable_drop", flag); });
     m.def("config_async_level", [](int level) {
diff --git a/imperative/python/src/tensor.h b/imperative/python/src/tensor.h
index 1d0652a919619aed115d2893b8e8bf492339674d..7caf8fb1792dcad843c907baebba679e16785abd 100644
--- a/imperative/python/src/tensor.h
+++ b/imperative/python/src/tensor.h
@@ -194,8 +194,6 @@ struct TensorWrapper {
     void setscalar();
     void unsetscalar();
     PyObject* _dev_tensor();
-    void _swap_in();
-    void _swap_out();
     void _drop();
     PyObject* varnode();
     void reset_varnode();
diff --git a/imperative/python/test/integration/test_converge_with_swap_and_drop.py b/imperative/python/test/integration/test_converge_with_drop.py
similarity index 93%
rename from imperative/python/test/integration/test_converge_with_swap_and_drop.py
rename to imperative/python/test/integration/test_converge_with_drop.py
index 468b464af715c0b71ad57198a5989f6f8c7bc942..5e31e3658f1c6511d38177a21c7d0d122d024db6 100644
--- a/imperative/python/test/integration/test_converge_with_swap_and_drop.py
+++ b/imperative/python/test/integration/test_converge_with_drop.py
@@ -14,12 +14,7 @@ import megengine as mge
 import megengine.autodiff as ad
 import megengine.functional as F
 from megengine import Tensor
-from megengine.core._imperative_rt.core2 import (
-    _set_drop_flag,
-    _set_swap_flag,
-    get_option,
-    set_option,
-)
+from megengine.core._imperative_rt.core2 import _set_drop_flag, get_option, set_option
 from megengine.module import Linear, Module
 from megengine.optimizer import SGD
 
@@ -70,7 +65,6 @@ class XORNet(Module):
 
     def forward(self, x):
         y = self.fc0(x)
-        x._swap_out()
         x = F.tanh(y)
         y = self.fc1(x)
         x = F.tanh(y)
@@ -80,8 +74,7 @@
         return y
 
 
-def test_training_converge_with_swap_and_drop():
-    _set_swap_flag(True)
+def test_training_converge_with_drop():
     _set_drop_flag(True)
     old_buffer_length = get_option("buffer_length")
     set_option("buffer_length", 0)
@@ -125,6 +118,5 @@ def test_training_converge_with_swap_and_drop():
         precision
     )
 
-    _set_swap_flag(False)
     _set_drop_flag(False)
     set_option("buffer_length", old_buffer_length)
diff --git a/imperative/python/test/unit/core/test_interpreter.py b/imperative/python/test/unit/core/test_interpreter.py
index 81d80f3bfe1b9e4f396422204a65f3353cd009b6..3bbb5cafeef3fd8485f515626141e4c7f0bef0eb 100644
--- a/imperative/python/test/unit/core/test_interpreter.py
+++ b/imperative/python/test/unit/core/test_interpreter.py
@@ -9,7 +9,6 @@ import megengine.functional as F
 from megengine.core._imperative_rt.core2 import (
     AsyncError,
     _set_drop_flag,
-    _set_swap_flag,
     config_async_level,
     get_async_level,
 )
@@ -61,24 +60,20 @@ def test_host_compute_elemwise():
     d = F.reshape(a, c)
 
 
-def test_swap_drop_basic():
-    _set_swap_flag(True)
+def test_drop_basic():
     _set_drop_flag(True)
     # test xpu compute
     x = mge.tensor(np.ones((3, 3)), dtype=np.float32)
     y = mge.tensor(np.ones((3, 3)), dtype=np.float32)
     z = x + y
-    x._swap_out()
     z._drop()
     z.numpy()
     # test host value compute
     x = mge.tensor(np.ones((2, 2)), dtype=np.float32)
     y = mge.tensor(np.ones((2, 2)), dtype=np.float32)
     z = x + y
-    x._swap_out()
     z._drop()
     z.numpy()
-    _set_swap_flag(False)
     _set_drop_flag(False)
 
 
diff --git a/imperative/src/impl/interpreter/commands.h b/imperative/src/impl/interpreter/commands.h
index 8653dd0bb0b22a894f50b0ab4046ee235e3e5424..95b98080659b48e538eaee5ce3055b54c2bb1c78 100644
--- a/imperative/src/impl/interpreter/commands.h
+++ b/imperative/src/impl/interpreter/commands.h
@@ -84,28 +84,6 @@ struct GetValue {
     const char* get_name() const { return "GetValue"; }
 };
 
-struct SwapIn {
-    TensorInfo* dest;
-
-    template <typename TFunctor>
-    void get_props(TFunctor&& functor) const {
-        functor("dest", dest);
-    }
-
-    const char* get_name() const { return "SwapIn"; }
-};
-
-struct SwapOut {
-    TensorInfo* dest;
-
-    template <typename TFunctor>
-    void get_props(TFunctor&& functor) const {
-        functor("dest", dest);
-    }
-
-    const char* get_name() const { return "SwapOut"; }
-};
-
 struct Drop {
     TensorInfo* dest;
 
@@ -171,8 +149,8 @@ struct PopScope {
 };
 
 using CommandData = std::variant<
-        Put, ApplyOp, Del, GetValue, SwapIn, SwapOut, Drop, SetOption, StartProfile,
-        StopProfile, PushScope, PopScope>;
+        Put, ApplyOp, Del, GetValue, Drop, SetOption, StartProfile, StopProfile,
+        PushScope, PopScope>;
 
 struct Command {
     uint64_t id;
diff --git a/imperative/src/impl/interpreter/interpreter_impl.cpp b/imperative/src/impl/interpreter/interpreter_impl.cpp
index 3d84340ccc5961d4af9aae8ec1565cd0b3aa9537..f3f98dc3bea52adb1aa93ff382096597089279b3 100644
--- a/imperative/src/impl/interpreter/interpreter_impl.cpp
+++ b/imperative/src/impl/interpreter/interpreter_impl.cpp
@@ -197,32 +197,6 @@ void ChannelImpl::del_impl(Handle handle) {
     m_buffer.enqueue(Del{info});
 }
 
-void ChannelImpl::swap_in(Handle handle) {
-    MGB_LOCK_GUARD(m_spin);
-    mgb_assert(check_available(), "Channel already closed");
-    auto& state = get_channel_state();
-    if (state.options.enable_swap) {
-        mgb_assert(
-                m_valid_handle.find(handle) != m_valid_handle.end(),
-                "invalid handle: %p", handle);
-        auto* info = reinterpret_cast<TensorInfo*>(handle);
-        m_buffer.enqueue(SwapIn{info});
-    }
-}
-
-void ChannelImpl::swap_out(Handle handle) {
-    MGB_LOCK_GUARD(m_spin);
-    mgb_assert(check_available(), "Channel already closed");
-    auto& state = get_channel_state();
-    if (state.options.enable_swap) {
-        mgb_assert(
-                m_valid_handle.find(handle) != m_valid_handle.end(),
-                "invalid handle: %p", handle);
-        auto* info = reinterpret_cast<TensorInfo*>(handle);
-        m_buffer.enqueue(SwapOut{info});
-    }
-}
-
 void ChannelImpl::drop(Handle handle) {
     MGB_LOCK_GUARD(m_spin);
     mgb_assert(check_available(), "Channel already closed");
@@ -266,7 +240,7 @@ void ChannelImpl::dispatch_default_cpu(
             input_tensornds.emplace_back(
                     info->ptr->get_value().proxy_to_default_cpu());
         } else {
-            // It's OK for SwapOut. We assign h_value before drop ptr
+            // We assign h_value before drop ptr
             mgb_assert(!info->h_value.empty(), "inp->h_value is empty!");
             input_tensornds.emplace_back(info->h_value.proxy_to_default_cpu());
         }
@@ -660,10 +634,6 @@ void ChannelImpl::regenerate(TensorInfo* dest) {
                  "dtr"});
         if (!m_applying)
             flush_apply_stack();
-    } else if (dest->evict_type == EvictType::SWAP) {
-        MGB_RECORD_EVENT(TensorCommandEvent, dest->id, TensorCommandKind::ReGen);
-        produce_tensor(dest, Tensor::make(dest->h_value));
-        MGB_RECORD_EVENT(TensorCommandFinishEvent, dest->id, TensorCommandKind::ReGen);
     }
 }
 
@@ -1185,29 +1155,6 @@ void ChannelImpl::process_one_task(Command& icmd) {
             MGB_LOCK_GUARD(m_mutex);
             notify_tensor_unsafe(cmd.dest);
             imperative_log_profile_end("GetValue");
-        } else if constexpr (std::is_same_v<T, SwapIn>) {
-            if (cmd.dest->invalid)
-                return;
-            MGB_RECORD_EVENT(
-                    TensorCommandEvent, cmd.dest->id, TensorCommandKind::SwapIn);
-            produce_tensor(cmd.dest, Tensor::make(cmd.dest->h_value));
-            MGB_RECORD_EVENT(
-                    TensorCommandFinishEvent, cmd.dest->id, TensorCommandKind::SwapIn);
-            sample_on_device(cmd.dest->desc.comp_node, false);
-        } else if constexpr (std::is_same_v<T, SwapOut>) {
-            if (cmd.dest->invalid)
-                return;
-            MGB_RECORD_EVENT(
-                    TensorCommandEvent, cmd.dest->id, TensorCommandKind::SwapOut);
-            cmd.dest->h_value = cmd.dest->ptr->get_value();
-            if (cmd.dest->evict_type == EvictType::NONE) {
-                cmd.dest->evict_type = EvictType::SWAP;
-                cmd.dest->status = TensorInfo::Swapped;
-                release_tensor(cmd.dest);
-            }
-            MGB_RECORD_EVENT(
-                    TensorCommandFinishEvent, cmd.dest->id, TensorCommandKind::SwapOut);
-            sample_on_device(cmd.dest->desc.comp_node, false);
         } else if constexpr (std::is_same_v<T, Drop>) {
             if (cmd.dest->invalid)
                 return;
@@ -1223,7 +1170,7 @@ void ChannelImpl::process_one_task(Command& icmd) {
             for (auto* info : cmd.capture_tensors) {
                 MGB_RECORD_EVENT(TensorDeclareEvent, info->id, info->name);
                 if (info->status == TensorInfo::Produced) {
-                    // TODO: handle swap/drop
+                    // TODO: handle drop
                     MGB_RECORD_EVENT(
                             TensorProduceEvent, info->id, info->desc.layout,
                             info->desc.comp_node, info->ptr->dev_tensor().raw_ptr());
@@ -1387,9 +1334,7 @@ auto ChannelImpl::CommandBuffer::find_last_usage(TensorInfo* dest, Range range)
             if (cmd.dest == dest) {
                 found = iter;
             }
-        } else if constexpr (
-                std::is_same_v<T, SwapIn> || std::is_same_v<T, SwapOut> ||
-                std::is_same_v<T, Drop>) {
+        } else if constexpr (std::is_same_v<T, Drop>) {
             // TODO: ignore swap-like commands, just remove them from buffer
             if (cmd.dest == dest) {
                 found = iter;
diff --git a/imperative/src/impl/interpreter/interpreter_impl.h b/imperative/src/impl/interpreter/interpreter_impl.h
index 34019db10f2f9816929bccd6820e0534b874657d..a786130b18028e2e0d9ff8abf1ca49d0c272001d 100644
--- a/imperative/src/impl/interpreter/interpreter_impl.h
+++ b/imperative/src/impl/interpreter/interpreter_impl.h
@@ -46,8 +46,6 @@ struct ChannelImpl : Interpreter::Channel {
 
     Handle put(const DeviceTensorND& value, const HostTensorND& hvalue) override;
     void del(Handle) override;
-    void swap_in(Handle) override;
-    void swap_out(Handle) override;
     void drop(Handle) override;
 
     SmallVector<Handle> apply_op(
diff --git a/imperative/src/impl/interpreter/option_manager.h b/imperative/src/impl/interpreter/option_manager.h
index 5b81cbcbf96845f593cc1a4e7da58a418153e021..af9548862009d20ac7fc8748952777f51f4ee454 100644
--- a/imperative/src/impl/interpreter/option_manager.h
+++ b/imperative/src/impl/interpreter/option_manager.h
@@ -35,7 +35,6 @@ public:
             "level 2: both device and user side errors are async;\n"
            "level 1: user side errors are sync;\n"
             "level 0: both sync.");
-    DEF_OPTION(enable_swap, "MEGENGINE_ENABLE_SWAP", 0, "");
     DEF_OPTION(enable_drop, "MEGENGINE_ENABLE_DROP", 0, "");
     DEF_OPTION(max_recompute_time, "MEGENGINE_MAX_RECOMP_TIME", 1, "");
     DEF_OPTION(
diff --git a/imperative/src/impl/interpreter/tensor_info.h b/imperative/src/impl/interpreter/tensor_info.h
index 0de89696e9d960e9ce80d4f139749debb9cee592..38bff09148cb1ac3df8a272404b4512e8a4d96fd 100644
--- a/imperative/src/impl/interpreter/tensor_info.h
+++ b/imperative/src/impl/interpreter/tensor_info.h
@@ -21,8 +21,7 @@ namespace interpreter::intl {
 
 enum EvictType {
     NONE = 0,
-    SWAP = 1,
-    DROP = 2,
+    DROP = 1,
 };
 
 /*!
@@ -49,7 +48,6 @@ struct TensorInfo {
         InvalidStatus,
         Allocated,
         Produced,
-        Swapped,
         Dropped,
         Deleted,
     };
@@ -75,9 +73,7 @@ struct TensorInfo {
     // Status should be only modified in worker thread
     Status status = InvalidStatus;
 
-    // Used by HostCompute and Memory Swap.
-    // HostCompute and Swap does not happen in one thread.
-    // Maybe a barrier is needed.
+    // Used by HostCompute
     HostTensorND h_value;
 
     // reserved for auto drop
diff --git a/imperative/src/impl/profiler/chrome_timeline.cpp b/imperative/src/impl/profiler/chrome_timeline.cpp
index 3e853a7f43968404253a18aaed9f060cf432e60e..775ac85c09110179f90570d4e996c145fa323157 100644
--- a/imperative/src/impl/profiler/chrome_timeline.cpp
+++ b/imperative/src/impl/profiler/chrome_timeline.cpp
@@ -232,10 +232,6 @@ struct ChromeTimelineEventVisitor : EventVisitor<ChromeTimelineEventVisitor> {
                 return "Drop";
             case TensorCommandKind::Del:
                 return "Del";
-            case TensorCommandKind::SwapIn:
-                return "SwapIn";
-            case TensorCommandKind::SwapOut:
-                return "SwapOut";
             case TensorCommandKind::RecFree:
                 return "RecFree";
             case TensorCommandKind::ReGen:
diff --git a/imperative/src/impl/profiler/events.h b/imperative/src/impl/profiler/events.h
index 3d8293427663d57cd2ac0c461eefdf79860dd521..9bba25cc0d1cdbe8e5537b92fa562ffc2b6555de 100644
--- a/imperative/src/impl/profiler/events.h
+++ b/imperative/src/impl/profiler/events.h
@@ -156,16 +156,7 @@ DEF_DUR_EVENT(StartProfile, { size_t capture_count; });
 
 DEF_DUR_EVENT(StopProfile, { size_t escape_count; });
 
-enum class TensorCommandKind {
-    Put,
-    Del,
-    SwapIn,
-    SwapOut,
-    Drop,
-    ReGen,
-    RecFree,
-    GetValue
-};
+enum class TensorCommandKind { Put, Del, Drop, ReGen, RecFree, GetValue };
 
 DEF_DUR_EVENT(TensorCommand, {
     using Kind = TensorCommandKind;
diff --git a/imperative/src/include/megbrain/imperative/interpreter.h b/imperative/src/include/megbrain/imperative/interpreter.h
index 92be1b4f7700db03e6922b13345277ef853e1cf2..88a28bf782e9e60d1b1bbb4c72fe8c3f5e33c26a 100644
--- a/imperative/src/include/megbrain/imperative/interpreter.h
+++ b/imperative/src/include/megbrain/imperative/interpreter.h
@@ -39,8 +39,6 @@ struct Interpreter {
 
     virtual Handle put(const DeviceTensorND& value, const HostTensorND& hvalue) = 0;
     virtual void del(Handle) = 0;
-    virtual void swap_in(Handle) = 0;
-    virtual void swap_out(Handle) = 0;
     virtual void drop(Handle) = 0;
 
     virtual SmallVector<Handle> apply_op(