diff --git a/imperative/python/megengine/autodiff/grad_manager.py b/imperative/python/megengine/autodiff/grad_manager.py
index 72c689fd4453bcb4143d6955dacc1eef8c0a77f4..d2d73f3dde79f41673565701d6dc11ba26bf3bb1 100644
--- a/imperative/python/megengine/autodiff/grad_manager.py
+++ b/imperative/python/megengine/autodiff/grad_manager.py
@@ -1,6 +1,4 @@
 import weakref
-from collections import defaultdict
-from contextlib import contextmanager
 from typing import Callable, Iterable

 from ..core._imperative_rt.core2 import pop_scope, push_scope, set_option
diff --git a/imperative/python/megengine/jit/tracing.py b/imperative/python/megengine/jit/tracing.py
index 53f91c60c5f514d60a2c96ab6c82d96685431278..a8c222f8942abe8e598b68c938b77a1adb5e3d97 100644
--- a/imperative/python/megengine/jit/tracing.py
+++ b/imperative/python/megengine/jit/tracing.py
@@ -1125,10 +1125,6 @@ def apply_compiled_mode(op: OpDef, *args: RawTensor):

 def apply_const_compiled_mode(value, dtype, device, is_const, no_cache, name):
     if skip_tracing:
-        args = [
-            RawTensor(x._dev_tensor()) if x.__class__ is CompiledTensorProxy else x
-            for x in args
-        ]
         unset_tracing()
         ret = RawTensor(value, dtype, device, False, name)
         set_tracing()
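
The lines removed from apply_const_compiled_mode referenced args, which that function never defines (its parameters are value, dtype, device, is_const, no_cache, name), so the block was dead code apparently copied from apply_compiled_mode and would have raised NameError if ever reached. The unset_tracing()/set_tracing() pair that remains is no longer a plain boolean flip after this patch: the helpers now set and clear the TRACE bit in a process-wide enable mask. A minimal sketch of that mechanic, where flags_t and TRACE are stand-ins for Tensor::flags_t and Tensor::Flags::TRACE rather than the real definitions:

    #include <cassert>
    #include <cstdint>

    // Stand-ins for Tensor::flags_t and Tensor::Flags::TRACE (illustrative only).
    using flags_t = std::uint64_t;
    constexpr flags_t TRACE = 1 << 0;

    // Mirrors ApplyContext::global_enable after this patch.
    static flags_t global_enable = 0;

    void set_tracing() { global_enable |= TRACE; }    // switch the TRACE bit on
    void unset_tracing() { global_enable &= ~TRACE; } // switch the TRACE bit off

    int main() {
        set_tracing();
        assert(global_enable & TRACE);    // every later apply() call sees TRACE
        unset_tracing();
        assert(!(global_enable & TRACE)); // back to the untraced default
        return 0;
    }

Because the bit lives in one global mask, apply_const_compiled_mode can suspend tracing around the RawTensor construction and restore it afterwards without touching any per-tensor state.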
diff --git a/imperative/python/src/tensor.cpp b/imperative/python/src/tensor.cpp
index 1dfddc0fb4476c9f57d79da93b754b00f662f83b..8d44ae05c56f1174bbddb59160b53de6084d9493 100644
--- a/imperative/python/src/tensor.cpp
+++ b/imperative/python/src/tensor.cpp
@@ -50,29 +50,20 @@
 REGISTE_APPLY_FUNC(cpp_apply_backward_varnode)

 #undef REGISTE_APPLY_FUNC

-bool is_tracing = false;
-
-#define SET_UNSET_PROP(mode)  \
-    void set_##mode() {       \
-        is_##mode = true;     \
-    }                         \
-    void unset_##mode() {     \
-        is_##mode = false;    \
-    }                         \
-
-SET_UNSET_PROP(tracing)
+Tensor::flags_t ApplyContext::global_disable = 0;
+Tensor::flags_t ApplyContext::global_enable = 0;

-#undef SET_UNSET_PROP
+void set_tracing() { ApplyContext::global_enable |= Tensor::Flags::TRACE; }
+void unset_tracing() { ApplyContext::global_enable &= ~Tensor::Flags::TRACE; }

 bool skip_tracing = false;

-Tensor::flags_t ApplyContext::global_disable = 0;
-
 apply_result_t apply(ApplyContext& ctx) {
     // emulating scalar should be put to specific op's apply, e.g.,
     // elementwise, reduce, typecvt. Currently it's still handled at python
     // side. It could be moved to C++ side if it has an impact on performance
     auto flags = ctx.flags & ~ApplyContext::global_disable;
+    flags = flags | ApplyContext::global_enable;
     if (flags & Tensor::Flags::SCALAR) {
         // TODO: emulate scalar
@@ -190,10 +181,6 @@ PyObject* py_apply(PyObject* self, PyObject*const* args, size_t nargs/* , PyObje
         }
     }

-    if (is_tracing) {
-        ctx.flags |= Tensor::Flags::TRACE;
-    }
-
     auto outputs = apply(ctx);
     size_t nout = outputs.size();
     auto ret = py::tuple(nout);
@@ -255,7 +242,7 @@ TensorWrapper::TensorWrapper(PyObject* args, PyObject* kwargs) {
             if (tup[nargs - 1].ptr() != Py_None) name = tup[nargs - 1].cast<std::string>();

             // const op
-            if (is_const && is_tracing) {
+            if (is_const && (ApplyContext::global_enable == Tensor::Flags::TRACE)) {
                 auto py_ret = PyObject_Call(cpp_apply_const_with_tracing, tup.ptr(), nullptr);
                 if (!py_ret) throw py::error_already_set();
                 auto py_list = py::reinterpret_steal<py::list>(py_ret);
diff --git a/imperative/python/src/tensor.h b/imperative/python/src/tensor.h
index fe9541155d5e6d5d17541dd2a31aae1bc8c0b10d..89b5057527fd36e9933b0de7d2122b0a7cc4312a 100644
--- a/imperative/python/src/tensor.h
+++ b/imperative/python/src/tensor.h
@@ -193,8 +193,9 @@ PyObject* py_apply(PyObject* self, PyObject*const* args, size_t nargs/* , PyObje

 struct ApplyContext {
     static Tensor::flags_t global_disable;
+    static Tensor::flags_t global_enable;

-    Tensor::flags_t flags;
+    Tensor::flags_t flags = 0;
     std::shared_ptr<OpDef> op;
     Tensor*const* args;
     size_t nargs;
@@ -236,14 +237,11 @@ decltype(auto) resolve_arrow(T&& p) {
 template <typename... Args>
 constexpr bool is_all_tensor_ptr = (... && std::is_same_v<decltype(resolve_arrow(std::declval<Args>())), Tensor*>);

-extern bool is_tracing; // FIXME: should use ApplyContext::global_enable
-
 template <typename... Args, std::enable_if_t<is_all_tensor_ptr<Args...>, int> = 0>
 apply_result_t apply(std::shared_ptr<OpDef> op, Args&&... args) {
     ApplyContext ctx;
     Tensor* arg_arr[] = {resolve_arrow(args)...};
     ctx.flags = (0 | ... | args->m_flags);
-    ctx.flags |= is_tracing ? Tensor::Flags::TRACE : 0;
     ctx.args = arg_arr;
     ctx.nargs = sizeof...(args);
     ctx.op = std::move(op);
@@ -256,7 +254,6 @@ auto apply(std::shared_ptr<OpDef> op, T&& tensors)
         -> std::enable_if_t<..., apply_result_t> {
     ApplyContext ctx;
     ctx.op = std::move(op);
-    ctx.flags = is_tracing ? Tensor::Flags::TRACE : 0;
     ctx.nargs = tensors.size();
     Tensor* args[ctx.nargs];
     ctx.args = args;
@@ -270,7 +267,6 @@ auto apply(std::shared_ptr<OpDef> op, T&& tensors)
 inline auto apply(std::shared_ptr<OpDef> op, Tensor*const* args, size_t nargs) {
     ApplyContext ctx;
     ctx.op = std::move(op);
-    ctx.flags = is_tracing ? Tensor::Flags::TRACE : 0;
     ctx.nargs = nargs;
     ctx.args = args;
     for (size_t i = 0; i < nargs; ++i) {
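
Taken together, the tensor.cpp and tensor.h hunks retire the global bool is_tracing: instead of every apply() overload ORing Tensor::Flags::TRACE into ctx.flags by hand, apply(ApplyContext&) now merges two static masks, ApplyContext::global_disable and ApplyContext::global_enable, in one place. A self-contained sketch of that merge; effective_flags is a hypothetical helper, and flags_t, SCALAR, TRACE are stand-ins rather than the MegEngine definitions:

    #include <cassert>
    #include <cstdint>

    using flags_t = std::uint64_t;
    constexpr flags_t SCALAR = 1 << 0; // stand-in flag bits (illustrative only)
    constexpr flags_t TRACE = 1 << 1;

    static flags_t global_disable = 0; // mirrors ApplyContext::global_disable
    static flags_t global_enable = 0;  // mirrors ApplyContext::global_enable

    // The merge at the top of apply(ApplyContext&): drop globally disabled
    // bits first, then add globally enabled ones.
    flags_t effective_flags(flags_t ctx_flags) {
        flags_t flags = ctx_flags & ~global_disable;
        return flags | global_enable;
    }

    int main() {
        global_enable = TRACE; // what set_tracing() now does
        assert(effective_flags(SCALAR) == (SCALAR | TRACE));

        global_enable = 0;       // what unset_tracing() now does
        global_disable = SCALAR; // a scope that suppresses a flag globally
        assert(effective_flags(SCALAR | TRACE) == TRACE);
        return 0;
    }

One detail worth noting: the TensorWrapper constructor now tests global_enable == Tensor::Flags::TRACE, an equality on the whole mask rather than a test of the TRACE bit. The two agree only while TRACE is the sole flag ever set in global_enable, which holds in this patch, where set_tracing()/unset_tracing() are the only writers shown.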
diff --git a/imperative/python/src/trace.cpp b/imperative/python/src/trace.cpp
index 853a498febe5218f2de3c66aa1e3c37f408a6c04..30ddb78bdf1f167aa051ccfb4023c35388c98e5d 100644
--- a/imperative/python/src/trace.cpp
+++ b/imperative/python/src/trace.cpp
@@ -28,12 +28,12 @@ apply_result_t apply_trace(ApplyContext& ctx) {
         for (size_t i = 0; i < ctx.nargs; i++) {
             args[i + 1] = py::cast(ctx.args[i]->m_var);
         }
-        py::object ret = py::reinterpret_steal<py::object>(
+        py::object pyout = py::reinterpret_steal<py::object>(
                 PyObject_Call(cpp_apply_backward_varnode, args.ptr(), nullptr));
-        if (!ret) throw py::error_already_set();
+        if (!pyout) throw py::error_already_set();

         // assumption: python function always returns PyList
-        auto tup = py::reinterpret_borrow<py::list>(ret);
+        auto tup = py::reinterpret_borrow<py::list>(pyout);
         for (size_t i = 0; i < tup.size(); i++) {
             auto pitem = tup[i].cast<cg::VarNode*>();
             outputs.emplace_back(std::make_shared<Tensor>(pitem));
@@ -48,6 +48,7 @@ apply_result_t apply_trace(ApplyContext& ctx) {
     }
     auto pyout = PyObject_Call(cpp_apply_with_tracing, args.ptr(), nullptr);
     if (!pyout) throw py::error_already_set();
+    // assumption: python function always returns PyList
     auto tup = py::reinterpret_steal<py::list>(pyout);
     for (size_t i = 0; i < tup.size(); i++) {
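
The trace.cpp hunks rename ret to pyout so both branches of apply_trace use one consistent name, and the added comment makes the PyList assumption explicit at the second call site as well. The steal/borrow pairing they leave in place follows the usual pybind11 ownership rules: reinterpret_steal adopts the new reference returned by PyObject_Call without an extra incref, while reinterpret_borrow increments the count of a reference that another owner keeps alive. A standalone sketch of the two, where the embedded interpreter exists only to make the example runnable:

    #include <pybind11/embed.h>

    namespace py = pybind11;

    int main() {
        py::scoped_interpreter guard;

        // PyList_New returns a *new* reference: steal adopts it with no extra
        // incref, as reinterpret_steal does around PyObject_Call above.
        py::list owned = py::reinterpret_steal<py::list>(PyList_New(0));

        // borrow increfs a reference someone else owns, as reinterpret_borrow
        // does with pyout while the py::object still holds it.
        py::list view = py::reinterpret_borrow<py::list>(owned);

        owned.append(1);
        return view.size() == 1 ? 0 : 1; // both handles see the same list
    }

Getting the pairing wrong in either direction is costly: borrowing a new reference leaks it, and stealing a borrowed one releases it prematurely, which is why keeping the ownership pattern intact while renaming is the right scope for this hunk.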