diff --git a/imperative/python/megengine/jit/tracing.py b/imperative/python/megengine/jit/tracing.py
index 1455acb590552cd99f67f8e6824e87172be6f366..8d5ae7196167c7cffc1e97d0ebc1c22c9768454d 100644
--- a/imperative/python/megengine/jit/tracing.py
+++ b/imperative/python/megengine/jit/tracing.py
@@ -755,8 +755,10 @@ class trace:
 
         h2v = {}
         graph = G.Graph()
-        # only graph_opt_level takes effect in dump
-        self._apply_graph_options(graph)
+
+        # apply graph_opt_level in dump
+        if self._graph_opt_level is not None:
+            graph.options.graph_opt_level = self._graph_opt_level
 
         for i, h in enumerate(self._arg_bindings):
             info = self._tinfo[h]
diff --git a/imperative/python/test/unit/test_tracing.py b/imperative/python/test/unit/test_tracing.py
index 94b3c18994efff7962629ae8bf80358c1ab16e2c..9e637af657c858d72cc24c512ea97274b98d82e7 100644
--- a/imperative/python/test/unit/test_tracing.py
+++ b/imperative/python/test/unit/test_tracing.py
@@ -244,7 +244,6 @@ def test_goptions_log_sum_exp():
     np.testing.assert_almost_equal(g(d, o), val)
 
 
-@pytest.mark.skip(reason="could not use opt_level=0 with dump")
 def test_goptions_log_exp():
     @trace(symbolic=True, opt_level=0, capture_as_const=True)
     def f(x):
diff --git a/src/core/include/megbrain/graph/cg.h b/src/core/include/megbrain/graph/cg.h
index 428e8ceb9e7321d2a4ff3f1ae5e79a052319ca69..004db94f4664b842edbf69e79ca34b836e6cb2e4 100644
--- a/src/core/include/megbrain/graph/cg.h
+++ b/src/core/include/megbrain/graph/cg.h
@@ -355,6 +355,14 @@ class ComputingGraph : public std::enable_shared_from_this<ComputingGraph>,
              */
             int16_t graph_opt_level = 2;
 
+            /*!
+             * disable inplace arith transformations during graph
+             *    construction
+             * it effectively disables level-1 graph optimization
+             * only for internal use during de-serialization
+             */
+            bool disable_inplace_arith_opt = false;
+
             /*!
              * max size of allreduce packs in MB
              * set this option to zero to disable PackAllReducePass
diff --git a/src/opr/impl/basic_arith.cpp b/src/opr/impl/basic_arith.cpp
index 7570f714e3bf866164c305efeb911302604677c1..bf6f2ab7d271d55793f326ac15bdf7011e22f94e 100644
--- a/src/opr/impl/basic_arith.cpp
+++ b/src/opr/impl/basic_arith.cpp
@@ -221,7 +221,8 @@ SymbolVar Elemwise::make(const VarNodeArrayView& inputs, Param param,
                  trait.name, cg::dump_var_info(inputs).c_str());
 
 #if !MGB_BUILD_SLIM_SERVING
-    if (inputs[0]->owner_graph()->options().graph_opt_level) {
+    auto&& options = inputs[0]->owner_graph()->options();
+    if (options.graph_opt_level && !(options.disable_inplace_arith_opt)) {
         auto repl = gopt::optimize_elemwise_expr_inplace(dtp.get_vars(), param,
                                                          config);
         if (repl)
diff --git a/src/serialization/impl/serializer_oss.cpp b/src/serialization/impl/serializer_oss.cpp
index 1f2727c3263a015c0069ab22205707b5e9ecec6f..9935089b865992e7e51cea7915cdc586b9f0ec1e 100644
--- a/src/serialization/impl/serializer_oss.cpp
+++ b/src/serialization/impl/serializer_oss.cpp
@@ -756,9 +756,15 @@ void GraphLoaderOSS::OprLoadContextImpl::load_single_opr(
 GraphLoader::LoadResult GraphLoaderOSS::OprLoadContextImpl::load_oprs() {
     // load oprs
     const auto* oprs = m_loader->m_graph->oprs();
-    for (flatbuffers::uoffset_t i = 0; i < oprs->size(); ++i) {
-        m_current_opr = oprs->Get(i);
-        load_single_opr(m_current_opr);
+    {
+        // inplace arith graph optimization is disabled during opr load
+        // so that the loaded graph is the same as the one that was dumped;
+        // see test TestSerializer2.LOGEXP for an example
+        GraphLoader::ScopedGraphOptDisabler _(m_graph);
+        for (flatbuffers::uoffset_t i = 0; i < oprs->size(); ++i) {
+            m_current_opr = oprs->Get(i);
+            load_single_opr(m_current_opr);
+        }
     }
 
     // batched loading device values
diff --git a/src/serialization/include/megbrain/serialization/serializer.h b/src/serialization/include/megbrain/serialization/serializer.h
index 30706e2a0212268a35c81cb82c2b6d43c9493e99..752fe7409f5901f69928b708acd0a8883460f963 100644
--- a/src/serialization/include/megbrain/serialization/serializer.h
+++ b/src/serialization/include/megbrain/serialization/serializer.h
@@ -61,6 +61,35 @@ namespace serialization {
                         const ComputingGraph::OutputSpec &outspec);
             };
 
+            /*!
+             * \brief scoped guard that disables inplace arith graph
+             *      optimization during de-serialization
+             *
+             * The constructor sets disable_inplace_arith_opt on the graph
+             * and remembers the previous value; the destructor writes the
+             * previous value back.
+             */
+            struct ScopedGraphOptDisabler {
+                bool option_saved;
+                std::shared_ptr<ComputingGraph> cg;
+                explicit ScopedGraphOptDisabler(
+                        const std::shared_ptr<ComputingGraph>& cg_p)
+                        : option_saved(true), cg(cg_p) {
+                    // option_saved starts as true, so the swap turns the
+                    // option on and leaves its old value in option_saved
+                    std::swap(option_saved,
+                              cg->options().disable_inplace_arith_opt);
+                }
+                ~ScopedGraphOptDisabler() {
+                    cg->options().disable_inplace_arith_opt = option_saved;
+                }
+                // non-copyable: a copy would write the saved value back
+                // when it is destroyed, while the original is still alive
+                ScopedGraphOptDisabler(const ScopedGraphOptDisabler&) = delete;
+                ScopedGraphOptDisabler& operator=(
+                        const ScopedGraphOptDisabler&) = delete;
+            };
+
             //! mem_node => tensor_value
             using SharedTensorMapEntry =
                     ThinHashMap<MemNode, std::shared_ptr<DeviceTensorND>>;
diff --git a/src/serialization/test/serializer_oss.cpp b/src/serialization/test/serializer_oss.cpp
index bdb82734552dddea0b512a711228329269610a31..cc6a7dffbe4514fbeabd52af934abbc19de443d4 100644
--- a/src/serialization/test/serializer_oss.cpp
+++ b/src/serialization/test/serializer_oss.cpp
@@ -761,4 +761,41 @@ TEST(TestSerializer2, HasOutputDtype) {
     load();
 }
 
+TEST(TestSerializer2, LOGEXP) {
+    // dump and reload log(exp(x)) twice: first with the default graph
+    // options, then with graph_opt_level forced to 0
+    using Mode = opr::Elemwise::Mode;
+    auto fname = GET_OUTPUT_FILE();
+    TensorShape shape{2, 3};
+    auto dump = [&](bool inplace_opt) {
+        auto cn = CompNode::load("xpu0");
+        auto host_x = std::make_shared<HostTensorND>(cn, shape);
+        // all-zero input so that log(exp(x)) cannot produce NaN
+        for (size_t i = 0, it = shape.total_nr_elems(); i < it; ++i)
+            host_x->ptr<float>()[i] = 0.0;
+        auto graph = ComputingGraph::make();
+        if (!inplace_opt)
+            graph->options().graph_opt_level = 0;
+        auto x = opr::Host2DeviceCopy::make(*graph, host_x, {"x"});
+        auto exp_x = opr::Elemwise::make({x}, Mode::EXP);
+        auto z = opr::Elemwise::make({exp_x}, Mode::LOG);
+        auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
+                                        GraphDumpFormat::FLATBUFFERS);
+        auto rst = dumper->dump({z.rename("z"), z});
+        // the optimized graph dumps as 1 opr, the unoptimized one as 3
+        const size_t expected_nr_opr = inplace_opt ? 1 : 3;
+        ASSERT_EQ(expected_nr_opr, rst.nr_opr);
+    };
+    auto load = [&]() {
+        auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
+                                        GraphDumpFormat::FLATBUFFERS);
+        auto rst = loader->load();
+    };
+
+    const bool opt_flags[] = {true, false};
+    for (bool flag : opt_flags) {
+        dump(flag);
+        load();
+    }
+}
 #endif