diff --git a/imperative/python/megengine/jit/tracing.py b/imperative/python/megengine/jit/tracing.py
index 1455acb590552cd99f67f8e6824e87172be6f366..8d5ae7196167c7cffc1e97d0ebc1c22c9768454d 100644
--- a/imperative/python/megengine/jit/tracing.py
+++ b/imperative/python/megengine/jit/tracing.py
@@ -755,8 +755,10 @@ class trace:
 
         h2v = {}
         graph = G.Graph()
-        # only graph_opt_level takes effect in dump
-        self._apply_graph_options(graph)
+
+        # apply graph_opt_level in dump
+        if self._graph_opt_level is not None:
+            graph.options.graph_opt_level = self._graph_opt_level
 
         for i, h in enumerate(self._arg_bindings):
             info = self._tinfo[h]
diff --git a/imperative/python/test/unit/test_tracing.py b/imperative/python/test/unit/test_tracing.py
index 94b3c18994efff7962629ae8bf80358c1ab16e2c..9e637af657c858d72cc24c512ea97274b98d82e7 100644
--- a/imperative/python/test/unit/test_tracing.py
+++ b/imperative/python/test/unit/test_tracing.py
@@ -244,7 +244,6 @@ def test_goptions_log_sum_exp():
     np.testing.assert_almost_equal(g(d, o), val)
 
 
-@pytest.mark.skip(reason="could not use opt_level=0 with dump")
 def test_goptions_log_exp():
     @trace(symbolic=True, opt_level=0, capture_as_const=True)
     def f(x):
diff --git a/src/core/include/megbrain/graph/cg.h b/src/core/include/megbrain/graph/cg.h
index 428e8ceb9e7321d2a4ff3f1ae5e79a052319ca69..004db94f4664b842edbf69e79ca34b836e6cb2e4 100644
--- a/src/core/include/megbrain/graph/cg.h
+++ b/src/core/include/megbrain/graph/cg.h
@@ -355,6 +355,14 @@ class ComputingGraph : public std::enable_shared_from_this<ComputingGraph>,
              */
             int16_t graph_opt_level = 2;
 
+            /*!
+             * disable inplace arith transformations during graph
+             * construction
+             * it effectively disable level-1 graph optimization
+             * only for internal use during de-serialization
+             */
+            bool disable_inplace_arith_opt = false;
+
             /*!
              * max size of allreduce packs in MB
              * set this option to zero to disable PackAllReducePass
diff --git a/src/opr/impl/basic_arith.cpp b/src/opr/impl/basic_arith.cpp
index 7570f714e3bf866164c305efeb911302604677c1..bf6f2ab7d271d55793f326ac15bdf7011e22f94e 100644
--- a/src/opr/impl/basic_arith.cpp
+++ b/src/opr/impl/basic_arith.cpp
@@ -221,7 +221,8 @@ SymbolVar Elemwise::make(const VarNodeArrayView& inputs, Param param,
                trait.name, cg::dump_var_info(inputs).c_str());
 
 #if !MGB_BUILD_SLIM_SERVING
-    if (inputs[0]->owner_graph()->options().graph_opt_level) {
+    auto&& options = inputs[0]->owner_graph()->options();
+    if (options.graph_opt_level && !(options.disable_inplace_arith_opt)) {
         auto repl = gopt::optimize_elemwise_expr_inplace(dtp.get_vars(), param,
                                                          config);
         if (repl)
diff --git a/src/serialization/impl/serializer_oss.cpp b/src/serialization/impl/serializer_oss.cpp
index 1f2727c3263a015c0069ab22205707b5e9ecec6f..9935089b865992e7e51cea7915cdc586b9f0ec1e 100644
--- a/src/serialization/impl/serializer_oss.cpp
+++ b/src/serialization/impl/serializer_oss.cpp
@@ -756,9 +756,15 @@ void GraphLoaderOSS::OprLoadContextImpl::load_single_opr(
 GraphLoader::LoadResult GraphLoaderOSS::OprLoadContextImpl::load_oprs() {
     // load oprs
     const auto* oprs = m_loader->m_graph->oprs();
-    for (flatbuffers::uoffset_t i = 0; i < oprs->size(); ++i) {
-        m_current_opr = oprs->Get(i);
-        load_single_opr(m_current_opr);
+    {
+        // inplace arith graph optimization is disabled during opr load
+        // it tries to restore the same graph as it was dumped
+        // see test TestSerializer2.LOGEXP for example
+        GraphLoader::ScopedGraphOptDisabler _(m_graph);
+        for (flatbuffers::uoffset_t i = 0; i < oprs->size(); ++i) {
+            m_current_opr = oprs->Get(i);
+            load_single_opr(m_current_opr);
+        }
     }
 
     // batched loading device values
diff --git a/src/serialization/include/megbrain/serialization/serializer.h b/src/serialization/include/megbrain/serialization/serializer.h
index 30706e2a0212268a35c81cb82c2b6d43c9493e99..752fe7409f5901f69928b708acd0a8883460f963 100644
--- a/src/serialization/include/megbrain/serialization/serializer.h
+++ b/src/serialization/include/megbrain/serialization/serializer.h
@@ -61,6 +61,21 @@ namespace serialization {
                     const ComputingGraph::OutputSpec &outspec);
         };
 
+        //! helper to disable inplace arith graph optimization during
+        //! de-serialization
+        struct ScopedGraphOptDisabler {
+            bool option_saved;
+            std::shared_ptr<ComputingGraph> cg;
+            ScopedGraphOptDisabler(std::shared_ptr<ComputingGraph>& cg_p)
+                    : option_saved(true), cg(cg_p) {
+                std::swap(option_saved,
+                          cg->options().disable_inplace_arith_opt);
+            }
+            ~ScopedGraphOptDisabler() {
+                cg->options().disable_inplace_arith_opt = option_saved;
+            }
+        };
+
         //! mem_node => tensor_value
         using SharedTensorMapEntry =
                 ThinHashMap<MemNode, std::shared_ptr<DeviceTensorND>>;
diff --git a/src/serialization/test/serializer_oss.cpp b/src/serialization/test/serializer_oss.cpp
index bdb82734552dddea0b512a711228329269610a31..cc6a7dffbe4514fbeabd52af934abbc19de443d4 100644
--- a/src/serialization/test/serializer_oss.cpp
+++ b/src/serialization/test/serializer_oss.cpp
@@ -761,4 +761,41 @@ TEST(TestSerializer2, HasOutputDtype) {
     load();
 }
 
+TEST(TestSerializer2, LOGEXP) {
+    auto fname = GET_OUTPUT_FILE();
+    TensorShape shape{2, 3};
+    using Mode = opr::Elemwise::Mode;
+    bool inplace_opt = true;
+    auto dump = [&]() {
+        auto cn = CompNode::load("xpu0");
+        auto host_x = std::make_shared<HostTensorND>(cn, shape);
+        for (size_t i = 0, it = shape.total_nr_elems(); i < it; ++i)
+            host_x->ptr<float>()[i] = 0.0;  // To avoid NAN
+        auto graph = ComputingGraph::make();
+        if (!inplace_opt)
+            graph->options().graph_opt_level = 0;
+        auto x = opr::Host2DeviceCopy::make(*graph, host_x, {"x"});
+        auto y = opr::Elemwise::make({x}, Mode::EXP);
+        auto z = opr::Elemwise::make({y}, Mode::LOG);
+
+        auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
+                                        GraphDumpFormat::FLATBUFFERS);
+        auto rst = dumper->dump({z.rename("z"), z});
+        size_t expected_nr_opr = inplace_opt? 1: 3;
+        ASSERT_EQ(expected_nr_opr, rst.nr_opr);
+    };
+
+    auto load = [&]() {
+        auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
+                                        GraphDumpFormat::FLATBUFFERS);
+        auto rst = loader->load();
+    };
+
+    dump();
+    load();
+
+    inplace_opt = !inplace_opt;
+    dump();
+    load();
+}
 #endif