From 4f0e6eae35c6daa499e9bb1cf9faadce8c36039f Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Mon, 29 Mar 2021 15:22:08 +0800
Subject: [PATCH] fix(mge/trace): re-open gopt level in trace

GitOrigin-RevId: 5ebc712690eb65d2c6e4fe08b6353cd403f18cea
---
 imperative/python/megengine/jit/tracing.py    | 22 +++-------
 .../python/test/unit/jit/test_tracing.py      | 44 ++++++++++++++++++-
 imperative/src/impl/ops/elemwise.cpp          |  4 +-
 3 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/imperative/python/megengine/jit/tracing.py b/imperative/python/megengine/jit/tracing.py
index 08631bc2..203efe0a 100644
--- a/imperative/python/megengine/jit/tracing.py
+++ b/imperative/python/megengine/jit/tracing.py
@@ -131,7 +131,7 @@ class trace:
     :param sublinear_memory_config: configuration for sublinear memory optimization.
         If not None, it enables sublinear memory optimization with given setting.
     :param profiling: whether to profile compiled trace. Default: False
-    :param opt_level: optimization level for compiling trace.
+    :param opt_level: optimization level for compiling trace. Default: 2
     :param symbolic_shape: whether to use symbolic shape for tracing. Default: True
     """
 
@@ -147,7 +147,7 @@ class trace:
         capture_as_const=False,
         sublinear_memory_config: SublinearMemoryConfig = None,
         profiling: bool = False,
-        opt_level: int = None,
+        opt_level: int = 2,
         symbolic_shape: bool = True,
     ):
         self.__wrapped__ = function
@@ -377,11 +377,7 @@ class trace:
         )
         readers = [G.OutputNode(x()._varnode).outputs[0] for x in lazy_eval_tensors]
         self._apply_graph_options(lazy_eval_graph)
-        # FIXME
-        if self._graph_opt_level is not None:
-            lazy_eval_graph.options.graph_opt_level = self._graph_opt_level
-        else:
-            lazy_eval_graph.options.graph_opt_level = 2
+        lazy_eval_graph.options.graph_opt_level = self._graph_opt_level
         lazy_eval_graph._set_priority_to_id([*lazy_eval_links, *readers])
         lazy_eval_graph.compile(*lazy_eval_links, *readers)
         lazy_eval_graph()
@@ -500,11 +496,7 @@
         graph.options.no_force_inplace = True
         graph.options.seq_opt.enable_seq_comp_node_opt = False
-        # graph opt level
-        # if self._graph_opt_level is not None:
-        #     graph.options.graph_opt_level = self._graph_opt_level
-        # FIXME
-        graph.options.graph_opt_level = 0
+        graph.options.graph_opt_level = self._graph_opt_level
         # sublinear
         if self._sublinear_memory_config is not None:
             graph.options.enable_sublinear_memory_opt = True
@@ -634,11 +626,7 @@
             opnode = info.shape_reader = G.AttrOutputNode(v, *in_out_links)
             add_reader(opnode)
-        # FIXME
-        if self._graph_opt_level is not None:
-            graph.options.graph_opt_level = self._graph_opt_level
-        else:
-            graph.options.graph_opt_level = 2
+        graph.options.graph_opt_level = self._graph_opt_level
         graph._set_priority_to_id([*readers, *in_out_links, *io_links])
         graph.compile(*readers, *in_out_links, *io_links)

diff --git a/imperative/python/test/unit/jit/test_tracing.py b/imperative/python/test/unit/jit/test_tracing.py
index 03bd1dca..44ac4f04 100644
--- a/imperative/python/test/unit/jit/test_tracing.py
+++ b/imperative/python/test/unit/jit/test_tracing.py
@@ -113,6 +113,48 @@ def test_exclude_from_trace(trace_mode):
     np.testing.assert_equal(f(x).numpy(), y)
 
 
+@pytest.mark.parametrize("trace_mode", [False, True])
+def test_elemwise_fuse(trace_mode):
+    # explicitly set opt_level to 2
+    @trace(symbolic=trace_mode, opt_level=2)
+    def f(a, b):
+        base = 0
+        c = b - a
+        _, idx = F.topk(c, 3)
+        # internally, biased_idx will be idx, since gopt folds away the addition of 0
+        biased_idx = base + idx
+        return biased_idx
+
+    a = tensor(np.ones((7, 2)), dtype=np.int32)
+    b = tensor(2 * np.ones((7, 2)), dtype=np.float32)
+
+    for i in range(3):
+        y = f(a, b)
+        y.numpy()
+
+
+@pytest.mark.parametrize("trace_mode", [False, True])
+def test_elemwise_fuse_in_grad(trace_mode):
+    w = Parameter(np.ones([4, 6]), dtype="float32")
+
+    gm = GradManager().attach(w)
+    opt = optim.SGD([w], lr=0.01, momentum=0.9, weight_decay=5e-4)
+
+    # explicitly set opt_level to 2
+    @trace(symbolic=trace_mode, opt_level=2)
+    def f():
+        with gm:
+            wm = F.sum(w ** 2, axis=1) ** 0.5
+            loss = wm.mean()
+            gm.backward(loss)
+            opt.step().clear_grad()
+        return loss
+
+    for i in range(3):
+        y = f()
+        y.numpy()
+
+
 def test_print_in_trace():
     for symbolic in [False]:  # cannot read value in symbolic mode
@@ -221,7 +263,6 @@ def test_trace_profiler(trace_mode):
     assert out.get("profiler")
 
 
-@pytest.mark.skip(reason="force opt_level=0 when building graph")
 def test_goptions():
     @trace(symbolic=True, opt_level=0, capture_as_const=True)
     def f(x):
@@ -240,7 +281,6 @@ def test_goptions():
     np.testing.assert_equal(g(d).numpy().item(), 1.0)
 
 
-@pytest.mark.skip(reason="force opt_level=0 when building graph")
 def test_goptions_log_sum_exp():
     @trace(symbolic=True, opt_level=0, capture_as_const=True)
     def f(x, y):
diff --git a/imperative/src/impl/ops/elemwise.cpp b/imperative/src/impl/ops/elemwise.cpp
index 15f54af7..b8a50c43 100644
--- a/imperative/src/impl/ops/elemwise.cpp
+++ b/imperative/src/impl/ops/elemwise.cpp
@@ -27,12 +27,12 @@ std::shared_ptr<OpDef> make_from_op_node(cg::OperatorNodeBase* node_) {
     return Elemwise::make(node->param().mode);
 }
 
-cg::OperatorNodeBase* apply_on_var_node(
+auto apply_on_var_node(
         const OpDef& def,
         const VarNodeArray& inputs) {
     auto&& elemwise_opr = def.cast_final_safe<Elemwise>();
     OperatorNodeConfig config{elemwise_opr.make_name()};
-    return opr::Elemwise::make(inputs, elemwise_opr.mode, config).node()->owner_opr();
+    return opr::Elemwise::make(inputs, elemwise_opr.mode, config);
 }
 
 std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
-- 
GitLab
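
A minimal usage sketch of the behavior this patch restores (not part of the diff; it assumes only the public API exercised above: megengine.jit.trace with its opt_level argument, megengine.functional, and megengine.tensor). With the new default opt_level=2, gopt may rewrite the compiled graph, e.g. eliminating a log(exp(x)) pair; opt_level=0 keeps the graph as written, which is what the re-enabled test_goptions tests rely on. Either way the numerical result must agree:

    import numpy as np

    import megengine.functional as F
    from megengine import tensor
    from megengine.jit import trace

    # opt_level defaults to 2 after this patch: gopt may rewrite the graph
    @trace(symbolic=True)
    def optimized(x):
        return F.log(F.exp(x))  # a candidate for gopt's log/exp elimination

    # opt_level=0 disables graph optimization, keeping the graph as written
    @trace(symbolic=True, opt_level=0)
    def unoptimized(x):
        return F.log(F.exp(x))

    x = tensor(np.random.rand(7, 2).astype("float32"))
    for _ in range(3):  # first call traces, later calls run the compiled graph
        np.testing.assert_allclose(
            optimized(x).numpy(), unoptimized(x).numpy(), rtol=1e-6
        )

Running each trace several times mirrors the new tests above, which loop three iterations to cover both the tracing pass and the compiled-execution path.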