Commit 4f0e6eae authored by Megvii Engine Team

fix(mge/trace): re-open gopt level in trace

GitOrigin-RevId: 5ebc712690eb65d2c6e4fe08b6353cd403f18cea
Parent 8494a152
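Illustrative usage (not part of this diff): with opt_level honored again and defaulting to 2, a traced function can explicitly enable or disable graph optimization. A minimal sketch, assuming the public megengine.jit.trace API; the function and tensor values here are made up for illustration.

import numpy as np

import megengine.functional as F
from megengine import tensor
from megengine.jit import trace

# opt_level now defaults to 2; pass opt_level=0 to turn graph optimization off.
@trace(symbolic=True, opt_level=2)
def step(a, b):
    return F.relu(a + b) * 2

x = tensor(np.ones((4, 4), dtype="float32"))
y = tensor(np.ones((4, 4), dtype="float32"))
out = step(x, y)  # first call traces and compiles with graph_opt_level=2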
@@ -131,7 +131,7 @@ class trace:
     :param sublinear_memory_config: configuration for sublinear memory optimization.
         If not None, it enables sublinear memory optimization with given setting.
     :param profiling: whether to profile compiled trace. Default: False
-    :param opt_level: optimization level for compiling trace.
+    :param opt_level: optimization level for compiling trace. Default: 2
     :param symbolic_shape: whether to use symbolic shape for tracing. Default: True
     """
@@ -147,7 +147,7 @@ class trace:
         capture_as_const=False,
         sublinear_memory_config: SublinearMemoryConfig = None,
         profiling: bool = False,
-        opt_level: int = None,
+        opt_level: int = 2,
         symbolic_shape: bool = True,
     ):
         self.__wrapped__ = function
@@ -377,11 +377,7 @@ class trace:
         )
         readers = [G.OutputNode(x()._varnode).outputs[0] for x in lazy_eval_tensors]
         self._apply_graph_options(lazy_eval_graph)
-        # FIXME
-        if self._graph_opt_level is not None:
-            lazy_eval_graph.options.graph_opt_level = self._graph_opt_level
-        else:
-            lazy_eval_graph.options.graph_opt_level = 2
+        lazy_eval_graph.options.graph_opt_level = self._graph_opt_level
         lazy_eval_graph._set_priority_to_id([*lazy_eval_links, *readers])
         lazy_eval_graph.compile(*lazy_eval_links, *readers)
         lazy_eval_graph()
@@ -500,11 +496,7 @@ class trace:
         graph.options.no_force_inplace = True
         graph.options.seq_opt.enable_seq_comp_node_opt = False
-        # graph opt level
-        # if self._graph_opt_level is not None:
-        #     graph.options.graph_opt_level = self._graph_opt_level
-        # FIXME
-        graph.options.graph_opt_level = 0
+        graph.options.graph_opt_level = self._graph_opt_level
         # sublinear
         if self._sublinear_memory_config is not None:
             graph.options.enable_sublinear_memory_opt = True
@@ -634,11 +626,7 @@ class trace:
             opnode = info.shape_reader = G.AttrOutputNode(v, *in_out_links)
             add_reader(opnode)
-        # FIXME
-        if self._graph_opt_level is not None:
-            graph.options.graph_opt_level = self._graph_opt_level
-        else:
-            graph.options.graph_opt_level = 2
+        graph.options.graph_opt_level = self._graph_opt_level
         graph._set_priority_to_id([*readers, *in_out_links, *io_links])
         graph.compile(*readers, *in_out_links, *io_links)
...
@@ -113,6 +113,48 @@ def test_exclude_from_trace(trace_mode):
     np.testing.assert_equal(f(x).numpy(), y)
 
+@pytest.mark.parametrize("trace_mode", [False, True])
+def test_elemwise_fuse(trace_mode):
+    # explicitly declare opt_level as 2
+    @trace(symbolic=trace_mode, opt_level=2)
+    def f(a, b):
+        base = 0
+        c = b - a
+        _, idx = F.topk(c, 3)
+        # internally, biased_idx will be idx as gopt will ignore the addition
+        biased_idx = base + idx
+        return biased_idx
+
+    a = tensor(np.ones((7, 2)), dtype=np.int32)
+    b = tensor(2 * np.ones((7, 2)), dtype=np.float32)
+    for i in range(3):
+        y = f(a, b)
+        y.numpy()
+
+
+@pytest.mark.parametrize("trace_mode", [False, True])
+def test_elemwise_fuse_in_grad(trace_mode):
+    w = Parameter(np.ones([4, 6]), dtype="float32")
+    gm = GradManager().attach(w)
+    opt = optim.SGD([w], lr=0.01, momentum=0.9, weight_decay=5e-4)
+
+    # explicitly declare opt_level as 2
+    @trace(symbolic=trace_mode, opt_level=2)
+    def f():
+        with gm:
+            wm = F.sum(w ** 2, axis=1) ** 0.5
+            loss = wm.mean()
+            gm.backward(loss)
+            opt.step().clear_grad()
+        return loss
+
+    for i in range(3):
+        y = f()
+        y.numpy()
+
+
 def test_print_in_trace():
     for symbolic in [False]:  # cannot read value in symbolic mode
@@ -221,7 +263,6 @@ def test_trace_profiler(trace_mode):
     assert out.get("profiler")
 
-@pytest.mark.skip(reason="force opt_level=0 when building graph")
 def test_goptions():
     @trace(symbolic=True, opt_level=0, capture_as_const=True)
     def f(x):
@@ -240,7 +281,6 @@ def test_goptions():
     np.testing.assert_equal(g(d).numpy().item(), 1.0)
 
-@pytest.mark.skip(reason="force opt_level=0 when building graph")
 def test_goptions_log_sum_exp():
     @trace(symbolic=True, opt_level=0, capture_as_const=True)
     def f(x, y):
...
@@ -27,12 +27,12 @@ std::shared_ptr<OpDef> make_from_op_node(cg::OperatorNodeBase* node_) {
     return Elemwise::make(node->param().mode);
 }
 
-cg::OperatorNodeBase* apply_on_var_node(
+auto apply_on_var_node(
         const OpDef& def,
         const VarNodeArray& inputs) {
     auto&& elemwise_opr = def.cast_final_safe<Elemwise>();
     OperatorNodeConfig config{elemwise_opr.make_name()};
-    return opr::Elemwise::make(inputs, elemwise_opr.mode, config).node()->owner_opr();
+    return opr::Elemwise::make(inputs, elemwise_opr.mode, config);
 }
 
 std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
...