feat(imperative/opr): add extern c opr and support dump

GitOrigin-RevId: b29e374c6052136d92f7f3eb5ac8debe983de855

feat(imperative/opr): add extern c opr and support dump
GitOrigin-RevId: b29e374c6052136d92f7f3eb5ac8debe983de855
27f2ecc4 · Megvii Engine Team · 91264f37 · 91264f37 · 27f2ecc4 · 27f2ecc4
9 changed file
--- a/imperative/python/megengine/core/tensor/core.py
+++ b/imperative/python/megengine/core/tensor/core.py
-# -*- coding: utf-8 -*-
-# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
-#
-# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-import collections
-import functools
-import inspect
-import sys
-import typing
-from abc import ABC
-
-
-class OpBase:
-    pass
-
-
-class TensorBase:
-    pass
-
-
-class TensorWrapperBase:
-    pass
--- a/imperative/python/megengine/core/tensor/megbrain_graph.py
+++ b/imperative/python/megengine/core/tensor/megbrain_graph.py
@@ -20,7 +20,6 @@ from .._imperative_rt import GraphOptimizeOptions
 from .._imperative_rt.core2 import apply, set_cpp_apply_backward_varnode
 from .._wrap import as_device
 from ..ops.builtin import OpDef
-from .core import TensorBase


 def set_priority_to_id(dest_vars):
@@ -127,7 +126,7 @@ class Graph(_imperative_rt.ComputingGraph):
            print("this function should be called after compilation.")


-class VarNode(TensorBase):
+class VarNode:
    def __init__(self, node: _imperative_rt.VarNode, isscalar=False):
        self._node = node
        self._isscalar = isscalar

--- a/imperative/python/megengine/functional/external.py
+++ b/imperative/python/megengine/functional/external.py
@@ -7,12 +7,31 @@
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # pylint: disable=redefined-builtin
-from typing import Sequence
+from typing import Iterable, List, Sequence

 from ..core._imperative_rt.core2 import apply
 from ..core.ops import builtin


+def extern_opr_subgraph(
+    inputs, output_shapes: List[tuple], dump_name: str, dump_data: bytes, output_dtypes
+):
+    r"""Load a serialized extern opr subgraph and fake execute the operator.
+
+    Args:
+        inputs: list of input tensors.
+        output_shapes: The output shapes.
+        dump_name: The serialized subgraph name.
+        dump_data: The serialized subgraph.
+    """
+    if not isinstance(inputs, Iterable):
+        inputs = (inputs,)
+    op = builtin.ExternOpr(
+        output_shapes, dump_name, dump_data, len(dump_data), output_dtypes
+    )
+    return apply(op, *inputs)
+
+
 def tensorrt_runtime_opr(inputs, *, data: bytes = None):
    # empty model will give None result
    if data is None:

--- a/imperative/python/megengine/jit/tracing.py
+++ b/imperative/python/megengine/jit/tracing.py
@@ -29,13 +29,13 @@ from ..core._imperative_rt.core2 import (
 from ..core._imperative_rt.ops import (
    AssertEqual,
    CollectiveComm,
+    ExternOpr,
    RemoteRecv,
    RemoteSend,
 )
 from ..core._trace_option import set_symbolic_shape
 from ..core._wrap import as_device
 from ..core.ops.builtin import BatchNorm, OpDef
-from ..core.ops.special import Const
 from ..core.tensor import megbrain_graph as G
 from ..core.tensor.utils import setscalar
 from ..utils.naming import AutoNaming
@@ -129,6 +129,7 @@ class trace:
        function: the function will be traced.
        symbolic: whether to apply symbolic execution for tracing. Default: False
        capture_as_const: capture global vars or closures as const value. Default: False
+        record_only: if True, won't run even if call the function. Default: False
        sublinear_memory_config: configuration for sublinear memory optimization.
            If not None, it enables sublinear memory optimization with given setting.
        profiling: whether to profile compiled trace. Default: False
@@ -147,6 +148,7 @@ class trace:
        function,
        symbolic=False,
        capture_as_const=False,
+        record_only=False,
        sublinear_memory_config: SublinearMemoryConfig = None,
        dtr_config: DTRConfig = None,
        profiling: bool = False,
@@ -155,8 +157,9 @@ class trace:
        symbolic_shape: bool = True,
    ):
        self.__wrapped__ = function
-        self._symbolic = symbolic
-        self._capture_as_const = capture_as_const
+        self._symbolic = symbolic or record_only
+        self._capture_as_const = capture_as_const or record_only
+        self._record_only = record_only
        self._sublinear_memory_config = sublinear_memory_config
        self._dtr_config = dtr_config
        self._profiling = profiling
@@ -418,35 +421,40 @@ class trace:
        def do_finalize():
            escaped_tensors = self._take_escaped_tensors()
            if self._untraced:
-                for x in escaped_tensors:
-                    if x():
-                        info = self._tinfo[x()._mixin_handle]
-                        info.data_read = True
-                        x()._mixin_handle = -1
-                        x()._recording = False
-                if self._inputs_to_restore:
-                    for x in self._inputs_to_restore:
-                        x._mixin_handle = -1
-                        x._recording = False
-                if self._symbolic and (
-                    self._lazy_eval_tensors or self._lazy_eval_links
-                ):
-                    # eval lazy eval tensors
-                    self._lazy_eval(
-                        self._lazy_eval_graph,
-                        self._lazy_eval_tensors,
-                        self._lazy_eval_links,
-                    )
+                if self._record_only:
                    self._lazy_eval_graph = None
                    self._lazy_eval_tensors = None
                    self._lazy_eval_links = None
-                self._untraced = False
+                else:
+                    for x in escaped_tensors:
+                        if x():
+                            info = self._tinfo[x()._mixin_handle]
+                            info.data_read = True
+                            x()._mixin_handle = -1
+                            x()._recording = False
+                    if self._inputs_to_restore:
+                        for x in self._inputs_to_restore:
+                            x._mixin_handle = -1
+                            x._recording = False
+                    if self._symbolic and (
+                        self._lazy_eval_tensors or self._lazy_eval_links
+                    ):
+                        # eval lazy eval tensors
+                        self._lazy_eval(
+                            self._lazy_eval_graph,
+                            self._lazy_eval_tensors,
+                            self._lazy_eval_links,
+                        )
+                        self._lazy_eval_graph = None
+                        self._lazy_eval_tensors = None
+                        self._lazy_eval_links = None
+                    self._untraced = False
            else:
                # compiled_tensor leaks
                if self._pc == len(self._seq):
                    for x in escaped_tensors:
                        try:
-                            assign_raw_tensor(x(), RawTensor(x()._dev_tensor()))
+                            x().__init__(RawTensor(x()._dev_tensor()))
                        except RuntimeError:
                            # TraceMismatchError thrown in do_exit
                            pass
@@ -769,8 +777,8 @@ class trace:
            raise ValueError(
                "you must specify capture_as_const=True at __init__ to use dump"
            )
-        if self._untraced:
-            raise RuntimeError("should run at least once before calling dump")
+        if self._untraced and len(self._seq) == 0:
+            raise RuntimeError("should do record first before dump")
        if self._output_names and output_names:
            raise TypeError(
                "cannot specify output_names when output is already in dict format"
@@ -1104,10 +1112,6 @@ class CompiledTensorProxy:
            self.__info.data_reader.drop_value()


-def assign_raw_tensor(lhs, rhs):
-    lhs.__init__(rhs)
-
-
 def apply_symbolic_mode(op: OpDef, *args: RawTensor):
    graph = active_trace._lazy_eval_graph
    ivars = []

--- a/imperative/python/megengine/module/external.py
+++ b/imperative/python/megengine/module/external.py
@@ -12,11 +12,55 @@ import numpy as np
 from ..functional.external import (
    atlas_runtime_opr,
    cambricon_runtime_opr,
+    extern_opr_subgraph,
    tensorrt_runtime_opr,
 )
 from .module import Module


+class ExternOprSubgraph(Module):
+    r"""Load a serialized ExternOpr subgraph.
+
+    See :func:`~.extern_opr` for more details.
+    """
+
+    def __init__(
+        self, output_shapes, dump_name, dump_data, output_dtypes=None, **kwargs
+    ):
+        super(ExternOprSubgraph, self).__init__(**kwargs)
+        self._output_shapes = output_shapes
+        self._dump_name = dump_name
+        self._dump_data = dump_data
+        self._output_dtypes = output_dtypes
+        if self._output_dtypes is None:
+            self._output_dtypes = [np.float32] * len(output_shapes)
+
+    @property
+    def data(self):
+        return self._dump_data
+
+    @data.setter
+    def data(self, val):
+        self._dump_data = np.frombuffer(val, dtype=np.uint8)
+
+    @property
+    def name(self):
+        return self._dump_name
+
+    @name.setter
+    def name(self, val):
+        self._dump_name = val
+
+    def forward(self, *inputs):
+        return extern_opr_subgraph(
+            inputs,
+            output_shapes=self._output_shapes,
+            dump_name=self._dump_name,
+            dump_data=self._dump_data,
+            output_dtypes=self._output_dtypes,
+        )
+
+
 class TensorrtRuntimeSubgraph(Module):
    r"""Load a serialized TensorrtRuntime subgraph.
    

--- a/imperative/python/test/integration/test_converge.py
+++ b/imperative/python/test/integration/test_converge.py
@@ -76,7 +76,7 @@ class XORNet(Module):
 @pytest.mark.parametrize("test_traced_module", [True, False])
 def test_training_converge(test_traced_module):
    net = XORNet()
-    if test_training_converge:
+    if test_traced_module:
        inp = Tensor(np.random.random((14, 2)))
        net = trace_module(net, inp)


--- a/imperative/src/impl/ops/extern_opr.cpp
+++ b/imperative/src/impl/ops/extern_opr.cpp
+/**
+ * \file imperative/src/impl/ops/extern_opr.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+#include "megbrain/imperative/ops/autogen.h"
+#include "../op_trait.h"
+
+#include "megbrain/serialization/extern_c_opr_io.h"
+
+namespace mgb::imperative {
+
+namespace { namespace externopr {
+
+TensorShapeArray get_shapes(const std::vector<std::vector<size_t>>& shapes) {
+    TensorShapeArray ret;
+    for (auto&& i:shapes) {
+        SmallVector<size_t> shape(i.begin(), i.end());
+        TensorShape shp(shape);
+        ret.push_back(shp);
+    }
+    return ret;
+}
+
+auto apply_on_var_node(
+        const OpDef& def,
+        const VarNodeArray& inputs) {
+
+    auto&& op = static_cast<const ExternOpr&>(def);
+    SymbolVarArray symbol_var_inputs(inputs.begin(), inputs.end());
+
+    SmallVector<DType> output_dtypes(op.output_dtypes.begin(), op.output_dtypes.end());
+    auto&& output_shapes = get_shapes(op.output_shapes);
+
+    cg::OperatorNodeBase* opr = opr::ExternCOprRunner::make_placeholder(
+            symbol_var_inputs, output_shapes, op.name.c_str(), op.data.c_str(),
+            op.data_len, {}, output_dtypes);
+    return opr;
+}
+
+OP_TRAIT_REG(ExternOpr, ExternOpr, opr::ExternCOprRunner)
+    .apply_on_var_node(apply_on_var_node)
+    .fallback();
+}} // externopr
+
+}  // namespace mgb::imperative
+
--- a/sdk/c-opr-loaders/mace/dump_model.py
+++ b/sdk/c-opr-loaders/mace/dump_model.py
@@ -10,7 +10,7 @@ import argparse

 import numpy as np
 import yaml
-from megengine import jit
+from megengine import jit, tensor
 from megengine.module.external import ExternOprSubgraph


@@ -27,12 +27,12 @@ def main():
        description="load a .pb model and convert to corresponding "
        "load-and-run model"
    )
-    parser.add_argument("input", help="mace model file")
-    parser.add_argument("param", help="mace param file")
+    parser.add_argument("--input", help="mace model file")
+    parser.add_argument("--param", help="mace param file")
    parser.add_argument(
-        "output", help="converted model that can be fed to dump_with_testcase_mge.py"
+        "--output", help="converted model that can be fed to dump_with_testcase_mge.py"
    )
-    parser.add_argument("config", help="config file with yaml format")
+    parser.add_argument("--config", help="config file with yaml format")
    args = parser.parse_args()

    with open(args.config, "r") as f:
@@ -90,17 +90,17 @@ def main():
            + raw_param
        )

-        net = ExternOprSubgraph(wk_raw_content, "mace", osizes)
+        net = ExternOprSubgraph(osizes, "mace", wk_raw_content)
        net.eval()

-        @jit.trace(symbolic=True)
+        @jit.trace(record_only=True)
        def inference(inputs):
            return net(inputs)

        inputs = [
-            np.random.random(isizes[i]).astype(np.float32) for i in range(len(isizes))
+            tensor(np.random.random(isizes[i]).astype(np.float32)) for i in range(len(isizes))
        ]
-        inference.trace(*inputs)
+        inference(*inputs)
        inference.dump(args.output)



--- a/src/core/include/megbrain/ir/ops.td
+++ b/src/core/include/megbrain/ir/ops.td
@@ -381,6 +381,24 @@ def CheckHasInf: MgbHashableOp<"CheckHasInf", [EmptyParam]>;

 def FastpathCopy: MgbHashableOp<"FastpathCopy">;

+def ExternOpr: MgbHashableOp<"ExternOpr"> {
+  let extraArguments = (ins
+    MgbArrayAttr<MgbArrayAttr<MgbSizeTAddr>>:$output_shapes,
+    MgbStringAttr:$name,
+    MgbStringAttr:$data,
+    MgbSizeTAddr:$data_len,
+    MgbArrayAttr<MgbDTypeAttr>:$output_dtypes
+  );
+  let hashFunction = [{
+    return mgb::hash_pair_combine(
+      mgb::hash($_self.dyn_typeinfo()),
+      mgb::hash_pair_combine(
+        mgb::hash($_self.name),
+        mgb::hash($_self.data))
+      );
+  }];
+}
+
 def Cumsum: MgbHashableOp<"Cumsum", [CumsumParam]>;

 def Split: MgbHashableOp<"Split", [EmptyParam]> {