From 147cef52a09afe41a08dead5d64c8e60b6c9e2bc Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Tue, 1 Dec 2020 14:12:15 +0800 Subject: [PATCH] refactor(mge/imperative): implement new tensor system GitOrigin-RevId: 2dd4e460ac32f059e91c7cdef859d82dee704b4b --- .../python/megengine/autodiff/grad_manager.py | 2 +- .../python/megengine/core/autodiff/grad.py | 26 ++ .../python/megengine/core/ops/special.py | 11 +- .../python/megengine/core/tensor/core.py | 8 + .../python/megengine/core/tensor/indexing.py | 15 +- .../megengine/core/tensor/tensor_wrapper.py | 89 ++-- .../python/megengine/core/tensor/utils.py | 44 +- .../megengine/distributed/functional.py | 67 +-- .../python/megengine/distributed/helper.py | 3 +- .../python/megengine/functional/elemwise.py | 2 +- .../python/megengine/functional/math.py | 3 +- imperative/python/megengine/functional/nn.py | 6 +- .../python/megengine/functional/quantized.py | 2 +- .../python/megengine/functional/tensor.py | 12 +- .../python/megengine/functional/utils.py | 4 +- .../python/megengine/random/distribution.py | 2 +- imperative/python/megengine/tensor.py | 74 +++- imperative/python/src/grad.cpp | 404 ++++++++++++++++++ imperative/python/src/grad.h | 54 +++ imperative/python/src/grad_info.h | 36 ++ imperative/python/src/intrusive_list.h | 227 ++++++++++ imperative/python/src/module.cpp | 5 + imperative/python/src/pyext17.h | 191 ++++++++- imperative/python/src/tensor.cpp | 257 +++++++++++ imperative/python/src/tensor.h | 157 +++++++ imperative/python/src/trace.h | 17 + .../python/test/unit/core/test_async_level.py | 2 + .../python/test/unit/core/test_autodiff.py | 57 ++- .../python/test/unit/core/test_indexing_op.py | 43 +- .../test/unit/core/test_tensor_wrapper.py | 20 +- .../test/unit/functional/test_tensor.py | 2 +- imperative/src/impl/op_trait.cpp | 2 +- imperative/src/impl/ops/collective_comm.cpp | 2 +- imperative/src/impl/ops/io_remote.cpp | 2 +- imperative/src/impl/ops/opr_attr.cpp | 2 +- imperative/src/impl/proxy_graph_detail.cpp | 2 +- .../megbrain/imperative}/proxy_graph_detail.h | 2 +- 37 files changed, 1605 insertions(+), 249 deletions(-) create mode 100644 imperative/python/src/grad.cpp create mode 100644 imperative/python/src/grad.h create mode 100644 imperative/python/src/grad_info.h create mode 100644 imperative/python/src/intrusive_list.h create mode 100644 imperative/python/src/tensor.cpp create mode 100644 imperative/python/src/tensor.h create mode 100644 imperative/python/src/trace.h rename imperative/src/{impl => include/megbrain/imperative}/proxy_graph_detail.h (93%) diff --git a/imperative/python/megengine/autodiff/grad_manager.py b/imperative/python/megengine/autodiff/grad_manager.py index 743ef1be1..f94abe8fc 100644 --- a/imperative/python/megengine/autodiff/grad_manager.py +++ b/imperative/python/megengine/autodiff/grad_manager.py @@ -301,7 +301,7 @@ class GradManager: if tensor is None: return - def callback(_, grad, callbacks=spec.callbacks): + def callback(grad, callbacks=spec.callbacks): for cb in callbacks: grad = cb(tensor, grad) self._gradients[id(tensor)] = grad diff --git a/imperative/python/megengine/core/autodiff/grad.py b/imperative/python/megengine/core/autodiff/grad.py index f14080d80..0a6244cb1 100644 --- a/imperative/python/megengine/core/autodiff/grad.py +++ b/imperative/python/megengine/core/autodiff/grad.py @@ -16,6 +16,7 @@ import numpy as np import megengine as mge +from .._imperative_rt import core2 from ..ops.builtin import Elemwise, OpDef, RemoteSend from ..ops.special import Const from ..tensor.core import TensorBase, TensorWrapperBase, apply @@ -418,3 +419,28 @@ def tracer_apply(op: (OpDef, Function), *args: typing.Optional[Tracer]): @apply.register() def _(op: Const, *_: typing.Optional[Tracer]): return None + + +class Grad: + def __init__(self): + self._impl = core2.GradKey() + + def wrt(self, *tensors, callback=None): + for x in tensors: + self._impl.attach(x, callback) + return self + + def __call__(self, ys, dys): + from collections.abc import Sequence + + if not isinstance(ys, Sequence): + ys = [ys] + if not isinstance(dys, Sequence): + dys = [dys] + core2.backward(self._impl, ys, dys) + + def __enter__(self): + return self + + def __exit__(self, _1, _2, _3): + del self._impl diff --git a/imperative/python/megengine/core/ops/special.py b/imperative/python/megengine/core/ops/special.py index e427c8f59..db1503ee5 100644 --- a/imperative/python/megengine/core/ops/special.py +++ b/imperative/python/megengine/core/ops/special.py @@ -6,11 +6,18 @@ # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import numpy as np + +from .._imperative_rt.core2 import Tensor from ..tensor.core import OpBase, TensorBase, apply -class Const(OpBase): +class Const: def __init__(self, value=None, *, dtype=None, device=None): - self.value = value + self.value = np.asarray(value, dtype=dtype) self.dtype = dtype self.device = device + + def __call__(self, *reference): + Wrapper = type(reference[0]) + return (Wrapper(self.value, self.dtype, self.device),) diff --git a/imperative/python/megengine/core/tensor/core.py b/imperative/python/megengine/core/tensor/core.py index 0c1bcee79..07d6edca3 100644 --- a/imperative/python/megengine/core/tensor/core.py +++ b/imperative/python/megengine/core/tensor/core.py @@ -13,9 +13,17 @@ import sys import typing from abc import ABC +from .._imperative_rt.core2 import apply as apply2 from .multipledispatch import Dispatcher +def apply_op(op, *args): + Wrapper = type(args[0]) + args = [arg._tensor for arg in args] + results = apply2(op, *args) + return tuple(map(Wrapper, results)) + + class OpBase(ABC): def __call__(self, *args): return apply(self, *args) diff --git a/imperative/python/megengine/core/tensor/indexing.py b/imperative/python/megengine/core/tensor/indexing.py index 7f2934b84..8e952048f 100644 --- a/imperative/python/megengine/core/tensor/indexing.py +++ b/imperative/python/megengine/core/tensor/indexing.py @@ -10,10 +10,10 @@ from typing import Iterable import numpy as np +from .._imperative_rt.core2 import Tensor, apply from .._trace_option import use_symbolic_shape from ..ops import builtin from ..ops.special import Const -from .core import TensorBase, TensorWrapperBase, apply from .utils import astensor1d, isscalar, make_shape_tuple @@ -149,13 +149,13 @@ def unpack_getitem(inp, tuple_val, *, allow_newaxis=True): return True def get_index(i): - if not isinstance(i, (TensorBase, TensorWrapperBase)): + if not isinstance(i, (Tensor)): if is_bool_list(i) or isinstance(i, np.ndarray) and i.dtype == np.bool_: (i,) = Const(i, dtype=np.bool_, device=inp.device)(inp) else: (i,) = Const(i, dtype=np.int32, device=inp.device)(inp) return i - assert isinstance(i, (TensorBase, TensorWrapperBase)) + assert isinstance(i, Tensor) if i.dtype != np.bool_: return i _, ind = apply(builtin.CondTake(), i, i) @@ -198,8 +198,8 @@ def try_condtake(tensor, index): return [] if isinstance(index, np.ndarray): (index,) = Const(index, dtype=np.bool_, device=tensor.device)(tensor) - assert isinstance(index, (TensorBase, TensorWrapperBase)) - if not isinstance(tensor, (TensorWrapperBase, TensorBase)): + assert isinstance(index, Tensor) + if not isinstance(tensor, Tensor): raise TypeError("input must be a tensor") if tensor.device != index.device: raise ValueError( @@ -227,7 +227,7 @@ def getitem(tensor, index): op = builtin.IndexingMultiAxisVec(items=items) (result,) = apply(op, tensor, *tensors) if ret_scalar: - result.__wrapped__._data._isscalar = True + result.setscalar() return result @@ -239,7 +239,7 @@ def setitem(tensor, index, value): if index.shape[0] == 0: return tensor tensor = tensor.reshape(-1) - if not isinstance(value, (TensorBase, TensorWrapperBase)): + if not isinstance(value, Tensor): op = Const(value, dtype=tensor.dtype, device=tensor.device) (value,) = op(tensor) tensor, tensors, items, use_subtensor, _ = unpack_getitem(tensor, index) @@ -250,6 +250,7 @@ def setitem(tensor, index, value): op = builtin.Subtensor(items=items) else: op = builtin.IndexingMultiAxisVec(items=items) + (tmp_result,) = apply(op, tensor, *tensors) # XXX: broadcast can always be applied even if shapes are equal diff --git a/imperative/python/megengine/core/tensor/tensor_wrapper.py b/imperative/python/megengine/core/tensor/tensor_wrapper.py index 357999a0a..a197f617e 100644 --- a/imperative/python/megengine/core/tensor/tensor_wrapper.py +++ b/imperative/python/megengine/core/tensor/tensor_wrapper.py @@ -8,19 +8,20 @@ # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. import abc import collections +from typing import Union import numpy as np +from .._imperative_rt.common import CompNode +from .._imperative_rt.core2 import Tensor, apply from .._trace_option import use_symbolic_shape from ..ops import builtin from ..ops.builtin import Elemwise, GetVarShape from ..ops.special import Const from . import utils -from .core import OpBase, TensorBase, TensorWrapperBase, apply +from .core import OpBase, TensorBase, TensorWrapperBase from .indexing import getitem as _getitem from .indexing import setitem as _setitem -from .raw_tensor import RawTensor, as_raw_tensor -from .tensor import Tensor from .utils import isscalar from .utils import make_shape_tuple as _make_shape_tuple from .utils import setscalar @@ -41,6 +42,7 @@ def _elwise(*args, mode): ) args = utils.convert_inputs(*args) (result,) = apply(op, *args) + _isscalar = True for i in args: if isscalar(i) == False: @@ -84,9 +86,7 @@ def _reshape(x, shape): if unspec_axis is not None: raise ValueError("multiple -1 in shape: {} & {}".format(unspec_axis, i)) unspec_axis = i - shape = utils.astensor1d(shape, x, dtype="int32", device=x.device) - if unspec_axis is None: op = builtin.Reshape() else: @@ -181,7 +181,6 @@ def _reduce(mode): elif isinstance(axis, collections.abc.Iterable): axis = list(axis) axis.sort(reverse=True) - for ai in axis: op = builtin.Reduce(mode=mode, axis=ai) (data,) = apply(op, data) @@ -221,10 +220,7 @@ def _todo(*_): def _expand_args(args): if len(args) == 1: - if isinstance( - args[0], - (collections.abc.Sequence, TensorBase, TensorWrapperBase, np.ndarray), - ): + if isinstance(args[0], (collections.abc.Sequence, Tensor, np.ndarray),): args = args[0] return args @@ -240,9 +236,8 @@ class ArrayMethodMixin(abc.ABC): return self.numpy().astype(dtype) def __array_wrap__(self, array): - return TensorWrapper( - as_raw_tensor(array, dtype=array.dtype, device=self.device) - ) + Wrapper = type(self) + return Wrapper(array, dtype=array.dtype, device=self.device) @abc.abstractmethod def _reset(self, other): @@ -253,7 +248,11 @@ class ArrayMethodMixin(abc.ABC): pass @abc.abstractproperty - def shape(self) -> tuple: + def shape(self) -> Union[tuple, Tensor]: + pass + + @abc.abstractproperty + def _tuple_shape(self) -> tuple: pass @abc.abstractmethod @@ -331,7 +330,7 @@ class ArrayMethodMixin(abc.ABC): __complex__ = lambda self: complex(self.item()) def __len__(self): - shape = self.__wrapped__.shape + shape = self._tuple_shape if shape: return int(shape[0]) raise TypeError("ndim is 0") @@ -352,7 +351,7 @@ class ArrayMethodMixin(abc.ABC): @property def ndim(self): - shape = self.__wrapped__.shape + shape = self._tuple_shape if shape is None: raise ValueError("unkown ndim") return len(shape) @@ -480,22 +479,52 @@ class GenericTensorWrapper(ArrayMethodMixin, TensorWrapperBase): self.__wrapped__._swap_out() -class TensorWrapper(GenericTensorWrapper): - def __init__(self, data, dtype=None, device=None): - if isinstance(data, TensorWrapperBase): - data = data.__wrapped__ - elif not isinstance(data, TensorBase): - assert data is not None, "Cannot init a tensor with data as None" - data = Tensor(as_raw_tensor(data, dtype=dtype, device=device)) - super().__init__(data) +class TensorWrapper(ArrayMethodMixin, TensorBase): + def __init__(self, data, dtype=None, device=None, isscalar=False): + self._isscalar = isscalar + if isinstance(data, Tensor): + self._tensor = data + else: + if device is None: + device = CompNode._get_default_device() + self._tensor = Tensor(data, dtype, device) def _reset(self, other): - if isinstance(other, TensorWrapperBase): - self.__wrapped__ = other.__wrapped__ - elif isinstance(other, TensorBase): - self.__wrapped__ = other - else: - self._reset(type(self)(other, dtype=self.dtype, device=self.device)) + if not isinstance(other, __class__): + raise TypeError(type(other)) + self._tensor = other._tensor + return self + + @property + def dtype(self): + return self._tensor.dtype + + @property + def shape(self): + if self._isscalar: + return () + shape = self._tensor.shape + if shape == () or not use_symbolic_shape(): + return shape + return apply(GetVarShape(), self)[0] + + @property + def device(self): + return self._tensor.device + + def numpy(self): + if self._isscalar: + return self._tensor.numpy().squeeze() + return self._tensor.numpy() + + def _drop(self): + self._tensor._drop() + + def _swap_in(self): + self._tensor._swap_in() + + def _swap_out(self): + self._tensor._swap_out() def __repr__(self): piece = "Tensor(" diff --git a/imperative/python/megengine/core/tensor/utils.py b/imperative/python/megengine/core/tensor/utils.py index 287ad1e2d..0eed59ed1 100644 --- a/imperative/python/megengine/core/tensor/utils.py +++ b/imperative/python/megengine/core/tensor/utils.py @@ -11,9 +11,10 @@ from typing import Iterable, Union import numpy as np +from .._imperative_rt.core2 import Tensor, apply from ..ops import builtin from ..ops.special import Const -from ..tensor.core import OpBase, TensorBase, TensorWrapperBase, apply +from ..tensor.core import OpBase, TensorBase, TensorWrapperBase from .dtype import is_equal, is_quantize _enable_convert_inputs = True @@ -109,7 +110,7 @@ def dtype_promotion(inputs): def get_device(inputs): device = None for i in inputs: - if isinstance(i, (TensorWrapperBase, TensorBase)): + if isinstance(i, Tensor): if device is None: device = i.device elif device != i.device: @@ -126,30 +127,31 @@ def concatenate(inputs, axis=0, *, device=None): return convert_single_value(x, inputs, dtype=dtype) inputs = tuple(map(convert, inputs)) - (result,) = apply(builtin.Concat(axis=axis, comp_node=device.to_c()), *inputs) + (result,) = apply(builtin.Concat(axis=axis, comp_node=device), *inputs) return result def astype(x, dtype): dtype = np.dtype(dtype) if not is_equal(x.dtype, dtype): - isscalar = x.__wrapped__._data._isscalar + isscalar = x.isscalar() (x,) = apply(builtin.TypeCvt(dtype=dtype), x) - x.__wrapped__._data._isscalar = isscalar + if isscalar: + x.setscalar() return x def convert_single_value(v, inputs, *, dtype=None, device=None): - tensors = [i for i in inputs if isinstance(i, (TensorBase, TensorWrapperBase))] + tensors = [i for i in inputs if isinstance(i, Tensor)] assert len(tensors) > 0 - if isinstance(v, (TensorWrapperBase, TensorBase)): + if isinstance(v, (TensorWrapperBase, Tensor)): v = astype(v, v.dtype if is_quantize(v.dtype) else dtype) else: (v,) = Const(v, dtype=dtype, device=device)(*tensors) return v -def convert_inputs(*args: TensorBase): +def convert_inputs(*args: Tensor): if not _enable_convert_inputs: return args @@ -167,7 +169,7 @@ def convert_inputs(*args: TensorBase): def result_type(*args): dtypes = [] for i in args: - if isinstance(i, (TensorWrapperBase, TensorBase)): + if isinstance(i, Tensor): dtypes.append(i.dtype) continue try: @@ -178,25 +180,16 @@ def result_type(*args): def isscalar(x): - if isinstance(x, TensorWrapperBase): - x = x.__wrapped__ - if hasattr(x, "_isscalar"): - return x._isscalar - if isinstance(x, TensorBase): - return x._data._isscalar + if isinstance(x, Tensor): + return x.isscalar() return np.isscalar(x) def setscalar(x): - if isinstance(x, TensorWrapperBase): - x = x.__wrapped__ - - if hasattr(x, "_isscalar"): - x._isscalar = True - elif isinstance(x, TensorBase): - x._data._isscalar = True + if isinstance(x, Tensor): + x.setscalar() else: raise NotImplementedError("Unsupport type {}".format(type(x))) @@ -215,25 +208,24 @@ def astensor1d(x, *reference, dtype=None, device=None): else: if ndim != 0 and ndim != 1: raise ValueError("ndim != 1 or 0, get : %d" % ndim) - if not isinstance(x, (TensorBase, TensorWrapperBase)): + if not isinstance(x, Tensor): (x,) = Const(x, dtype=dtype, device=device)(*reference) return x if not isinstance(x, collections.abc.Sequence): raise TypeError - if any(isinstance(i, (TensorBase, TensorWrapperBase)) for i in x): + if any(isinstance(i, Tensor) for i in x): x = concatenate(x, device=device) if dtype is not None: x = astype(x, dtype) return x - (x,) = Const(x, dtype=dtype, device=device)(*reference) return x def _expand_int(s, i): - if isinstance(i, (TensorBase, TensorWrapperBase)): + if isinstance(i, Tensor): i_np = i.numpy() if i_np.ndim == 0: s.append(int(i_np)) diff --git a/imperative/python/megengine/distributed/functional.py b/imperative/python/megengine/distributed/functional.py index 1816251bf..2e0457b99 100644 --- a/imperative/python/megengine/distributed/functional.py +++ b/imperative/python/megengine/distributed/functional.py @@ -8,6 +8,7 @@ # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. from typing import Optional, Tuple +from ..core._imperative_rt.core2 import apply from ..core.autodiff.builtin_op_utils import builtin_op_get_backward_fn from ..core.autodiff.grad import ( Tracer, @@ -17,7 +18,6 @@ from ..core.autodiff.grad import ( tracer_apply, ) from ..core.ops.builtin import CollectiveComm, Copy, RemoteRecv, RemoteSend -from ..core.tensor.core import apply from ..core.tensor.tensor import Tensor, tensor_apply from ..device import get_default_device from ..tensor import tensor @@ -39,71 +39,6 @@ __all__ = [ ] -@apply.register() -def _(op: RemoteSend, *args: Tensor): - ret = tensor_apply(op, *args) - - # set extra information - tracer_set = dict() - for k in set().union(*(i._extra_data for i in args if isinstance(i, Tensor))): - tracer_set[k.name] = True - - # check tracer_set in remote_recv - get_client().set_remote_tracer(op.key, tracer_set) - return ret - - -@builtin_op_get_backward_fn.register(RemoteSend) -def _(op: RemoteSend, inputs, outputs, input_requires_grad): - def backward(*args): - return [ - remote_recv( - op.rank_to, - inputs[0].shape, - inputs[0].dtype, - device=str(inputs[0].device), - inp=inputs[0], - ) - ] - - return backward, [True] - - -@get_op_has_grad_fn.register(RemoteSend) -def _(op: RemoteSend): - def has_grad(opnode, reached): - return get_client().check_is_grad(op.key) - - return has_grad - - -@check_backward_allow_noinput.register(RemoteSend) -def _(op: RemoteSend): - return True - - -@builtin_op_get_backward_fn.register(RemoteRecv) -def _(op: RemoteRecv, inputs, outputs, input_requires_grad): - def backward(*output_grads): - return [remote_send(output_grads[0], op.rank_from)] - - return backward, [True] - - -@get_op_has_grad_fn.register(RemoteRecv) -def _(op: RemoteRecv): - def has_grad(opnode, reached): - ret = False - for v in opnode.outputs: - if v() in reached: - ret = True - break - get_client().set_is_grad(op.key, ret) - return ret - - return has_grad - - def collective_comm(inp, mode, group, device): """Helper function for applying collective communication functions.""" assert isinstance(group, Group) diff --git a/imperative/python/megengine/distributed/helper.py b/imperative/python/megengine/distributed/helper.py index f8269251e..883470e34 100644 --- a/imperative/python/megengine/distributed/helper.py +++ b/imperative/python/megengine/distributed/helper.py @@ -17,8 +17,8 @@ import numpy as np from megengine.autodiff.grad_manager import GradManager, get_backwarding_grad_manager from megengine.device import get_default_device, get_device_count +from ..core._imperative_rt.core2 import apply from ..core.ops.builtin import ParamPackConcat, ParamPackSplit -from ..core.tensor.core import apply from ..functional.utils import copy from ..tensor import Tensor from ..utils.future import Future @@ -228,7 +228,6 @@ class AllreduceCallback: self._packing_size[dtype] = 0 def __call__(self, param, grad): - param = param.__wrapped__ gm = get_backwarding_grad_manager() assert isinstance(gm, GradManager) if gm not in self._marked_gm: diff --git a/imperative/python/megengine/functional/elemwise.py b/imperative/python/megengine/functional/elemwise.py index 6ea379769..3148a8fa7 100644 --- a/imperative/python/megengine/functional/elemwise.py +++ b/imperative/python/megengine/functional/elemwise.py @@ -9,10 +9,10 @@ # pylint: disable=unused-argument,invalid-name,redefined-builtin,arguments-out-of-order import functools +from ..core._imperative_rt.core2 import apply from ..core.ops import builtin from ..core.ops.builtin import Elemwise from ..core.tensor import megbrain_graph, utils -from ..core.tensor.core import apply from ..core.tensor.utils import isscalar, setscalar from ..device import get_default_device from ..jit.tracing import is_tracing diff --git a/imperative/python/megengine/functional/math.py b/imperative/python/megengine/functional/math.py index 7825fed39..8b3296f77 100644 --- a/imperative/python/megengine/functional/math.py +++ b/imperative/python/megengine/functional/math.py @@ -12,10 +12,11 @@ import math import numbers from typing import Optional, Sequence, Tuple, Union +from ..core._imperative_rt.core2 import apply from ..core.ops import builtin from ..core.ops.special import Const from ..core.tensor import utils -from ..core.tensor.core import TensorBase, TensorWrapperBase, apply +from ..core.tensor.core import TensorBase, TensorWrapperBase from ..tensor import Tensor from .elemwise import clip, exp, log, log1p from .tensor import reshape, squeeze diff --git a/imperative/python/megengine/functional/nn.py b/imperative/python/megengine/functional/nn.py index e936b1cab..b431a6c62 100644 --- a/imperative/python/megengine/functional/nn.py +++ b/imperative/python/megengine/functional/nn.py @@ -10,12 +10,12 @@ from typing import Optional, Sequence, Tuple, Union from ..core._imperative_rt import CompNode +from ..core._imperative_rt.core2 import Tensor, apply from ..core._trace_option import use_symbolic_shape from ..core.ops import builtin from ..core.ops.builtin import BatchNorm from ..core.ops.special import Const from ..core.tensor import megbrain_graph, utils -from ..core.tensor.core import TensorBase, TensorWrapperBase, apply from ..core.tensor.utils import astensor1d from ..distributed import WORLD, is_distributed from ..jit.tracing import is_tracing @@ -1565,9 +1565,7 @@ def indexing_one_hot( [1.] """ - assert isinstance( - src, (TensorWrapperBase, TensorBase) - ), "src must be of Tensor type" + assert isinstance(src, Tensor), "src must be of Tensor type" op = builtin.IndexingOneHot(axis=axis) index = utils.convert_single_value(index, (src,), dtype="int32", device=src.device) (result,) = apply(op, src, index) diff --git a/imperative/python/megengine/functional/quantized.py b/imperative/python/megengine/functional/quantized.py index b18f52d2d..07ea9d574 100644 --- a/imperative/python/megengine/functional/quantized.py +++ b/imperative/python/megengine/functional/quantized.py @@ -8,8 +8,8 @@ # pylint: disable=too-many-lines from typing import Tuple, Union +from ..core._imperative_rt.core2 import apply from ..core.ops import builtin -from ..core.tensor.core import apply from ..tensor import Tensor from .debug_param import get_conv_execution_strategy from .types import _pair, _pair_nonzero diff --git a/imperative/python/megengine/functional/tensor.py b/imperative/python/megengine/functional/tensor.py index ad180bee2..081fef85c 100644 --- a/imperative/python/megengine/functional/tensor.py +++ b/imperative/python/megengine/functional/tensor.py @@ -14,10 +14,10 @@ from typing import Iterable, List, Optional, Sequence, Tuple, Union import numpy as np from ..core._imperative_rt import CompNode +from ..core._imperative_rt.core2 import Tensor, apply from ..core._wrap import device as as_device from ..core.ops import builtin from ..core.ops.special import Const -from ..core.tensor.core import TensorBase, TensorWrapperBase, apply from ..core.tensor.tensor_wrapper import _broadcast, _remove_axis from ..core.tensor.utils import ( astensor1d, @@ -611,11 +611,11 @@ def where(mask: Tensor, x: Tensor, y: Tensor) -> Tensor: """ x, y = convert_inputs(x, y) - if not isinstance(x, (TensorWrapperBase, TensorBase)): + if not isinstance(x, Tensor): raise TypeError("input x must be a tensor") - if not isinstance(y, (TensorWrapperBase, TensorBase)): + if not isinstance(y, Tensor): raise TypeError("input y must be a tensor") - if not isinstance(mask, (TensorWrapperBase, TensorBase)): + if not isinstance(mask, Tensor): raise TypeError("mask must be a tensor") if mask.dtype != np.bool_: raise ValueError("mask must be bool") @@ -668,9 +668,9 @@ def cond_take(mask: Tensor, x: Tensor) -> Tensor: [1. 4.] [0 3] """ - if not isinstance(x, (TensorWrapperBase, TensorBase)): + if not isinstance(x, Tensor): raise TypeError("input must be a tensor") - if not isinstance(mask, (TensorWrapperBase, TensorBase)): + if not isinstance(mask, Tensor): raise TypeError("mask must be a tensor") if mask.dtype != np.bool_: raise ValueError("mask must be bool") diff --git a/imperative/python/megengine/functional/utils.py b/imperative/python/megengine/functional/utils.py index a8e6b0e02..a4195c22d 100644 --- a/imperative/python/megengine/functional/utils.py +++ b/imperative/python/megengine/functional/utils.py @@ -11,10 +11,10 @@ from typing import Iterable, Union import numpy as np +from ..core._imperative_rt.core2 import apply from ..core._wrap import device as as_device from ..core.ops.builtin import Copy, Identity -from ..core.tensor import Tensor -from ..core.tensor.core import apply +from ..tensor import Tensor from .math import topk as _topk from .tensor import broadcast_to, transpose diff --git a/imperative/python/megengine/random/distribution.py b/imperative/python/megengine/random/distribution.py index 82852200f..fc8c3fd54 100644 --- a/imperative/python/megengine/random/distribution.py +++ b/imperative/python/megengine/random/distribution.py @@ -10,9 +10,9 @@ from typing import Iterable, Optional from .. import Tensor from ..core._imperative_rt import invoke_op +from ..core._imperative_rt.core2 import apply from ..core.ops.builtin import GaussianRNG, UniformRNG from ..core.tensor import utils -from ..core.tensor.core import apply from .rng import _random_seed_generator __all__ = ["normal", "uniform"] diff --git a/imperative/python/megengine/tensor.py b/imperative/python/megengine/tensor.py index 9772870f7..6c13d9df6 100644 --- a/imperative/python/megengine/tensor.py +++ b/imperative/python/megengine/tensor.py @@ -10,26 +10,66 @@ import collections -from .core import Tensor as _Tensor -from .core.ops.builtin import Copy -from .core.tensor.core import apply +import numpy as np + +from .core._imperative_rt import CompNode +from .core._imperative_rt.core2 import Tensor as _Tensor +from .core._imperative_rt.core2 import apply +from .core._trace_option import use_symbolic_shape +from .core.ops.builtin import Copy, GetVarShape from .core.tensor.raw_tensor import as_device +from .core.tensor.tensor_wrapper import ArrayMethodMixin from .device import _valid_device, get_default_device from .utils.deprecation import deprecated -class Tensor(_Tensor): +class Tensor(_Tensor, ArrayMethodMixin): grad = None dmap_callback = None + q_dict = {"mode": None, "scale": None, "zero_point": None} - def __init__(self, data, dtype=None, device=None): + def __new__(cls, data, dtype=None, device=None): if device is None: - device = get_default_device() - self.q_dict = {"mode": None, "scale": None, "zero_point": None} - super().__init__(data, dtype=dtype, device=device) + cn = get_default_device() + elif isinstance(device, str): + if cls.dmap_callback is not None: + cn = CompNode(cls.dmap_callback(device)) + else: + cn = CompNode(device) + else: + assert isinstance(device, CompNode) + cn = device + + if isinstance(data, _Tensor): + obj = _Tensor.__new__(cls, data) + else: + obj = _Tensor.__new__(cls, data, dtype, cn) + return obj + + @property + def shape(self): + shape = super().shape + if shape == () or not use_symbolic_shape(): + return shape + return apply(GetVarShape(), self)[0] + + @property + def _tuple_shape(self): + return super().shape + + def __repr__(self): + piece = "Tensor(" + with np.printoptions(precision=4, suppress=True): + piece += "{}".format(str(self.numpy())) + if self.dtype != np.float32: + piece += ", dtype={}".format(np.dtype(self.dtype).name) + piece += ", device={}".format(self.device) + ")" + return piece @deprecated(version="1.0", reason="no need to reuse an existing tensor since 1.0") def set_value(self, value): + if not isinstance(value, _Tensor): + value = Tensor(value, dtype=self.dtype, device=self.device) self._reset(value) @deprecated(version="1.0", reason="use *= 0 instead") @@ -61,27 +101,22 @@ class Tensor(_Tensor): def __hash__(self): return id(self) + def __getnewargs__(self): + r""" __getnewargs__ will be called for pickle serialization or deep copy + """ + return (self.numpy(), self.dtype, self.device.logical_name) + def __getstate__(self): r""" __getstate__ will be called for pickle serialization or deep copy """ state = { - "data": self.numpy(), - "device": self.device.logical_name, - "dtype": self.dtype, "qdict": self.q_dict, } return state def __setstate__(self, state): - data = state.pop("data") - logical_device = state.pop("device") - if self.dmap_callback is not None: - assert isinstance(logical_device, str) - logical_device = self.dmap_callback(logical_device) - dtype = state.pop("dtype") self.q_dict = state.pop("qdict") - super().__init__(data, dtype=dtype, device=logical_device) def detach(self): r""" @@ -89,8 +124,7 @@ class Tensor(_Tensor): during backward gradient calcuation, i.e. its gradient is zero. """ Wrapper = type(self) - Tensor = type(self.__wrapped__) - return Wrapper(Tensor(self.__wrapped__._data)) + return Wrapper(self) tensor = Tensor diff --git a/imperative/python/src/grad.cpp b/imperative/python/src/grad.cpp new file mode 100644 index 000000000..786a54b45 --- /dev/null +++ b/imperative/python/src/grad.cpp @@ -0,0 +1,404 @@ +/** + * \file imperative/python/src/grad.cpp + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#include "./grad.h" +#include "megbrain/imperative/proxy_graph_detail.h" +#include "megbrain/imperative/ops/autogen.h" +#include "megbrain/utils/mempool.h" + +namespace py = pybind11; + +namespace mgb::imperative::python { + +namespace { + +struct GradSlotWeakPtr { + std::weak_ptr grad_fn; + size_t idx; +}; + +} // namespace + +struct GradProducerRecord : intrusive_list::Node { + using Base = intrusive_list::Node; + + GradProducerRecord() = default; + GradProducerRecord(GradProducerRecord::head_t& head) : Base(intrusive_list::after_t{}, head) {} + // GradProducerRecord(GradProducerRecord&&) = default; + // GradProducerRecord& operator=(GradProducerRecord&) = default; + // GradProducerRecord& operator=(GradProducerRecord&&) = default; +}; + +struct GradSlot { + std::shared_ptr grad; + py::object callback; + GradProducerRecord::head_t producer_head; +}; + +struct GradSlotProducerPtr : GradSlotPtr { + GradProducerRecord producer_record; + + GradSlotProducerPtr() = default; + GradSlotProducerPtr(GradInfo& info) : GradSlotPtr(info), producer_record(info->producer_head) {} +}; + +struct GradFn : std::enable_shared_from_this { + static MemPool pool; + + std::weak_ptr key; + SmallVector slots; + SmallVector dsts; + SmallVector> closure; + std::shared_ptr backward_graph; + bool in_ref_keeper = false; + + static void deleter(GradFn* ptr) { + pool.free(ptr); + } + + std::shared_ptr make() { + return std::shared_ptr(pool.alloc(), &deleter); + } + + void clear() { + key.reset(); + slots.clear(); + dsts.clear(); + closure.clear(); + backward_graph.reset(); + } +}; + +GradSlot* GradSlotPtr::operator->() { + return &grad_fn->slots[idx]; +} + +namespace { + +struct BackwardGraphCache : std::unordered_map>, CompNodeDepedentObject { + std::shared_ptr on_comp_node_finalize() override { + clear(); + return {}; + } +} backward_graph_cache; + +std::shared_ptr make_backward_graph( + ApplyContext& ctx, const apply_result_t& outputs) { + // hash + static_assert(alignof(size_t) % alignof(bool) == 0); + size_t buf_size = (1 + ctx.nargs * 2) * sizeof(size_t) + ctx.nargs * sizeof(bool); + alignas(alignof(size_t)) std::byte buf[buf_size]; + size_t* size_t_ptr = reinterpret_cast(buf); + bool* bool_ptr = reinterpret_cast(size_t_ptr + (1 + ctx.nargs * 2)); + bool* bool_ptr0 = bool_ptr; + *(size_t_ptr++) = ctx.op->hash(); + for (size_t i = 0; i < ctx.nargs; ++i) { + *(size_t_ptr++) = mgb::hash(ctx.args[i]->dtype().handle()); + *(size_t_ptr++) = mgb::hash(ctx.args[i]->comp_node()); + *(bool_ptr++) = bool(ctx.args[i]->m_grad_info.grad_fn); + } + mgb_assert(bool_ptr0 == reinterpret_cast(size_t_ptr) && + bool_ptr == reinterpret_cast(buf + buf_size)); + size_t key = XXHash{}.update(buf, buf_size).digest(); + + auto&& iter = backward_graph_cache.find(key); + if (iter != backward_graph_cache.end()) { + return iter->second; + } + + // slow path + SmallVector inputs(ctx.nargs); + SmallVector input_requires_grad(ctx.nargs, false); + SmallVector output_has_grad(outputs.size(), true); + for (size_t i = 0; i < ctx.nargs; ++i) { + inputs[i].comp_node = ctx.args[i]->comp_node(); + inputs[i].layout.dtype = ctx.args[i]->dtype(); + input_requires_grad[i] = bool(ctx.args[i]->m_grad_info.grad_fn); + } + auto result = std::make_shared( + proxy_graph_detail::make_backward_graph( + *ctx.op, inputs, input_requires_grad, output_has_grad)); + if (!result->backward) { + result.reset(); + } + backward_graph_cache.emplace(key, result); + return result; +} + +} // namespace + +apply_result_t apply_grad(ApplyContext& ctx) { + std::shared_ptr grad_key; + for (size_t i = 0; i < ctx.nargs; ++i) { + auto* tensor = ctx.args[i]; + if (tensor->m_grad_info.grad_fn) { + auto&& input_grad_key = tensor->m_grad_info.grad_fn->key.lock(); + // tensor is attached to a live GradKey + if (input_grad_key && input_grad_key->active) { + if (grad_key) { + if (grad_key != input_grad_key) { + PyErr_SetString(PyExc_NotImplementedError, "second order grad"); + throw pyext17::py_err_set(); + } + } else { + grad_key = std::move(input_grad_key); + } + } else { + // cleanup stale grad info + // under what condition? + tensor->m_grad_info = {}; + tensor->m_flags &= ~Tensor::Flags::GRAD; + } + } else { + tensor->m_flags &= ~Tensor::Flags::GRAD; + } + } + + ctx.flags &= ~Tensor::Flags::GRAD; + + // perform forward apply_op or trace + auto outputs = apply(ctx); + + if (!grad_key) { + return outputs; + } + + auto backward_graph = make_backward_graph(ctx, outputs); + if (!backward_graph) { + return outputs; + } + + auto grad_fn = std::make_shared(); + grad_fn->key = grad_key; + grad_fn->slots.resize(outputs.size()); + grad_fn->backward_graph = std::move(backward_graph); + + grad_fn->dsts.reserve(ctx.nargs); + for (size_t i = 0; i < ctx.nargs; ++i) { + if (grad_fn->backward_graph->input_has_grad[i]) { + auto& input_grad_info = ctx.args[i]->m_grad_info; + grad_fn->dsts.emplace_back(input_grad_info); + grad_fn->dsts.back().producer_record.insert_after(input_grad_info->producer_head); + } else { + grad_fn->dsts.emplace_back(); + } + } + + auto& save_for_backward = grad_fn->backward_graph->save_for_backward; + grad_fn->closure.reserve(std::count_if(save_for_backward.begin(), save_for_backward.end(), [](bool p){return p;})); + + // given op, taking gradient of output_tensor_list wrt input_tensor_list: + // + // save_for_backward[0:nargs-1]: whether input tensor requires gradient, + // i.e., whether it is in input_tensor_list + // + // save_for_backward[nargs:nargs+outputs.size()-1]: whether output tensor is + // needed to calculate gradients + // + // save_for_backward[-outputs.size():]: whether output tensor is in + // output_tensor_list + // + // Example: perform c = a * b, where a is input data, b is parameter to be + // optimized, save_for_backward = [1, 1, 0, 1] + mgb_assert(ctx.nargs + 2 * outputs.size() == save_for_backward.size()); + + // record input tensors needed to take grad + for (size_t i = 0; i < ctx.nargs; ++i) { + if (save_for_backward[i]) { + grad_fn->closure.push_back(ctx.args[i]->shared_from_this()); + } + } + // record output tensors needed to take grad + for (size_t i = 0; i < outputs.size(); ++i) { + bool requires_grad = save_for_backward[ctx.nargs + outputs.size() + i]; + if (save_for_backward[ctx.nargs + i]) { + grad_fn->closure.push_back(outputs[i]); + if (requires_grad) { + // avoid reference cycle [Tensor <-> GradFn] + outputs[i] = outputs[i]->copy(); + } + } + if (requires_grad) { + auto& grad_info = outputs[i]->m_grad_info; + grad_info.grad_fn = grad_fn; + grad_info.idx = i; + grad_info.insert_after(grad_key->free_vars_head); + outputs[i]->m_flags |= Tensor::Flags::GRAD; + } + } + + // record forward history + grad_key->tape.emplace_back(grad_fn); + + return outputs; +} + +void GradKeyWrapper::attach(PyObject*const* args, size_t nargs) { + if (nargs != 2) { + throw py::type_error("expect 2 arguments"); + } + auto* tw = TensorWrapper::cast_safe(args[0]); + if (!tw) { + throw py::type_error("argument 1 must be Tensor"); + } + auto* tensor = tw->m_tensor.get(); + py::object callback; + if (args[1] != Py_None) { + callback = py::reinterpret_borrow(args[1]); + } + m_key->attach(tensor, std::move(callback)); +} + +//! GradKey is weakly refered by tensor->m_grad_info.grad_fn->key after attach +void GradKey::attach(Tensor* tensor, pybind11::object callback) { + if (!active) { + throw py::value_error("grad key finalized"); + } + + if (tensor->m_grad_info.grad_fn) { + if (tensor->m_grad_info.grad_fn->key.lock().get() != this) { + PyErr_SetString(PyExc_NotImplementedError, "second order grad"); + throw pyext17::py_err_set(); + } + if (tensor->m_grad_info->callback) { + throw py::value_error("callback already set on this tensor"); + } + } else { + tensor->m_grad_info.idx = 0; + auto& grad_fn = tensor->m_grad_info.grad_fn; + grad_fn = std::make_shared(); + grad_fn->key = shared_from_this(); + grad_fn->slots.resize(1); + tensor->m_grad_info.insert_after(free_vars_head); + tensor->m_flags |= Tensor::Flags::GRAD; + } + tensor->m_grad_info.grad_fn->slots[0].callback = std::move(callback); +} + +void accum_grad(std::shared_ptr& grad, std::shared_ptr&& delta) { + if (!grad) { + grad = std::forward(delta); + return; + } + static ApplyContext ctx; + if (!ctx.op) { + ctx.op = std::shared_ptr(new Elemwise(Elemwise::Mode::ADD)); + ctx.nargs = 2; + } + Tensor* args[2] = {grad.get(), delta.get()}; + ctx.args = args; + ctx.flags = grad->m_flags | delta->m_flags; + + grad = apply(ctx)[0]; +} + +void GradKey::backward(std::vector tensors, std::vector grads) { + if (!active) { + throw py::value_error("finalized"); + } + if (tensors.size() != grads.size()) { + throw py::value_error("tensor and grad size mismatch"); + } + + // this GradKey is marked inactive here + active = false; + struct CleanupGuard { + GradKey* owner; + CleanupGuard(GradKey* this_) : owner(this_) {} + ~CleanupGuard() {owner->cleanup();} + } _cleanup_guard(this); + + if (tape.empty() || grads.empty()) return; + PyTypeObject* pytype = Py_TYPE(grads[0]->self().ptr()); + + for (size_t i = 0; i < tensors.size(); ++i) { + auto& grad_info = tensors[i]->m_tensor->m_grad_info; + if (grad_info.grad_fn && grad_info.grad_fn->key.lock().get() == this) { + grad_info->grad = grads[i]->m_tensor; + } + } + + std::vector> ref_keeper; + ref_keeper.reserve(tape.size()); + // back-propagation in reverse order + for (std::ptrdiff_t k = tape.size() - 1; k >= 0; --k) { + auto&& grad_fn = tape[k].lock(); + if (!grad_fn) continue; + if (grad_fn->backward_graph) { + for (size_t i = 0; i < grad_fn->slots.size(); ++i) { + // grad_fn->dsts correspond to input tensors during forward + // calculation, grad_fn->slots correspond to output tensors. + // condition true means the output tensor has gradient for + // back-propagation + if (grad_fn->backward_graph->save_for_backward[grad_fn->dsts.size() + grad_fn->slots.size() + i]) { + grad_fn->closure.push_back(std::move(grad_fn->slots[i].grad)); + } + } + ApplyContext ctx; + ctx.op = grad_fn->backward_graph->backward; + ctx.flags = 0; + ctx.nargs = grad_fn->closure.size(); + Tensor* args[ctx.nargs]; + for (size_t i = 0; i < ctx.nargs; ++i) { + args[i] = grad_fn->closure[i].get(); + mgb_assert(args[i]); + ctx.flags |= args[i]->m_flags; + } + ctx.args = args; + + auto grads = apply(ctx); + + size_t j = 0; + for (size_t i = 0; i < grad_fn->dsts.size(); ++i) { + if (grad_fn->backward_graph->input_has_grad[i]) { + auto& dst = grad_fn->dsts[i]; + // grads[j] is consumed in accum_grad + accum_grad(dst->grad, std::move(grads[j])); + ++j; + } + } + mgb_assert(j == grads.size()); + } + for (auto&& dst : grad_fn->dsts) { + if (!dst.grad_fn) continue; + if (!dst.grad_fn->in_ref_keeper) { + dst.grad_fn->in_ref_keeper = true; + ref_keeper.push_back(dst.grad_fn); + } + // grad_fn->clear will unlink current dst.producer_record + // such that if dst.producer_record.next is false, dst accumulates + // all the gradients + if (!dst.producer_record.next && dst->callback && dst->grad) { + dst->callback(TensorWrapper::make(pytype, dst->grad)); + } + } + grad_fn->clear(); + } // finish tape loop +} + +void GradKey::cleanup() { + active = false; + tape.clear(); + for (intrusive_list::Iterator it(free_vars_head); it;) { + it->grad_fn.reset(); + (it++)->unlink(); + } +} + +void GradKeyWrapper::backward(std::vector tensors, std::vector grads) { + m_key->backward(std::move(tensors), std::move(grads)); +} + +GradKey::~GradKey() { + cleanup(); +} + +} // namespace mgb::imperative::python diff --git a/imperative/python/src/grad.h b/imperative/python/src/grad.h new file mode 100644 index 000000000..e94c22922 --- /dev/null +++ b/imperative/python/src/grad.h @@ -0,0 +1,54 @@ +/** + * \file imperative/python/src/grad.h + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#pragma once + +#include "./tensor.h" + +#include +#include + +namespace mgb::imperative::python { + +apply_result_t apply_grad(ApplyContext& ctx); + +struct GradKey : std::enable_shared_from_this, NonCopyableObj { + std::string name; + bool active = true; + GradInfo::head_t free_vars_head; + std::vector> tape; + + ~GradKey(); + + void attach(Tensor* tensor, pybind11::object callback); + void backward(std::vector, std::vector); + void cleanup(); +}; + +struct GradKeyWrapper { + using wrap_t = pyext17::wrap; + static constexpr auto tp_name = pybind11::detail::_("GradKey"); + + std::shared_ptr m_key; + + inline GradKeyWrapper() : m_key(std::make_shared()) {} + + void attach(PyObject*const* args, size_t nargs); + void backward(std::vector, std::vector); +}; + +} // namespace mgb::imperative::python + +namespace pybind11::detail { + +template<> struct type_caster : mgb::imperative::python::GradKeyWrapper::wrap_t::caster {}; + +} // namespace pybind11::detail diff --git a/imperative/python/src/grad_info.h b/imperative/python/src/grad_info.h new file mode 100644 index 000000000..676b598bf --- /dev/null +++ b/imperative/python/src/grad_info.h @@ -0,0 +1,36 @@ +/** + * \file imperative/python/src/grad_info.h + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#include + +#include "./intrusive_list.h" + +namespace mgb::imperative::python { + +struct GradFn; +struct GradSlot; + +struct GradSlotPtr { + std::shared_ptr grad_fn; + size_t idx; + + GradSlot* operator->(); +}; + +struct GradInfo : GradSlotPtr, intrusive_list::Node { + GradInfo() = default; + GradInfo(GradInfo&) = default; + GradInfo(GradInfo&&) = default; + GradInfo& operator=(GradInfo&) = default; + GradInfo& operator=(GradInfo&&) = default; +}; + +} // namespace mgb::imperative::python diff --git a/imperative/python/src/intrusive_list.h b/imperative/python/src/intrusive_list.h new file mode 100644 index 000000000..9246dee0a --- /dev/null +++ b/imperative/python/src/intrusive_list.h @@ -0,0 +1,227 @@ +/** + * \file imperative/python/src/intrusive_list.h + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#include "megbrain/utils/metahelper.h" + +namespace mgb::imperative::python::intrusive_list { + +// copy policy +struct after_t {}; +struct before_t {}; +struct disable_t {}; + +template struct Tail; + +// invariant: next->prev == this +template +struct Head { + Tail* next; + + Head(Tail* node = nullptr) : next(node) {} + Head(const Head&) = delete; + Head& operator=(const Head&) = delete; + Head(Head&& rhs) : next(rhs.next) { + rhs.next = nullptr; + if (next) { + next->prev = this; + } + } + Head& operator=(Head&& rhs) { + mgb_assert(!next); + next = rhs.next; + rhs.next = nullptr; + if (next) { + next->prev = this; + } + return *this; + } + ~Head() { + if (next) { + next->prev = nullptr; + } + } +}; + +// invariant: prev->next == this +template +struct Tail { + Head* prev; + + Tail(Head* node = nullptr) : prev(node) {} + Tail(const Tail&) = delete; + Tail& operator=(const Tail&) = delete; + Tail(Tail&& rhs) : prev(rhs.prev) { + rhs.prev = nullptr; + if (prev) { + prev->next = this; + } + } + Tail& operator=(Tail&& rhs) { + mgb_assert(!prev); + prev = rhs.prev; + rhs.prev = nullptr; + if (prev) { + prev->next = this; + } + return *this; + } + ~Tail() { + if (prev) { + prev->next = nullptr; + } + } +}; + +template struct Node; + +template +class Iterator { + T* ptr; + + void inc() {ptr = static_cast(ptr->Head::next);} + void dec() {ptr = static_cast(ptr->Head::prev);} + +public: + Iterator(Head& head) : ptr(static_cast(head.next)) {} + Iterator(Tail& tail) : ptr(static_cast(tail.prev)) {} + + template + Iterator(Node& node) : ptr(static_cast(&node)) {} + + T& operator*() {return *static_cast(ptr);} + T* operator->() {return static_cast(ptr);} + + operator bool() {return ptr;} + bool operator==(const Iterator& rhs) {return ptr == rhs.ptr;} + + Iterator& operator++() {inc(); return *this;} + Iterator& operator--() {dec(); return *this;} + Iterator operator++(int) {auto ret = *this; inc(); return ret;} + Iterator operator--(int) {auto ret = *this; dec(); return ret;} +}; + +// Node in a doubly linked list. Unlike std::list, nodes are not owned by a container. +// Instead, nodes may join or leave a list freely. +// NOTE: Derived classes have to explicitly declare copy / assignment as default, +// otherwise the compiler generated version would use the const T& signature, +// which is deleted. +template +struct Node : Tail, Node, T>>, + Head, Node, T>> { +private: + using this_t = Node; + using U = std::conditional_t, this_t, T>; + +public: + using head_t = Head; + using tail_t = Tail; + using head_t::next; + using tail_t::prev; + + Node() = default; + Node(const this_t&) = delete; + this_t& operator=(const this_t&) = delete; + + //! constructed node is inserted after the input node + Node(after_t, head_t& node) : tail_t(&node), head_t(node.next) { + node.next = this; + if (next) { + next->prev = this; + } + } + + //! constructed node is inserted before the input node + Node(before_t, tail_t& node) : head_t(&node), tail_t(node.prev) { + node.prev = this; + if (prev) { + prev->next = this; + } + } + + Node(this_t&& rhs) : tail_t(rhs.prev), head_t(rhs.next) { + rhs.prev = nullptr; + rhs.next = nullptr; + if (prev) { + prev->next = this; + } + if (next) { + next->prev = this; + } + } + + Node& operator=(this_t&& rhs) { + unlink(); + prev = rhs.prev; + next = rhs.next; + rhs.prev = nullptr; + rhs.next = nullptr; + if (prev) { + prev->next = this; + } + if (next) { + next->prev = this; + } + return *this; + } + + template || std::is_same_v, void>> + Node(this_t& rhs) : Node(policy{}, rhs) {} + + template || std::is_same_v, void>> + this_t& operator=(this_t& rhs) { + insert(policy{}, rhs); + return *this; + } + + void unlink() { + if (prev) { + prev->next = next; + } + if (next) { + next->prev = prev; + } + prev = nullptr; + next = nullptr; + } + + //! this node is unlinked from its list and inserted after the input node + void insert(after_t, head_t& node) { + unlink(); + prev = &node; + next = node.next; + node.next = this; + if (next) { + next->prev = this; + } + } + + //! this node is unlinked from its list and inserted before the input node + void insert(before_t, tail_t& node) { + unlink(); + next = &node; + prev = node.prev; + node.prev = this; + if (prev) { + prev->next = this; + } + } + + void insert_before(tail_t& node) {insert(before_t{}, node);} + void insert_after(head_t& node) {insert(after_t{}, node);} + + ~Node() { + unlink(); + } +}; + +} // namespace mgb::imperative::python::intrusive_list diff --git a/imperative/python/src/module.cpp b/imperative/python/src/module.cpp index 5e9e55995..b2eab0352 100644 --- a/imperative/python/src/module.cpp +++ b/imperative/python/src/module.cpp @@ -23,7 +23,10 @@ #include "./dispatcher.h" +#include "./tensor.h" + namespace py = pybind11; +using namespace mgb::imperative::python; #ifndef MODULE_NAME #define MODULE_NAME imperative_rt @@ -68,4 +71,6 @@ PYBIND11_MODULE(MODULE_NAME, m) { py::getattr(m, "__dict__")); init_dispatcher(submodule(m, "dispatcher")); + + init_tensor(submodule(m, "core2")); } diff --git a/imperative/python/src/pyext17.h b/imperative/python/src/pyext17.h index ee508dcab..63c161ead 100644 --- a/imperative/python/src/pyext17.h +++ b/imperative/python/src/pyext17.h @@ -15,6 +15,7 @@ #include #include #include +#include namespace pyext17 { @@ -53,6 +54,26 @@ inline PyObject* cvt_retval(PyObject* rv) { return cvt_retval(__VA_ARGS__); \ } +inline int cvt_retint(int ret) { + return ret; +} + +#define CVT_RET_INT(...) \ + if constexpr (std::is_same_v) { \ + __VA_ARGS__; \ + return 0; \ + } else { \ + return cvt_retint(__VA_ARGS__); \ + } + + +struct py_err_set : std::exception {}; + +#define HANDLE_ALL_EXC(RET) catch(py_err_set&) {return RET;} \ + catch(pybind11::error_already_set& e) {e.restore(); return RET;} \ + catch(pybind11::builtin_exception& e) {e.set_error(); return RET;} \ + catch(std::exception& e) {PyErr_SetString(PyExc_RuntimeError, e.what()); return RET;} + template struct wrap { private: @@ -111,7 +132,9 @@ private: static PyObject* impl(PyObject* self, PyObject*) { auto* inst = reinterpret_cast(self)->inst(); - CVT_RET_PYOBJ((inst->*f)()); + try { + CVT_RET_PYOBJ((inst->*f)()); + } HANDLE_ALL_EXC(nullptr) } }; @@ -121,7 +144,9 @@ private: static PyObject* impl(PyObject* self, PyObject* args, PyObject* kwargs) { auto* inst = reinterpret_cast(self)->inst(); - CVT_RET_PYOBJ((inst->*f)(args, kwargs)); + try { + CVT_RET_PYOBJ((inst->*f)(args, kwargs)); + } HANDLE_ALL_EXC(nullptr) } }; @@ -132,7 +157,9 @@ private: static PyObject* impl(PyObject* self, PyObject*const* args, Py_ssize_t nargs) { auto* inst = reinterpret_cast(self)->inst(); - CVT_RET_PYOBJ((inst->*f)(args, nargs)); + try { + CVT_RET_PYOBJ((inst->*f)(args, nargs)); + } HANDLE_ALL_EXC(nullptr) } #else static constexpr int flags = METH_VARARGS; @@ -141,7 +168,9 @@ private: auto* inst = reinterpret_cast(self)->inst(); auto* arr = &PyTuple_GET_ITEM(args, 0); auto size = PyTuple_GET_SIZE(args); - CVT_RET_PYOBJ((inst->*f)(arr, size)); + try { + CVT_RET_PYOBJ((inst->*f)(arr, size)); + } HANDLE_ALL_EXC(nullptr) } #endif }; @@ -152,7 +181,9 @@ private: static PyObject* impl(PyObject* self, PyObject* obj) { auto* inst = reinterpret_cast(self)->inst(); - CVT_RET_PYOBJ((inst->*f)(obj)); + try { + CVT_RET_PYOBJ((inst->*f)(obj)); + } HANDLE_ALL_EXC(nullptr) } }; @@ -162,6 +193,55 @@ private: return {name, (PyCFunction)M::impl, M::flags, doc}; } + template + struct getter { + using F = decltype(f); + + static PyObject* impl(PyObject* self, void* closure) { + auto* inst = reinterpret_cast(self)->inst(); + try { + if constexpr (std::is_invocable_v) { + CVT_RET_PYOBJ(f(self, closure)); + } else if constexpr (std::is_invocable_v) { + CVT_RET_PYOBJ((inst->*f)(closure)); + } else if constexpr (std::is_invocable_v) { + CVT_RET_PYOBJ((inst->*f)()); + } else { + static_assert(!std::is_same_v); + } + } HANDLE_ALL_EXC(nullptr) + } + }; + + template + struct setter { + using F = decltype(f); + + template + static int impl_(PyObject* self, PyObject* val, void* closure) { + auto* inst = reinterpret_cast(self)->inst(); + try { + if constexpr (std::is_invocable_v) { + CVT_RET_INT(f(self, val, closure)); + } else if constexpr (std::is_invocable_v) { + CVT_RET_INT((inst->*f)(val, closure)); + } else if constexpr (std::is_invocable_v) { + CVT_RET_INT((inst->*f)(val)); + } else { + static_assert(!std::is_same_v); + } + } HANDLE_ALL_EXC(-1) + } + + static constexpr auto impl = []() {if constexpr (std::is_same_v) return nullptr; + else return impl_<>;}(); + }; + + template + static constexpr PyGetSetDef make_getset_def(const char* name, const char* doc = nullptr, void* closure = nullptr) { + return {const_cast(name), getter::impl, setter::impl, const_cast(doc), closure}; + } + // polyfills struct tp_vectorcall { @@ -216,16 +296,26 @@ private: template static PyObject* impl(PyTypeObject* type, PyObject* args, PyObject* kwargs) { + struct FreeGuard { + PyObject* self; + PyTypeObject* type; + ~FreeGuard() {if (self) type->tp_free(self);} + }; + auto* self = type->tp_alloc(type, 0); + FreeGuard free_guard{self, type}; auto* inst = reinterpret_cast(self)->inst(); if constexpr (has_vectorcall && tp_vectorcall::valid) { reinterpret_cast(self)->vectorcall_slot = &tp_vectorcall::template impl<>; } - if constexpr (varkw) { - new(inst) T(args, kwargs); - } else { - new(inst) T(); - } + try { + if constexpr (varkw) { + new(inst) T(args, kwargs); + } else { + new(inst) T(); + } + } HANDLE_ALL_EXC(nullptr) + free_guard.self = nullptr; return self; } @@ -250,6 +340,7 @@ private: public: class TypeBuilder { std::vector m_methods; + std::vector m_getsets; PyTypeObject m_type; bool m_finalized = false; bool m_ready = false; @@ -259,6 +350,13 @@ public: throw std::runtime_error("type is already finalized"); } } + + static const char* to_c_str(const char* s) {return s;} + + template + static const char* to_c_str(const pybind11::detail::descr& desc) { + return desc.text; + } public: TypeBuilder(const TypeBuilder&) = delete; TypeBuilder& operator=(const TypeBuilder&) = delete; @@ -266,7 +364,7 @@ public: TypeBuilder() : m_type{PyVarObject_HEAD_INIT(nullptr, 0)} { constexpr auto has_tp_name = HAS_MEMBER(T, tp_name); if constexpr (has_tp_name) { - m_type.tp_name = T::tp_name; + m_type.tp_name = to_c_str(T::tp_name); } m_type.tp_dealloc = tp_dealloc::value; #ifdef _Py_TPFLAGS_HAVE_VECTORCALL @@ -291,8 +389,17 @@ public: return m_ready; } + bool isinstance(PyObject* op) { + return PyObject_TypeCheck(op, &m_type); + } + + bool isexact(PyObject* op) { + return Py_TYPE(op) == &m_type; + } + PyObject* finalize() { if (!m_finalized) { + m_finalized = true; if (m_methods.size()) { m_methods.push_back({0}); if (m_type.tp_methods) { @@ -301,6 +408,14 @@ public: } m_type.tp_methods = &m_methods[0]; } + if (m_getsets.size()) { + m_getsets.push_back({0}); + if (m_type.tp_getset) { + PyErr_SetString(PyExc_SystemError, "tp_getset is already set"); + return nullptr; + } + m_type.tp_getset = &m_getsets[0]; + } if (PyType_Ready(&m_type)) { return nullptr; } @@ -315,12 +430,64 @@ public: m_methods.push_back(make_meth_def(name, doc)); return *this; } + + template + TypeBuilder& def_getset(const char* name, const char* doc = nullptr, void* closure = nullptr) { + check_finalized(); + m_getsets.push_back(make_getset_def(name, doc, closure)); + return *this; + } }; static TypeBuilder& type() { static TypeBuilder type_helper; return type_helper; } + + template + static PyObject* cnew(Args&&... args) { + auto* pytype = type().operator->(); + auto* self = pytype->tp_alloc(pytype, 0); + auto* inst = reinterpret_cast(self)->inst(); + if constexpr (has_vectorcall && tp_vectorcall::valid) { + reinterpret_cast(self)->vectorcall_slot = &tp_vectorcall::template impl<>; + } + new(inst) T(std::forward(args)...); + return self; + } + + template + static PyObject* cnew_with_type(PyTypeObject* pytype, Args&&... args) { + + auto* self = pytype->tp_alloc(pytype, 0); + auto* inst = reinterpret_cast(self)->inst(); + if constexpr (has_vectorcall && tp_vectorcall::valid) { + reinterpret_cast(self)->vectorcall_slot = &tp_vectorcall::template impl<>; + } + new(inst) T(std::forward(args)...); + return self; + } + + struct caster { + static constexpr auto name = T::tp_name; + + T* value; + + bool load(pybind11::handle src, bool convert) { + if (wrap_t::type().isinstance(src.ptr())) { + value = reinterpret_cast(src.ptr())->inst(); + return true; + } + return false; + } + + template using cast_op_type = pybind11::detail::cast_op_type; + operator T*() { return value; } + operator T&() { return *value; } + }; + + + }; } // namespace pyext17 @@ -328,3 +495,5 @@ public: #undef HAS_MEMBER_TYPE #undef HAS_MEMBER #undef CVT_RET_PYOBJ +#undef CVT_RET_INT +#undef HANDLE_ALL_EXC diff --git a/imperative/python/src/tensor.cpp b/imperative/python/src/tensor.cpp new file mode 100644 index 000000000..2eefbc0dc --- /dev/null +++ b/imperative/python/src/tensor.cpp @@ -0,0 +1,257 @@ +/** + * \file imperative/python/src/tensor.cpp + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#include "./tensor.h" +#include "./grad.h" +#include "./common.h" +#include "./numpy_dtypes.h" + +#include +#include + +namespace py = pybind11; + +namespace mgb::imperative::python { + +std::unique_ptr interpreter_for_py; + +apply_result_t apply(ApplyContext& ctx) { + // emulating scalar should be put to specific op's apply, e.g., + // elementwise, reduce, typecvt. Currently it's still handled at python + // side. It could be move to C++ side if it has an impact on performance + if (ctx.flags & Tensor::Flags::SCALAR) { + // TODO: emulate scalar + } + + if (ctx.flags & Tensor::Flags::GRAD) { + return apply_grad(ctx); + } + + if (ctx.flags & Tensor::Flags::TRACE) { + // TODO: trace + } else { + SmallVector handles(ctx.nargs); + for (size_t i = 0; i < ctx.nargs; ++i) { + handles[i] = ctx.args[i]->m_handle.get(); + } + + auto output_handles = interpreter_for_py->apply_op(ctx.op, handles); + + apply_result_t outputs; + outputs.reserve(output_handles.size()); + for (auto h : output_handles) { + outputs.emplace_back(std::make_shared(h)); + } + return outputs; + } + + mgb_assert(0); +} + +PyObject* py_apply(PyObject* self, PyObject*const* args, size_t nargs/* , PyObject* kwnames */) { + try { + + // if (kwnames && PyTuple_GET_SIZE(kwnames)) { + // PyErr_SetString(PyExc_TypeError, "keyword argument not allowed"); + // return nullptr; + // } + if (!nargs) { + PyErr_SetString(PyExc_TypeError, "expect Op"); + return nullptr; + } + auto* op = args[0]; + if (!strcmp(op->ob_type->tp_base->tp_name,"PodOpVisitor") || !strcmp(op->ob_type->tp_base->tp_name,"IndexingOpBase")){ + op = PyObject_CallMethod(op,"to_c",""); + } + + PyTypeObject* pytype = args[1]->ob_type; + ++args; + --nargs; + + ApplyContext ctx; + ctx.flags = 0; + ctx.op = py::handle(op).cast>(); + SmallVector tensors(nargs); + ctx.args = &tensors[0]; + ctx.nargs = nargs; + + for (size_t i = 0; i < nargs; ++i) { + TensorWrapper* tw = TensorWrapper::cast_safe(args[i]); + if (!tw) { + PyErr_SetString(PyExc_TypeError, "expect Tensor"); + return nullptr; + } + auto* t = tensors[i] = tw->m_tensor.get(); + ctx.flags |= t->m_flags; + } + + // TODO: set TRACE flag + + auto outputs = apply(ctx); + size_t nout = outputs.size(); + auto ret = py::tuple(nout); + for (size_t i = 0; i < nout; ++i) { + ret[i] = TensorWrapper::make(pytype, std::move(outputs[i])); + } + return ret.release().ptr(); + + } catch (std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + return nullptr; + } +} + + +TensorWrapper::TensorWrapper(PyObject* args, PyObject* kwargs) { + if (kwargs && PyDict_Size(kwargs)) { + throw py::type_error("keyword argument not allowed"); + } + auto nargs = PyTuple_Size(args); + auto tup = py::reinterpret_borrow(args); + if (nargs == 0) { + throw py::type_error("too few arguments"); + } + if (auto* t = cast_safe(tup[0].ptr())) { + if (nargs > 1) { + throw py::type_error("expect 1 argument"); + } + m_tensor = t->m_tensor; + } else { + if (nargs != 3) { + throw py::type_error("expect 3 arguments"); + } + py::detail::loader_life_support life_sup; // required to cast DType + auto data = tup[0].cast(); + DType dtype = tup[1].cast(); + CompNode cn = tup[2].cast(); + + interpreter::Interpreter::Handle handle; + constexpr auto size_threshhold = TensorShape::MAX_NDIM; + if (data.size() > size_threshhold) { + handle = interpreter_for_py->put(npy::np2tensor(data.ptr(), npy::Meth::borrow(cn), dtype)); + } else { + HostTensorND ret(cn); + handle = interpreter_for_py->put(npy::np2tensor(data.ptr(), npy::Meth::copy_into(&ret), dtype)); + } + + m_tensor = std::make_shared(handle); + if (data.ndim() == 0) { + m_tensor->m_flags |= Tensor::Flags::SCALAR; + } + } +} + + +PyObject* TensorWrapper::shape() { + if (m_tensor->m_flags & Tensor::Flags::SCALAR) { + return PyTuple_New(0); + } + auto&& shape = m_tensor->shape(); + if (!shape.ndim) { + Py_RETURN_NONE; + } + py::tuple ret(shape.ndim); + for (size_t i = 0; i < shape.ndim; ++i) { + ret[i] = shape[i]; + } + return ret.release().ptr(); +} + + +PyObject* TensorWrapper::dtype() { + return py::cast(m_tensor->dtype()).release().ptr(); +} + + +PyObject* TensorWrapper::device() { + return py::cast(m_tensor->comp_node()).release().ptr(); +} + + +PyObject* TensorWrapper::numpy() { + auto&& hv = interpreter_for_py->get_value(m_tensor->m_handle.get()); + auto arr = py::reinterpret_steal(npy::ndarray_from_tensor(hv, npy::ShareType::TRY_SHARE)); + if (!arr) return nullptr; + if (m_tensor->m_flags & Tensor::Flags::SCALAR) { + mgb_assert(PyArray_Check(arr.ptr())); + return PyArray_Squeeze(reinterpret_cast(arr.ptr())); + } + return arr.release().ptr(); +} + +void TensorWrapper::reset(PyObject* tensor) { + TensorWrapper* t = TensorWrapper::cast_safe(tensor); + if (!t) { + throw py::type_error("expect Tensor"); + } + m_tensor = t->m_tensor; +} + +PyObject* TensorWrapper::isscalar() { + if(m_tensor->m_flags & Tensor::Flags::SCALAR) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } +} + +void TensorWrapper::setscalar() { + m_tensor->m_flags |= Tensor::Flags::SCALAR; +} + + +struct TensorWeakRef { + std::weak_ptr wptr; + + TensorWeakRef(const TensorWrapper& tw) : wptr(tw.m_tensor) {} + + py::object operator()() { + if (auto p = wptr.lock()) { + return TensorWrapper::make(p); + } + return py::none(); + } +}; + + +void init_tensor(py::module m) { + interpreter_for_py = interpreter::Interpreter::inst().create_channel(); + + auto* tensor_type = TensorWrapper::wrap_t::type() + .def<&TensorWrapper::numpy>("numpy") + .def_getset<&TensorWrapper::shape>("shape") + .def_getset<&TensorWrapper::dtype>("dtype") + .def_getset<&TensorWrapper::device>("device") + .def<&TensorWrapper::reset>("_reset") + .def<&TensorWrapper::isscalar>("isscalar") + .def<&TensorWrapper::setscalar>("setscalar") + .finalize(); + if (!tensor_type) throw py::error_already_set(); + py::setattr(m, "Tensor", tensor_type); + + py::class_(m, "TensorWeakRef") + .def(py::init()) + .def("__call__", &TensorWeakRef::operator()); + + static PyMethodDef apply_def{"apply", (PyCFunction)py_apply, METH_FASTCALL, nullptr}; + auto* apply_func = PyCFunction_NewEx(&apply_def, nullptr, nullptr); + if (!apply_func) throw py::error_already_set(); + py::setattr(m, "apply", apply_func); + + py::handle grad_key_type = GradKeyWrapper::wrap_t::type() + .def<&GradKeyWrapper::attach>("attach") + .finalize(); + if (!grad_key_type) throw py::error_already_set(); + py::setattr(m, "GradKey", grad_key_type); + py::setattr(m, "backward", py::cpp_function(&GradKeyWrapper::backward)); +} + +} // namespace mgb::imperative::python diff --git a/imperative/python/src/tensor.h b/imperative/python/src/tensor.h new file mode 100644 index 000000000..73a436c46 --- /dev/null +++ b/imperative/python/src/tensor.h @@ -0,0 +1,157 @@ +/** + * \file imperative/python/src/tensor.h + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#pragma once + +#include + +#include "megbrain/imperative/interpreter.h" +#include "pybind11/pybind11.h" + +#include "./pyext17.h" + +namespace mgb::imperative::python { + +template +struct ObjectPtr : B { + using B::B; + T& operator*() {return reinterpret_cast(*B::ptr());} + T* operator->() {return reinterpret_cast(B::ptr());} +}; + +} // namespace mgb::imperative::python + +#include "./grad_info.h" // for struct GradInfo + +namespace mgb::imperative::python { + +struct TraceInfo { + +}; + +extern std::unique_ptr interpreter_for_py; + +class SharedHandle { + using Handle = interpreter::Interpreter::Handle; + static_assert(std::is_pointer_v); + std::shared_ptr> holder; + +public: + inline explicit SharedHandle(Handle handle) : holder(handle, [](auto* h){ + interpreter_for_py->del(h); + }) {} + SharedHandle(const SharedHandle&) = default; + SharedHandle& operator=(const SharedHandle&) = default; + SharedHandle(SharedHandle&&) = default; + SharedHandle& operator=(SharedHandle&&) = default; + + inline Handle get() {return holder.get();} +}; + + +struct Tensor : std::enable_shared_from_this, NonCopyableObj { + using flags_t = uint64_t; + + struct Flags { + static constexpr flags_t SCALAR = 1; + static constexpr flags_t GRAD = 1 << 1; + static constexpr flags_t TRACE = 1 << 2; + }; + + flags_t m_flags = 0; + + GradInfo m_grad_info; + TraceInfo m_trace_info; + SharedHandle m_handle; + + using Handle = interpreter::Interpreter::Handle; + + inline explicit Tensor(Handle handle) : m_handle(handle) {} + inline explicit Tensor(SharedHandle handle) : m_handle(std::move(handle)) {} + ~Tensor() = default; + + inline std::shared_ptr copy() { + auto ret = std::make_shared(m_handle); + ret->m_flags = m_flags; + ret->m_grad_info = m_grad_info; + ret->m_trace_info = m_trace_info; + return ret; + } + + inline DType dtype() {return interpreter_for_py->get_dtype(m_handle.get());} + inline CompNode comp_node() {return interpreter_for_py->get_device(m_handle.get());} + inline TensorShape shape() {return interpreter_for_py->get_shape(m_handle.get());} +}; + + +struct TensorWrapper { + std::shared_ptr m_tensor; + + inline TensorWrapper(std::shared_ptr tensor = {}) : m_tensor(std::move(tensor)) {} + TensorWrapper(PyObject* args, PyObject* kwargs); + ~TensorWrapper() = default; + + static constexpr auto tp_name = pybind11::detail::_("Tensor"); + + using wrap_t = pyext17::wrap; + friend wrap_t; + + inline static TensorWrapper* cast(PyObject* op) {return reinterpret_cast(op)->inst();} + inline static TensorWrapper* cast_safe(PyObject* op) { + if (!wrap_t::type().isinstance(op)) return nullptr; + return cast(op); + } + inline ObjectPtr self() {return wrap_t::pycast(this);} + + template + static ObjectPtr make(Args&&... args) { + auto* op = wrap_t::cnew(std::forward(args)...); + return pybind11::reinterpret_steal>(op); + } + + template + static ObjectPtr make(PyTypeObject* pytype, Args&&... args) { + auto* op = wrap_t::cnew_with_type(pytype,std::forward(args)...); + return pybind11::reinterpret_steal>(op); + } + + PyObject* shape(); + PyObject* dtype(); + PyObject* device(); + PyObject* numpy(); + void reset(PyObject*); + PyObject* isscalar(); + void setscalar(); +}; + + +PyObject* py_apply(PyObject* self, PyObject*const* args, size_t nargs/* , PyObject* kwnames */); + +struct ApplyContext { + Tensor::flags_t flags; + std::shared_ptr op; + Tensor*const* args; + size_t nargs; +}; + +using apply_result_t = SmallVector, 8>; + +apply_result_t apply(ApplyContext& ctx); + +void init_tensor(pybind11::module); + +} // namespace mgb::imperative::python + +namespace pybind11::detail { + +template<> struct type_caster : mgb::imperative::python::TensorWrapper::wrap_t::caster {}; + +} // namespace pybind11::detail diff --git a/imperative/python/src/trace.h b/imperative/python/src/trace.h new file mode 100644 index 000000000..d84d76a87 --- /dev/null +++ b/imperative/python/src/trace.h @@ -0,0 +1,17 @@ +/** + * \file imperative/python/src/trace.h + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +namespace mgb::imperative::python { + +struct TraceInfo { +}; + +} // namespace mgb::imperative::python diff --git a/imperative/python/test/unit/core/test_async_level.py b/imperative/python/test/unit/core/test_async_level.py index 08f4d28ce..f5a761f13 100644 --- a/imperative/python/test/unit/core/test_async_level.py +++ b/imperative/python/test/unit/core/test_async_level.py @@ -12,6 +12,7 @@ def test_basic(): config_async_level(3) +@pytest.mark.skip def test_level1_infer_value(): config_async_level(1) a = mge.tensor([[1, 2], [2, 3], [3, 4]], dtype="float32") @@ -22,6 +23,7 @@ def test_level1_infer_value(): d = F.reshape(a, c) +@pytest.mark.skip def test_level1_infer_shape_with_unknown(): config_async_level(2) a = mge.tensor([[1, 2, 2, 3]], dtype="float32") diff --git a/imperative/python/test/unit/core/test_autodiff.py b/imperative/python/test/unit/core/test_autodiff.py index 0c6693f0d..55a01a250 100644 --- a/imperative/python/test/unit/core/test_autodiff.py +++ b/imperative/python/test/unit/core/test_autodiff.py @@ -16,12 +16,11 @@ import pytest import megengine as mge import megengine.distributed as dist import megengine.functional as F -from megengine.core._imperative_rt import TensorAttr, imperative +from megengine.core._imperative_rt import TensorAttr, core2, imperative +from megengine.core._imperative_rt.core2 import TensorWeakRef, apply +from megengine.core._imperative_rt.imperative import sync from megengine.core.autodiff.grad import Grad from megengine.core.ops.builtin import Elemwise -from megengine.core.tensor.raw_tensor import as_raw_tensor -from megengine.core.tensor.tensor import Tensor, apply -from megengine.core.tensor.tensor_wrapper import TensorWrapper from megengine.distributed.helper import get_device_count_by_fork from megengine.functional.distributed import remote_recv, remote_send @@ -43,11 +42,11 @@ relu = _elwise(Elemwise.Mode.RELU) def as_tensor(x): - return Tensor(as_raw_tensor(x, device=mge.device.get_default_device())) + return mge.Tensor(x) def save_to(self, name="grad"): - def callback(tensor, grad): + def callback(grad): setattr(self, name, grad) return callback @@ -136,14 +135,14 @@ def test_2nd_grad(): def test_grad_with_tensor_wrapper(): x_np = np.random.rand(10).astype("float32") - x = TensorWrapper(x_np) + x = mge.Tensor(x_np) grad = Grad().wrt(x, callback=save_to(x)) y = mul(x, x) y = mul(y, y) - grad(y, TensorWrapper(np.ones_like(x_np))) + grad(y, mge.Tensor(np.ones_like(x_np))) np.testing.assert_almost_equal(x.grad.numpy(), 4 * x_np ** 3, decimal=6) @@ -162,8 +161,8 @@ def test_release(): finally: gc.enable() - x = TensorWrapper([0.0]) - dy = TensorWrapper(np.ones_like(x.numpy())) + x = mge.Tensor([0.0]) + dy = mge.Tensor(np.ones_like(x.numpy())) @check def _(): @@ -173,25 +172,25 @@ def test_release(): @check def _(): - with Grad().wrt(x) as g: + with Grad().wrt(x): pass @check def _(): - with Grad().wrt(x) as g: + with Grad().wrt(x): y = x * x def test_grad_inplace(): x_np = np.random.rand(10).astype("float32") - x = TensorWrapper(x_np) + x = mge.Tensor(x_np) grad = Grad().wrt(x, callback=save_to(x)) y = mul(x, x) y *= y - grad(y, TensorWrapper(np.ones_like(x_np))) + grad(y, mge.Tensor(np.ones_like(x_np))) np.testing.assert_almost_equal(x.grad.numpy(), 4 * x_np ** 3, decimal=6) @@ -199,16 +198,16 @@ def test_elemwise_add(): x_np = np.random.rand(10).astype("float32") y_np = np.random.rand(10, 10).astype("float32") dz_np = np.random.rand(10, 10).astype("float32") - x = TensorWrapper(x_np) - y = TensorWrapper(y_np) - dz = TensorWrapper(dz_np) + x = mge.Tensor(x_np) + y = mge.Tensor(y_np) + dz = mge.Tensor(dz_np) refs = {} def f(x, y): x = x * 2 - refs["x"] = weakref.ref(x.__wrapped__) - refs["y"] = weakref.ref(y.__wrapped__) + refs["x"] = TensorWeakRef(x) + refs["y"] = TensorWeakRef(y) return x + y grad = Grad().wrt(x, callback=save_to(x)) @@ -226,14 +225,14 @@ def test_elemwise_add(): def test_elemwise_relu(): x_np = [1.0, -1.0] dz_np = [1.0] - x = TensorWrapper(x_np) - dz = TensorWrapper(dz_np) + x = mge.Tensor(x_np) + dz = mge.Tensor(dz_np) refs = {} def f(x): x = x * 2 - refs["x"] = weakref.ref(x.__wrapped__) + refs["x"] = TensorWeakRef(x) return relu(x) grad = Grad().wrt(x, callback=save_to(x)) @@ -258,7 +257,7 @@ def test_elemwise_relu_backward_fn(): def test_reshape(): x_np = np.random.rand(2, 5).astype("float32") - x = TensorWrapper(x_np) + x = mge.Tensor(x_np) grad = Grad().wrt(x, callback=save_to(x)) y = x.reshape(5, 2) @@ -269,7 +268,7 @@ def test_reshape(): def test_subtensor(): x_np = np.random.rand(3, 3).astype("float32") - x = TensorWrapper(x_np) + x = mge.Tensor(x_np) grad = Grad().wrt(x, callback=save_to(x)) y = x[1:-1, :2] @@ -282,7 +281,7 @@ def test_subtensor(): def test_IndexingMultiAxisVec(): x_np = np.random.rand(3, 3).astype("float32") - x = TensorWrapper(x_np) + x = mge.Tensor(x_np) grad = Grad().wrt(x, callback=save_to(x)) y = x[[0, 2], [0, 2]] @@ -295,7 +294,7 @@ def test_IndexingMultiAxisVec(): def test_AxisAddRemove(): x_np = np.random.rand(1, 5).astype("float32") - x = TensorWrapper(x_np) + x = mge.Tensor(x_np) grad = Grad().wrt(x, callback=save_to(x)) y = F.squeeze(F.expand_dims(x, 2), 0) @@ -308,7 +307,7 @@ def test_AxisAddRemove(): def test_Broadcast(): x_np = np.random.rand(3, 3, 1).astype("float32") - x = TensorWrapper(x_np) + x = mge.Tensor(x_np) grad = Grad().wrt(x, callback=save_to(x)) y = F.broadcast_to(x, (3, 3, 10)) @@ -319,7 +318,7 @@ def test_Broadcast(): def test_Reduce_sum(): x_np = np.random.rand(3, 3).astype("float32") - x = TensorWrapper(x_np) + x = mge.Tensor(x_np) grad = Grad().wrt(x, callback=save_to(x)) y = x.sum(axis=0) @@ -330,7 +329,7 @@ def test_Reduce_sum(): def test_Reduce_mean(): x_np = np.random.rand(3, 3).astype("float32") - x = TensorWrapper(x_np) + x = mge.Tensor(x_np) grad = Grad().wrt(x, callback=save_to(x)) y = x.mean(axis=0) diff --git a/imperative/python/test/unit/core/test_indexing_op.py b/imperative/python/test/unit/core/test_indexing_op.py index 6a6880531..a1c5284c4 100644 --- a/imperative/python/test/unit/core/test_indexing_op.py +++ b/imperative/python/test/unit/core/test_indexing_op.py @@ -11,30 +11,29 @@ import collections import numpy as np import pytest -import megengine.core.tensor.raw_tensor +import megengine +import megengine.tensor as Tensor +from megengine.core._imperative_rt.core2 import apply from megengine.core._trace_option import use_symbolic_shape from megengine.core.ops import builtin -from megengine.core.tensor import Tensor -from megengine.core.tensor.core import apply -from megengine.core.tensor.raw_tensor import RawTensor, as_raw_tensor def cvt_to_shape_desc(val, inpvar, config=None): def as_tensor(val, device): assert device is not None, "can not infer device" # TODO: should copy to appropriate device - val = as_raw_tensor(val, device=device) + val = Tensor(val, device=device) return val device = None if inpvar is not None: - assert isinstance(inpvar, RawTensor) + assert isinstance(inpvar, Tensor) device = device or inpvar.device if config is not None: device = device or config.device - if isinstance(val, RawTensor): + if isinstance(val, Tensor): return as_tensor(val, device) if not isinstance(val, collections.abc.Iterable): @@ -43,7 +42,7 @@ def cvt_to_shape_desc(val, inpvar, config=None): components = [] on_host = True for i in val: - if isinstance(i, RawTensor): + if isinstance(i, Tensor): on_host = False device = device or i.device else: @@ -62,7 +61,7 @@ def cvt_to_shape_desc(val, inpvar, config=None): return as_tensor(shape, device) for idx, v in enumerate(components): - if not isinstance(v, RawTensor): + if not isinstance(v, Tensor): vi = int(v) assert vi == v, "could not convert {} to int".format(v) v = vi @@ -95,7 +94,7 @@ def canonize_inputs(inputs, *, config): # and is called with concat([a, b])) inputs = inputs[0] - if isinstance(inputs, RawTensor): + if isinstance(inputs, Tensor): return [inputs] old_inputs = inputs @@ -103,7 +102,7 @@ def canonize_inputs(inputs, *, config): get_comp_node = None need_cvt = False for i in old_inputs: - if isinstance(i, RawTensor): + if isinstance(i, Tensor): get_comp_node = lambda cn=i.device: cn else: need_cvt = True @@ -117,8 +116,8 @@ def canonize_inputs(inputs, *, config): return config.comp_node for idx, var in enumerate(inputs): - if not isinstance(var, RawTensor): - var = as_raw_tensor(var) + if not isinstance(var, Tensor): + var = Tensor(var) inputs[idx] = var return inputs @@ -131,15 +130,15 @@ def invoke_op(op, inputs_, cvt_inputs=canonize_inputs): def unpack_getitem(inp, tuple_val, *, allow_newaxis=True): - assert isinstance(inp, RawTensor) + assert isinstance(inp, Tensor) if not isinstance(tuple_val, tuple): tuple_val = (tuple_val,) def as_tensor(v): - if not isinstance(v, RawTensor): + if not isinstance(v, Tensor): vi = np.ascontiguousarray(v, dtype=np.int32) assert np.abs(vi - v).max() == 0, "bad index: {!r}".format(v) - v = as_raw_tensor(vi) + v = Tensor(vi) return v new_axes = [] @@ -275,14 +274,14 @@ def batched_incr_mesh_indexing(input, value, tuple_val): def test_transpose(): x = np.arange(10).reshape(2, 5).astype("int32") - xx = as_raw_tensor(x) + xx = Tensor(x) (yy,) = transpose(xx, pattern=[1, -1, 0]) np.testing.assert_equal(np.expand_dims(x.transpose(), axis=1), yy.numpy()) def test_broadcast(): x = np.arange(10).reshape(1, 10).astype("int32") - xx = as_raw_tensor(x) + xx = Tensor(x) (yy,) = broadcast(xx, (10, 10)) np.testing.assert_equal(np.repeat(x, 10, 0), yy.numpy()) @@ -290,7 +289,7 @@ def test_broadcast(): def test_subtensor(): x = np.arange(25).reshape(5, 5).astype("int32") d = np.arange(2).astype("int32") - xx = as_raw_tensor(x) + xx = Tensor(x) (yy0,) = subtensor(xx, (slice(0, 4, 2), 3)) (yy1,) = set_subtensor(xx, d, (slice(0, 4, 2), 3)) (yy2,) = incr_subtensor(xx, d, (slice(0, 4, 2), 3)) @@ -309,7 +308,7 @@ def test_subtensor(): def test_advance_indexing(): x = np.arange(25).reshape(5, 5).astype("int32") d = np.arange(15).reshape(3, 5).astype("int32") - xx = as_raw_tensor(x) + xx = Tensor(x) (yy0,) = advance_indexing(xx, ((0, 4, 2), slice(None, None, None))) (yy1,) = set_advance_indexing(xx, d, ((0, 4, 2), slice(None, None, None))) (yy2,) = incr_advance_indexing(xx, d, ((0, 4, 2), slice(None, None, None))) @@ -328,7 +327,7 @@ def test_advance_indexing(): def test_mesh_indexing(): x = np.arange(25).reshape(5, 5).astype("int32") d = np.arange(6).reshape(3, 2).astype("int32") - xx = as_raw_tensor(x) + xx = Tensor(x) (yy0,) = mesh_indexing(xx, (slice(0, 5, 2), (1, 3))) (yy1,) = set_mesh_indexing(xx, d, (slice(0, 5, 2), (1, 3))) (yy2,) = incr_mesh_indexing(xx, d, (slice(0, 5, 2), (1, 3))) @@ -355,7 +354,7 @@ def test_mesh_indexing(): def test_batched_mesh_indexing(): x = np.arange(24).reshape(2, 3, 4).astype("int32") d = np.arange(12).reshape(2, 2, 3).astype("int32") - xx = as_raw_tensor(x) + xx = Tensor(x) s = [(0, 1, 2), (1, 2, 3)] (yy0,) = batched_mesh_indexing(xx, (slice(None, None, None), [(0, 2)] * 2, s)) (yy1,) = batched_set_mesh_indexing( diff --git a/imperative/python/test/unit/core/test_tensor_wrapper.py b/imperative/python/test/unit/core/test_tensor_wrapper.py index 8dff16330..89274d4f0 100644 --- a/imperative/python/test/unit/core/test_tensor_wrapper.py +++ b/imperative/python/test/unit/core/test_tensor_wrapper.py @@ -9,12 +9,12 @@ import numpy as np from megengine.core.tensor.dtype import get_scale, get_zero_point, qint8, quint8 -from megengine.core.tensor.tensor_wrapper import TensorWrapper +from megengine.tensor import Tensor def test_basic(): x_np = np.random.rand(10).astype("float32") - x = TensorWrapper(x_np) + x = Tensor(x_np) y = x * x y_np = y.numpy() np.testing.assert_almost_equal(y_np, x_np * x_np) @@ -22,15 +22,15 @@ def test_basic(): def test_literal_arith(): x_np = np.random.rand(10).astype("float32") - x = TensorWrapper(x_np) + x = Tensor(x_np) y = x * 2 y_np = y.numpy() np.testing.assert_almost_equal(y_np, x_np * 2) def test_matmul(): - A = TensorWrapper(np.random.rand(5, 7).astype("float32")) - B = TensorWrapper(np.random.rand(7, 10).astype("float32")) + A = Tensor(np.random.rand(5, 7).astype("float32")) + B = Tensor(np.random.rand(7, 10).astype("float32")) C = A @ B np.testing.assert_almost_equal(C.numpy(), A.numpy() @ B.numpy(), decimal=6) @@ -38,7 +38,7 @@ def test_matmul(): def test_reduce(): def test_x(x_np): for m in ["sum", "prod", "min", "max", "mean"]: - x = TensorWrapper(x_np) + x = Tensor(x_np) y = getattr(x, m)(axis=-1, keepdims=True) np.testing.assert_almost_equal(y.numpy(), getattr(x_np, m)(-1), decimal=6) @@ -49,7 +49,7 @@ def test_reduce(): def test_set_subtensor(): - x = TensorWrapper([1, 2, 3]) + x = Tensor([1, 2, 3]) x[:] = [1, 1, 1] np.testing.assert_almost_equal(x.numpy(), [1, 1, 1], decimal=6) x[[0, 2]] = [3, 2] @@ -60,7 +60,7 @@ def test_set_subtensor(): def test_computing_with_numpy_array(): x = np.array([1, 2, 3], dtype=np.int32) - xx = TensorWrapper(x, device="cpu0") + xx = Tensor(x, device="cpu0") y = np.array([1, 0, 3], dtype=np.int32) assert np.add(xx, y).device == xx.device np.testing.assert_equal(np.add(xx, y).numpy(), np.add(x, y)) @@ -70,12 +70,12 @@ def test_computing_with_numpy_array(): def test_transpose(): x = np.random.rand(2, 5).astype("float32") - xx = TensorWrapper(x) + xx = Tensor(x) np.testing.assert_almost_equal(xx.T.numpy(), x.T) def test_as_type(): - x = TensorWrapper([1, 2, 3], dtype=np.float32) + x = Tensor([1, 2, 3], dtype=np.float32) y = x.astype(qint8(0.1)) np.testing.assert_almost_equal(get_scale(y.dtype), 0.1) z = y.astype(qint8(0.2)) diff --git a/imperative/python/test/unit/functional/test_tensor.py b/imperative/python/test/unit/functional/test_tensor.py index da7b8d19a..4ab424eb5 100644 --- a/imperative/python/test/unit/functional/test_tensor.py +++ b/imperative/python/test/unit/functional/test_tensor.py @@ -312,7 +312,7 @@ def test_device(): np.testing.assert_almost_equal(y1.numpy(), y2.numpy()) y3 = F.eye(x.shape, dtype="float32", device="xpux") - y4 = F.eye(x.shape, dtype="float32", device=x.device.to_c()) + y4 = F.eye(x.shape, dtype="float32", device=x.device) np.testing.assert_almost_equal(y3.numpy(), y4.numpy()) y5 = F.full((3, 2), 4, device=x.device) diff --git a/imperative/src/impl/op_trait.cpp b/imperative/src/impl/op_trait.cpp index 56bffb620..e2e3d19a1 100644 --- a/imperative/src/impl/op_trait.cpp +++ b/imperative/src/impl/op_trait.cpp @@ -14,7 +14,7 @@ #include "megbrain/imperative/ops/opr_attr.h" #include "./op_trait.h" -#include "./proxy_graph_detail.h" +#include "megbrain/imperative/proxy_graph_detail.h" namespace mgb { namespace imperative { diff --git a/imperative/src/impl/ops/collective_comm.cpp b/imperative/src/impl/ops/collective_comm.cpp index df83feff2..42f8e9b0a 100644 --- a/imperative/src/impl/ops/collective_comm.cpp +++ b/imperative/src/impl/ops/collective_comm.cpp @@ -13,7 +13,7 @@ #if MGB_ENABLE_OPR_MM #include "../op_trait.h" -#include "../proxy_graph_detail.h" +#include "megbrain/imperative/proxy_graph_detail.h" #include "megbrain/opr/mm_handler.h" #include "megbrain/utils/hash.h" #endif // MGB_ENABLE_OPR_MM diff --git a/imperative/src/impl/ops/io_remote.cpp b/imperative/src/impl/ops/io_remote.cpp index 86eb59bb5..7a635cce6 100644 --- a/imperative/src/impl/ops/io_remote.cpp +++ b/imperative/src/impl/ops/io_remote.cpp @@ -13,7 +13,7 @@ #if MGB_ENABLE_OPR_MM #include "../op_trait.h" -#include "../proxy_graph_detail.h" +#include "megbrain/imperative/proxy_graph_detail.h" #include "megbrain/opr/io_remote.h" #include "megbrain/opr/mm_handler.h" #endif // MGB_ENABLE_OPR_MM diff --git a/imperative/src/impl/ops/opr_attr.cpp b/imperative/src/impl/ops/opr_attr.cpp index c2f2a9440..2792b7ab1 100644 --- a/imperative/src/impl/ops/opr_attr.cpp +++ b/imperative/src/impl/ops/opr_attr.cpp @@ -13,7 +13,7 @@ #include "megbrain/serialization/opr_load_dump.h" #include "../op_trait.h" -#include "../proxy_graph_detail.h" +#include "megbrain/imperative/proxy_graph_detail.h" namespace mgb { namespace imperative { diff --git a/imperative/src/impl/proxy_graph_detail.cpp b/imperative/src/impl/proxy_graph_detail.cpp index 207c09a5e..659e88f91 100644 --- a/imperative/src/impl/proxy_graph_detail.cpp +++ b/imperative/src/impl/proxy_graph_detail.cpp @@ -10,7 +10,7 @@ */ #include "./proxy_graph.h" -#include "./proxy_graph_detail.h" +#include "megbrain/imperative/proxy_graph_detail.h" namespace mgb { namespace imperative { diff --git a/imperative/src/impl/proxy_graph_detail.h b/imperative/src/include/megbrain/imperative/proxy_graph_detail.h similarity index 93% rename from imperative/src/impl/proxy_graph_detail.h rename to imperative/src/include/megbrain/imperative/proxy_graph_detail.h index be0fbe484..2729f11fb 100644 --- a/imperative/src/impl/proxy_graph_detail.h +++ b/imperative/src/include/megbrain/imperative/proxy_graph_detail.h @@ -1,5 +1,5 @@ /** - * \file imperative/src/impl/proxy_graph_detail.h + * \file imperative/src/include/megbrain/imperative/proxy_graph_detail.h * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") * * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. -- GitLab