Commit 10e942d9 authored by Megvii Engine Team

refactor(mge): polish api

- refactor(mge): add support for optimizer.step().clear_grad() idiom

- refactor(mge): rename some methods of GradManager

- refactor(mge): remove tensor_nn and TensorDict

- refactor(mge): remove Buffer

- refactor(mge): remove requires_grad flag

- refactor(mge): add a default grad=None attribute to Tensor

- refactor(mge): deprecation for 1.0

GitOrigin-RevId: 3b723d938747e7a7d765ba3716b2e08f5223f62a
Parent 9389a805
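Taken together, the bullets above change the basic training loop to roughly the shape below. This is a minimal sketch assembled from the renamed APIs in this diff (`GradManager.attach`, the `with gm:` form, and `step()` returning `self`); the toy network and data are placeholders and not part of the commit.

```python
import numpy as np
import megengine.autodiff as ad
import megengine.optimizer as optim
from megengine import tensor
from megengine.module import Linear, Module


class Net(Module):
    def __init__(self):
        super().__init__()
        self.fc = Linear(4, 1)

    def forward(self, x):
        return self.fc(x)


net = Net()
gm = ad.GradManager().attach(net.parameters())  # was: GradManager().register(...)
opt = optim.SGD(net.parameters(), lr=0.01)      # no requires_grad filtering anymore

data = tensor(np.random.random((8, 4)).astype(np.float32))
with gm:                                        # was: with gm.record():
    loss = net(data).sum()
    gm.backward(loss)                           # gradients land on param.grad (default None)
opt.step().clear_grad()                         # step() now returns self, enabling the chain
```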
......@@ -74,8 +74,7 @@ from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func
from .device import *
from .logger import enable_debug_log, get_logger, set_log_file, set_log_level
from .serialization import load, save
from .tensor import Tensor, tensor
from .tensor_nn import Buffer, Parameter
from .tensor import Parameter, Tensor, tensor
from .version import __version__
_set_fork_exec_path_for_timed_func(
......
......@@ -22,7 +22,7 @@ class GradManager:
self._after_backward_callback = []
self._gradients = dict()
def register(self, params, callbacks=None):
def attach(self, params, callbacks=None):
if callbacks is None:
callbacks = []
if isinstance(callbacks, Callable):
......@@ -62,7 +62,7 @@ class GradManager:
if isinstance(grad, Future):
grad = grad.get()
param = self._param_dict[p]
if getattr(param, "grad", None) is None:
if param.grad is None:
param.grad = grad
else:
param.grad += grad
......@@ -70,9 +70,9 @@ class GradManager:
self._stop_record()
backwarding_grad_manager = cache
def __enter__(self):
def record(self):
if self._recording:
return self
raise RuntimeError("already recording")
grad = Grad()
self._recording = True
self._grad = grad
......@@ -88,16 +88,22 @@ class GradManager:
grad.wrt(param_wrapper, callback=callback)
grad.__enter__()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
def release(self):
if not self._recording:
raise RuntimeError("not recording")
self._stop_record()
record = __enter__
def _stop_record(self):
if self._grad is not None:
self._grad.__exit__(None, None, None)
self._recording = False
self._grad = None
self._gradients = dict()
def __enter__(self):
self.record()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self._stop_record()
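For reference, a hedged sketch of the renamed GradManager surface shown in this hunk: `attach()` replaces `register()`, `record()`/`release()` replace the old `__enter__`/`__exit__` aliases and now raise instead of silently no-op, and the context-manager form remains available. The single-parameter setup below is illustrative.

```python
import numpy as np
import megengine.autodiff as ad
from megengine import Parameter, tensor

w = Parameter(np.ones((3,), dtype=np.float32))
x = tensor(np.arange(3, dtype=np.float32))
gm = ad.GradManager().attach([w])

gm.record()                      # raises RuntimeError if already recording
loss = (w * x).sum()
gm.backward(loss)                # backward also stops the recording (see _stop_record)
print(w.grad)

gm.record()                      # a recording can be discarded without calling backward
_ = (w * x).sum()
gm.release()                     # raises RuntimeError if not recording

with gm:                         # __enter__/__exit__ now delegate to record()/_stop_record()
    gm.backward((w * x).sum())
```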
......@@ -70,7 +70,7 @@ class Dimshuffle(PodOpVisitor):
return bytes(ctypes.c_uint32(0)) + bytes(self)
def __init__(self, pattern, ndim=0):
assert isinstance(pattern, collections.Iterable)
assert isinstance(pattern, collections.abc.Iterable)
assert len(pattern) <= TensorShape.MAX_NDIM
pattern_array = Dimshuffle.Pattern.Pattern_Array()
for idx, v in enumerate(pattern):
......
......@@ -231,13 +231,13 @@ class OpNode:
def _wrap(x):
if isinstance(x, collections.Sequence):
if isinstance(x, collections.abc.Sequence):
return type(x)(map(_wrap, x))
return x.graph._wrap(x)
def _unwrap(x):
if isinstance(x, collections.Sequence):
if isinstance(x, collections.abc.Sequence):
return type(x)(map(_unwrap, x))
return x._node
......
......@@ -166,7 +166,7 @@ def _reduce(mode):
op = builtin.Reduce(mode=mode, axis=0)
(result,) = apply(op, data)
elif isinstance(axis, collections.Iterable):
elif isinstance(axis, collections.abc.Iterable):
axis = list(axis)
axis.sort(reverse=True)
......@@ -204,7 +204,9 @@ def _todo(*_):
def _expand_args(args):
if len(args) == 1:
if isinstance(args[0], (collections.Sequence, TensorBase, TensorWrapperBase)):
if isinstance(
args[0], (collections.abc.Sequence, TensorBase, TensorWrapperBase)
):
args = args[0]
return args
......
......@@ -143,7 +143,7 @@ def astensor1d(x, *reference, dtype=None, device=None):
(x,) = Const(x, dtype=dtype, device=device)(*reference)
return x
if not isinstance(x, collections.Sequence):
if not isinstance(x, collections.abc.Sequence):
raise TypeError
if any(isinstance(i, (TensorBase, TensorWrapperBase)) for i in x):
......
......@@ -432,7 +432,7 @@ def argmin(
[0]
"""
if isinstance(axis, collections.Iterable):
if isinstance(axis, collections.abc.Iterable):
axis = list(axis)
axis.sort(reverse=True)
......@@ -486,7 +486,7 @@ def argmax(
[5]
"""
if isinstance(axis, collections.Iterable):
if isinstance(axis, collections.abc.Iterable):
axis = list(axis)
axis.sort(reverse=True)
......
......@@ -15,7 +15,7 @@ def get_ndtuple(value, *, n, allow_zero=True):
:type allow_zero: bool
:param allow_zero: whether to allow zero tuple value"""
if not isinstance(value, collections.Iterable):
if not isinstance(value, collections.abc.Iterable):
value = int(value)
value = tuple([value for i in range(n)])
else:
......
......@@ -502,7 +502,7 @@ class trace:
raise TypeError(
"cannot specify output_names when output is already in dict format"
)
if output_names and not isinstance(output_names, collections.Sequence):
if output_names and not isinstance(output_names, collections.abc.Sequence):
output_names = (output_names,)
if output_names and len(output_names) != len(self._output_bindings):
raise ValueError(
......@@ -510,7 +510,7 @@ class trace:
len(self._output_bindings)
)
)
if arg_names and not isinstance(arg_names, collections.Sequence):
if arg_names and not isinstance(arg_names, collections.abc.Sequence):
arg_names = (arg_names,)
if arg_names and len(arg_names) != len(self._arg_bindings):
raise ValueError(
......@@ -646,9 +646,9 @@ class trace:
def _process_outputs(self, outputs):
output_names = None
if isinstance(outputs, collections.Mapping):
if isinstance(outputs, collections.abc.Mapping):
output_names, outputs = zip(*sorted(outputs.items()))
elif not isinstance(outputs, collections.Sequence):
elif not isinstance(outputs, collections.abc.Sequence):
outputs = (outputs,)
if not self._untraced:
......
......@@ -18,7 +18,6 @@ from .embedding import Embedding
from .identity import Identity
from .linear import Linear
from .module import Module
from .parampack import ParamPack
from .pooling import AvgPool2d, MaxPool2d
from .quant_dequant import DequantStub, QuantStub
from .sequential import Sequential
......@@ -9,7 +9,7 @@
import numpy as np
from ..functional import leaky_relu, prelu, relu, sigmoid, softmax
from ..tensor_nn import Parameter
from ..tensor import Parameter
from .module import Module
......
......@@ -12,7 +12,7 @@ import numpy as np
from ..distributed.group import WORLD, Group
from ..functional import batch_norm2d, sync_batch_norm
from ..tensor_nn import Buffer, Parameter, Tensor
from ..tensor import Parameter, Tensor
from . import init
from .module import Module
......@@ -45,8 +45,8 @@ class _BatchNorm(Module):
tshape = (1, self.num_features, 1, 1)
if self.track_running_stats:
self.running_mean = Buffer(np.zeros(tshape, dtype=np.float32))
self.running_var = Buffer(np.ones(tshape, dtype=np.float32))
self.running_mean = Tensor(np.zeros(tshape, dtype=np.float32))
self.running_var = Tensor(np.ones(tshape, dtype=np.float32))
else:
self.running_mean = None
self.running_var = None
......
......@@ -13,7 +13,7 @@ import numpy as np
from ..core.ops._internal import param_defs as P
from ..functional import conv2d, conv_transpose2d, local_conv2d, relu
from ..functional.types import _pair, _pair_nonzero
from ..tensor_nn import Parameter
from ..tensor import Parameter
from . import init
from .module import Module
......
......@@ -11,7 +11,7 @@ from typing import Optional
import numpy as np
from ..functional import embedding as embedding_func
from ..tensor_nn import Parameter
from ..tensor import Parameter
from . import init
from .module import Module
......@@ -72,6 +72,7 @@ class Embedding(Module):
max_norm: Optional[float] = None,
norm_type: Optional[float] = None,
initial_weight: Parameter = None,
freeze: bool = False,
):
super().__init__()
if padding_idx is not None:
......@@ -83,6 +84,7 @@ class Embedding(Module):
self.norm_type = norm_type
self.num_embeddings = num_embeddings
self.embedding_dim = embedding_dim
self.freeze = freeze
if initial_weight is None:
self.weight = Parameter(
np.random.uniform(
......@@ -101,7 +103,11 @@ class Embedding(Module):
init.normal_(self.weight)
def forward(self, inputs):
return embedding_func(inputs, self.weight)
if self.freeze:
weight = self.weight.detach()
else:
weight = self.weight
return embedding_func(inputs, weight)
@classmethod
def from_pretrained(
......@@ -166,6 +172,6 @@ class Embedding(Module):
padding_idx=padding_idx,
max_norm=max_norm,
norm_type=norm_type,
freeze=freeze,
)
embedding.weight.requires_grad = not freeze
return embedding
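A hedged sketch of the new `freeze` flag on `Embedding`: with `requires_grad` gone, freezing is implemented by detaching the weight inside `forward()`, and `from_pretrained()` simply forwards `freeze` to the constructor. The shapes and index values below are illustrative only.

```python
import numpy as np
import megengine.module as M
from megengine import Parameter, tensor

weight = Parameter(np.random.random((10, 4)).astype(np.float32))
emb = M.Embedding(num_embeddings=10, embedding_dim=4, initial_weight=weight, freeze=True)

idx = tensor(np.array([0, 3, 7], dtype=np.int32))
out = emb(idx)          # uses a detached copy of the weight, so no grad flows into it

# from_pretrained now forwards freeze to the constructor instead of touching requires_grad
emb2 = M.Embedding.from_pretrained(weight, freeze=True)
```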
......@@ -23,7 +23,7 @@ def fill_(tensor: Tensor, val: Union[float, int]) -> None:
:param tensor: An n-dimentional tensor to be initialized
:param val: The value to be filled throughout the tensor
"""
tensor.set_value(full(shape=tensor.shape, value=val, dtype=tensor.dtype))
tensor._reset(full(shape=tensor.shape, value=val, dtype=tensor.dtype))
def zeros_(tensor: Tensor) -> None:
......@@ -50,7 +50,7 @@ def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None:
:param a: Lower bound of the sampling interval
:param b: Upper bound of the sampling interval
"""
tensor.set_value(uniform(tensor.shape, low=a, high=b).astype(tensor.dtype))
tensor._reset(uniform(tensor.shape, low=a, high=b).astype(tensor.dtype))
def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None:
......@@ -61,7 +61,7 @@ def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None:
:param mean: The mean of the normal distribution
:param std: The standard deviation of the normal distribution
"""
tensor.set_value(gaussian(tensor.shape, mean=mean, std=std).astype(tensor.dtype))
tensor._reset(gaussian(tensor.shape, mean=mean, std=std).astype(tensor.dtype))
def calculate_gain(
......
......@@ -8,7 +8,7 @@
import numpy as np
from ..functional import linear
from ..tensor_nn import Parameter
from ..tensor import Parameter
from . import init
from .module import Module
......
......@@ -5,6 +5,7 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import warnings
from abc import ABCMeta, abstractmethod
from collections import OrderedDict
from typing import Any, Callable, Iterable, Optional, Set, Tuple, Union
......@@ -14,8 +15,8 @@ import numpy as np
from ..core.tensor.dtype import is_quantize
from ..core.tensor.utils import make_shape_tuple
from ..logger import get_logger
from ..tensor import Tensor
from ..tensor_nn import Buffer, Parameter
from ..tensor import Parameter, Tensor
from ..utils.deprecation import deprecated
from ..utils.hook import HookHandler
logger = get_logger(__name__)
......@@ -48,7 +49,7 @@ def _is_parameter(obj):
def _is_buffer(obj):
return isinstance(obj, Buffer)
return isinstance(obj, Tensor) and not isinstance(obj, Parameter)
def _is_module(obj):
......@@ -163,49 +164,43 @@ class Module(metaclass=ABCMeta):
seen=seen,
)
def parameters(
self, requires_grad: Optional[bool] = None, recursive: bool = True, **kwargs
) -> Iterable[Parameter]:
def parameters(self, recursive: bool = True, **kwargs) -> Iterable[Parameter]:
r"""Returns an iterable for the :class:`~.Parameter` of the module.
:param requires_grad: Limitation over the :attr:`~.Parameter.requires_grad`
attribute of returned :class:`.Parameter`. ``None`` for no limitation.
:param recursive: If ``True``, returns all :class:`~.Parameter` within this
module, else only returns :class:`~.Parameter` that are direct attributes
of this module.
"""
if "requires_grad" in kwargs:
del kwargs["requires_grad"]
warnings.warn("passing requires_grad has no effect currently")
def predicate(obj) -> bool:
return _is_parameter(obj) and (
requires_grad is None or obj.requires_grad == requires_grad
)
return _is_parameter(obj)
yield from self._flatten(
with_key=False, predicate=predicate, recursive=recursive, **kwargs
)
def named_parameters(
self,
requires_grad: Optional[bool] = None,
prefix: Optional[str] = None,
recursive: bool = True,
**kwargs
self, prefix: Optional[str] = None, recursive: bool = True, **kwargs
) -> Iterable[Tuple[str, Parameter]]:
"""Returns an iterable for key :class:`~.Parameter` pairs of the module, where
``key`` is the dotted path from this module to the :class:`~.Parameter` .
:param requires_grad: Limitation over the :attr:`~.Parameter.requires_grad`
attribute of returned :class:`~.Parameter` . ``None`` for no limitation.
:param prefix: The prefix prepended to the keys.
:param recursive: If ``True``, returns all :class:`~.Parameter` within this
module, else only returns :class:`~.Parameter` that are direct attributes
of this module.
"""
if "requires_grad" in kwargs:
del kwargs["requires_grad"]
warnings.warn("passing requires_grad has no effect currently")
def predicate(obj) -> bool:
return _is_parameter(obj) and (
requires_grad is None or obj.requires_grad == requires_grad
)
return _is_parameter(obj)
yield from self._flatten(
with_key=True,
......@@ -215,11 +210,13 @@ class Module(metaclass=ABCMeta):
**kwargs,
)
def buffers(self, recursive: bool = True, **kwargs) -> Iterable[Buffer]:
"""Returns an iterable for the :class:`~.Buffer` of the module.
def buffers(self, recursive: bool = True, **kwargs) -> Iterable[Tensor]:
"""Returns an iterable for the buffers of the module.
:param recursive: If ``True``, returns all :class:`~.Buffer` within this
module, else only returns :class:`~.Buffer` that are direct attributes
Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`.
:param recursive: If ``True``, returns all buffers within this
module, else only returns buffers that are direct attributes
of this module.
"""
yield from self._flatten(
......@@ -228,13 +225,15 @@ class Module(metaclass=ABCMeta):
def named_buffers(
self, prefix: Optional[str] = None, recursive: bool = True, **kwargs
) -> Iterable[Tuple[str, Buffer]]:
"""Returns an iterable for key :class:`~.Buffer` pairs of the module, where
``key`` is the dotted path from this module to the :class:`~.Buffer` .
) -> Iterable[Tuple[str, Tensor]]:
"""Returns an iterable for key buffer pairs of the module, where
``key`` is the dotted path from this module to the buffer.
Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`.
:param prefix: The prefix prepended to the keys.
:param recursive: If ``True``, returns all :class:`~.Buffer` within this
module, else only returns :class:`~.Buffer` that are direct attributes
:param recursive: If ``True``, returns all buffers within this
module, else only returns buffers that are direct attributes
of this module.
"""
yield from self._flatten(
......@@ -297,6 +296,7 @@ class Module(metaclass=ABCMeta):
for it in self.modules():
fn(it)
@deprecated(version="1.0")
def zero_grad(self) -> None:
"""Set all parameters' grads to zero
"""
......@@ -505,7 +505,7 @@ class Module(metaclass=ABCMeta):
# scale/zero_points maybe invalid, use pretrained dtype instead.
if is_quantize(to_be_load.dtype) and is_quantize(var.dtype):
var = var.astype(to_be_load.dtype)
var.set_value(to_be_load)
var._reset(to_be_load)
loaded.append(k)
return set(loaded), set(skipped)
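To make the new buffer definition concrete, a hedged sketch: after this change, any `Tensor` attribute of a `Module` that is not a `Parameter` counts as a buffer, so plain `Tensor` takes over the role of the removed `Buffer` class. The toy module below is illustrative.

```python
import numpy as np
from megengine import Parameter, Tensor
from megengine.module import Module


class Stats(Module):
    def __init__(self):
        super().__init__()
        self.weight = Parameter(np.ones((4,), dtype=np.float32))       # a parameter
        self.running_mean = Tensor(np.zeros((4,), dtype=np.float32))   # a buffer by definition

    def forward(self, x):
        return x * self.weight + self.running_mean


m = Stats()
print([name for name, _ in m.named_parameters()])   # expected: ['weight']
print([name for name, _ in m.named_buffers()])      # expected: ['running_mean']
```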
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import collections
from typing import Callable, Iterable, Optional, Tuple
import numpy as np
from ..tensor_nn import Parameter, Tensor
from .module import Module
class ParamPack(Module):
r"""Pack module's parameters by gathering their memory to continuous address.
Using (device, dtype, requires_grad) as key, for example ('gpu0', float32, True),
parameters with same key will be packed togather.
It helps a lot for multimachine training by speeding up allreduce gradients.
:param model: the module you want to pack parameters.
:param nr_ignore_first: how many parameters will be unpacked at first.
:param max_size_per_group: upper bound of packed parameters' size in MB.
:param max_nr_params_per_group: upper bound of the number of parameters of each group.
"""
def __init__(
self,
model: Module,
nr_ignore_first: int = 8,
max_size_per_group: int = 10,
max_nr_params_per_group: int = 100,
group_func: Callable = lambda name, param: 0,
):
super().__init__()
self._model = model
self._nr_ignore_first = nr_ignore_first
self._max_size_per_group = max_size_per_group
self._max_nr_params_per_group = max_nr_params_per_group
self._group_func = group_func
self._grouped_params = []
self._packed_params = []
params = model.named_parameters()
self._pack_params(params)
def parameters(self, requires_grad: Optional[bool] = None) -> Iterable[Parameter]:
for param in self._packed_params:
if requires_grad is None or param.requires_grad == requires_grad:
yield param
def named_parameters(
self, requires_grad: Optional[bool] = None
) -> Iterable[Tuple[str, Parameter]]:
for idx, param in enumerate(self._packed_params):
if requires_grad is None or param.requires_grad == requires_grad:
yield "packed_param_" + str(idx), param
def _pack_params(self, params: Iterable[Tuple[str, Parameter]]):
groups = collections.defaultdict(list)
ignored = 0
param_id = 0
for name, param in params:
if self._nr_ignore_first > ignored:
ignored += 1
self._grouped_params.append([{"shape": param.shape, "id": param_id}])
param.pack_group_key = self._group_func(name, param)
self._packed_params.append(param)
else:
key = (
param.dtype,
param.device,
param.requires_grad,
self._group_func(name, param),
)
groups[key].append({"tensor": param, "id": param_id})
param_id += 1
for (dtype, device, requires_grad, group_key) in groups.keys():
dtype_sz = np.dtype(dtype).itemsize
align = device.mem_align
if align < dtype_sz:
align = 1
else:
assert align % dtype_sz == 0
align //= dtype_sz
group = groups[(dtype, device, requires_grad, group_key)]
while group:
aligned_pos = []
offset = 0
params = []
idx = 0
while idx < len(group):
param = group[idx]
assert param["tensor"].device == device
padding = (align - (offset & (align - 1))) & (align - 1)
offset += padding
aligned_pos.append(offset)
params.append(param)
offset += int(np.prod(param["tensor"].shape))
idx += 1
if (
offset * dtype_sz >= self._max_size_per_group * 1024 * 1024
or idx >= self._max_nr_params_per_group
):
break
group = group[idx:]
if idx == 1:
# ignore param packs with only one item
params[0]["tensor"].pack_group_key = group_key
self._packed_params.append(params[0]["tensor"])
self._grouped_params.append(
[{"shape": params[0]["tensor"].shape, "id": params[0]["id"]}]
)
continue
packed_value = np.zeros((offset,), dtype=dtype)
for param, pos in zip(params, aligned_pos):
val = param["tensor"].numpy()
packed_value[pos : pos + val.size] = val.flatten()
new_param = Parameter(
value=packed_value,
device=device,
dtype=dtype,
requires_grad=requires_grad,
)
new_param.pack_group_key = group_key
self._packed_params.append(new_param)
self._grouped_params.append(
[{"shape": i["tensor"].shape, "id": i["id"]} for i in params]
)
def forward(self, *args, **kwargs):
replace_param = dict()
for i in range(len(self._packed_params)):
packed_param = self._packed_params[i]
grouped_params = self._grouped_params[i]
if len(grouped_params) == 1:
continue
split = param_pack_split(
packed_param._symvar, [i["shape"] for i in grouped_params]
)
split = [
Parameter(Tensor(i, requires_grad=packed_param.requires_grad))
for i in split
]
for j in range(len(split)):
replace_param[grouped_params[j]["id"]] = split[j]
self._model.replace_param(replace_param, 0)
return self._model.forward(*args, **kwargs)
......@@ -12,7 +12,7 @@ import numpy as np
from ... import module as Float
from ...core.tensor import dtype
from ...functional import conv_bias_activation
from ...tensor_nn import Parameter
from ...tensor import Parameter
from ..qat import conv as QAT
from .module import QuantizedModule
......
......@@ -5,7 +5,7 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from ...tensor_nn import Parameter
from ...tensor import Parameter
from ..qat import conv_bn as QAT
from .conv import Conv2d
......
......@@ -9,7 +9,7 @@ import numpy as np
from ... import functional as F
from ...core.tensor import dtype
from ...tensor_nn import Parameter
from ...tensor import Parameter
from ..qat import linear as QAT
from .module import QuantizedModule
......
......@@ -11,7 +11,7 @@ from typing import Iterable, Union
import numpy as np
from ..functional import sqrt
from ..tensor_nn import Parameter
from ..tensor import Parameter
from .optimizer import Optimizer
......@@ -63,7 +63,7 @@ class Adadelta(Optimizer):
for param in param_group["params"]:
if not param.requires_grad or "grad" not in param.__dict__:
if param.grad is None:
continue
states = self._state[param]
......
......@@ -11,7 +11,7 @@ from typing import Iterable, Union
import numpy as np
from ..functional import sqrt
from ..tensor_nn import Parameter
from ..tensor import Parameter
from .optimizer import Optimizer
......@@ -62,7 +62,7 @@ class Adagrad(Optimizer):
for param in param_group["params"]:
if not param.requires_grad or "grad" not in param.__dict__:
if param.grad is None:
continue
states = self._state[param]
......
......@@ -8,7 +8,7 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from typing import Iterable, Tuple, Union
from ..tensor_nn import Parameter
from ..tensor import Parameter
from .optimizer import Optimizer
......@@ -59,7 +59,7 @@ class Adam(Optimizer):
for param in param_group["params"]:
if not param.requires_grad or "grad" not in param.__dict__:
if param.grad is None:
continue
grad = param.grad
......
......@@ -7,7 +7,7 @@
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from abc import ABCMeta, abstractmethod
from collections import Iterable
from collections.abc import Iterable
from contextlib import contextmanager
from typing import Dict
from typing import Iterable as Iter
......@@ -15,8 +15,7 @@ from typing import Union
import numpy as np
from ..tensor import Tensor, TensorDict
from ..tensor_nn import Buffer, Parameter
from ..tensor import Parameter, Tensor
class _RequiredParameter:
......@@ -37,7 +36,7 @@ class Optimizer(metaclass=ABCMeta):
def __init__( # pylint: disable=too-many-branches
self, params: Union[Iter[Parameter], dict], defaults: dict,
):
self._state = TensorDict()
self._state = dict()
self._defaults = defaults
if isinstance(params, (Parameter, dict)):
......@@ -93,10 +92,6 @@ class Optimizer(metaclass=ABCMeta):
"optimizer can only optimize Parameters, but one of the params is "
+ type(param)
)
if not param.requires_grad:
raise ValueError(
"optimizer can only optimize Parameters with requires_grad=True"
)
for name, default in self._defaults.items():
if default is required and name not in param_group:
......@@ -122,7 +117,7 @@ class Optimizer(metaclass=ABCMeta):
initializer = np.zeros(param.shape, dtype=np.float32)
state_dict = self._state.setdefault(param, {})
assert state_name not in state_dict
state = Buffer(initializer)
state = Tensor(initializer)
state_dict[state_name] = state
@abstractmethod
......@@ -140,7 +135,7 @@ class Optimizer(metaclass=ABCMeta):
params.append(param)
return params
def step(self, clear_grad=False):
def step(self):
r"""Performs a single optimization step.
"""
......@@ -152,8 +147,7 @@ class Optimizer(metaclass=ABCMeta):
"Please use a list instead."
)
self._updates(group)
if clear_grad:
self.clear_grad()
return self
def clear_grad(self):
r"""Clear the grad buffer.
......@@ -161,8 +155,7 @@ class Optimizer(metaclass=ABCMeta):
"""
for param_group in self.param_groups:
for param in param_group["params"]:
if getattr(param, "grad", None) is not None:
param.grad = None
param.grad = None
def state_dict(self) -> Dict:
r"""Export the optimizer state.
......@@ -171,7 +164,7 @@ class Optimizer(metaclass=ABCMeta):
"""
param_groups = []
state = dict()
param2id = TensorDict()
param2id = dict()
cur_id = 0
for group in self.param_groups:
......@@ -213,8 +206,9 @@ class Optimizer(metaclass=ABCMeta):
p = param_new
self._state[p] = state["state"][param_saved].copy()
for k, v in self._state[p].items():
if isinstance(v, Buffer):
self._state[p][k] = Buffer(v.numpy())
if isinstance(v, Tensor):
# TODO: maybe a more efficient way?
self._state[p][k] = Tensor(v.numpy())
if set(group_new.keys()) != set(group_saved.keys()):
raise ValueError(
......
......@@ -8,7 +8,7 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from typing import Iterable, Union
from ..tensor_nn import Parameter
from ..tensor import Parameter
from .optimizer import Optimizer
......@@ -52,7 +52,7 @@ class SGD(Optimizer):
momentum = param_group["momentum"]
for param in param_group["params"]:
if not param.requires_grad or "grad" not in param.__dict__:
if param.grad is None:
continue
grad = param.grad
......
......@@ -14,8 +14,7 @@ from .. import functional as F
from ..core.tensor.dtype import _metadata_dict, get_quantized_dtype
from ..core.tensor.function import Function
from ..module import Module
from ..tensor import Tensor
from ..tensor_nn import Parameter
from ..tensor import Parameter, Tensor
from .utils import QuantMode, fake_quant_tensor, get_qparam_dict
......
......@@ -13,7 +13,7 @@ import numpy as np
from .. import functional as F
from ..core.tensor.dtype import _metadata_dict, get_quantized_dtype
from ..module import Module
from ..tensor_nn import Buffer
from ..tensor import Tensor
from .utils import QuantMode, Round, get_qparam_dict
......@@ -82,8 +82,8 @@ class MinMaxObserver(Observer):
):
super().__init__(dtype, narrow_range)
self.mode = mode
self.min_val = Buffer(np.finfo(np.float32).max, dtype=np.float32)
self.max_val = Buffer(np.finfo(np.float32).min, dtype=np.float32)
self.min_val = Tensor(np.finfo(np.float32).max, dtype=np.float32)
self.max_val = Tensor(np.finfo(np.float32).min, dtype=np.float32)
self.scale_limit = eps
def _calculate_qparams(self, inp_min_val, inp_max_val):
......@@ -118,8 +118,8 @@ class MinMaxObserver(Observer):
# stop gradient
x = x_orig.detach()
# find max and min
self.min_val.set_value(F.minimum(self.min_val, x.min()))
self.max_val.set_value(F.maximum(self.max_val, x.max()))
self.min_val._reset(F.minimum(self.min_val, x.min()))
self.max_val._reset(F.maximum(self.max_val, x.max()))
return x_orig
......@@ -133,22 +133,22 @@ class ExponentialMovingAverageObserver(MinMaxObserver):
narrow_range: bool = False,
):
super().__init__(mode, eps, dtype, narrow_range)
self.momentum = Buffer(momentum)
self.runtime_momentum = Buffer(0.0)
self.momentum = Tensor(momentum)
self.runtime_momentum = Tensor(0.0)
def set_momentum(self, momentum):
self.momentum.set_value(momentum)
self.momentum._reset(momentum)
def forward(self, x_orig):
if self.enabled:
# stop gradient
x = x_orig.detach()
# Exponential Moving Average
self.min_val.set_value(
self.min_val._reset(
self.min_val * self.runtime_momentum
+ (1 - self.runtime_momentum) * x.min()
)
self.max_val.set_value(
self.max_val._reset(
self.max_val * self.runtime_momentum
+ (1 - self.runtime_momentum) * x.max()
)
......@@ -171,7 +171,7 @@ class HistogramObserver(MinMaxObserver):
self.bins = bins
self.upsample_rate = upsample_rate
self.dst_nbins = _metadata_dict[dtype].qmax - _metadata_dict[dtype].qmin + 1
self.histogram = Buffer([-1] + [0.0] * (bins - 1))
self.histogram = Tensor([-1] + [0.0] * (bins - 1))
def _non_linear_param_search(self):
r"""Non-linear parameter search.
......@@ -395,9 +395,9 @@ class HistogramObserver(MinMaxObserver):
self.bins,
)
self.histogram.set_value(new_histogram)
self.min_val.set_value(new_min)
self.max_val.set_value(new_max)
self.histogram._reset(new_histogram)
self.min_val._reset(new_min)
self.max_val._reset(new_max)
def forward(self, x_orig):
self.sideeffect_forward(x_orig)
......
......@@ -14,10 +14,11 @@ from .core import Tensor as _Tensor
from .core.ops.builtin import Copy
from .core.tensor.core import apply
from .device import get_default_device
from .utils.deprecation import deprecated
class Tensor(_Tensor):
requires_grad = False
grad = None
dmap_callback = None
def __init__(self, data, dtype=None, device=None):
......@@ -26,15 +27,32 @@ class Tensor(_Tensor):
self.q_dict = {"mode": None, "scale": None, "zero_point": None}
super().__init__(data, dtype=dtype, device=device)
@deprecated(version="1.0", reason="no need to reuse an existing tensor since 1.0")
def set_value(self, value):
self._reset(value)
@deprecated(version="1.0", reason="use *= 0 instead")
def reset_zero(self):
self *= 0
def to(self, cn):
return apply(Copy(comp_node=cn), self)[0]
@property
def requires_grad(self):
raise AttributeError("requires_grad is reserved for future use")
@requires_grad.setter
def requires_grad(self, value):
raise AttributeError("requires_grad is reserved for future use")
@requires_grad.deleter
def requires_grad(self):
raise AttributeError("requires_grad is reserved for future use")
def __hash__(self):
return id(self)
def __getstate__(self):
r""" __getstate__ will be called for pickle serialization or deep copy
"""
......@@ -73,53 +91,6 @@ class Tensor(_Tensor):
tensor = Tensor
class Dict(collections.MutableMapping):
def __init__(self, *args, key=None, **kwargs):
self.data = {}
if key:
self.keyfn = key
for i in args:
self.update(i)
self.update(**kwargs)
@staticmethod
def keyfn(key): # pylint: disable=method-hidden
return key
def __getitem__(self, key):
_, v = self.data[self.keyfn(key)]
return v
def __setitem__(self, key, value):
self.data[self.keyfn(key)] = key, value
def __delitem__(self, key):
del self.data[self.keyfn(key)]
def __iter__(self):
for _, (k, _) in self.data.items():
yield k
def __len__(self):
return len(self.data)
class TensorDict(Dict): # pylint: disable=too-many-ancestors
class keyfn:
def __new__(cls, x: Tensor):
if not isinstance(x, Tensor):
return x
return super().__new__(cls)
def __init__(self, x: Tensor):
self._data = x # do not save id directly to make pickle work
def __hash__(self):
return id(self._data)
def __eq__(self, other):
# pylint: disable=undefined-variable
return isinstance(other, __class__) and id(self._data) == id(other._data)
def __init__(self, *args):
super().__init__(*args)
class Parameter(Tensor):
r"""A kind of Tensor that is to be considered a module parameter.
"""
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from . import Tensor, tensor
class Buffer(Tensor):
r"""A kind of Tensor with ``requires_grad=False``.
"""
class Parameter(Tensor):
r"""A kind of Tensor that is to be considered a module parameter.
"""
requires_grad = True
from deprecated.sphinx import deprecated
......@@ -15,7 +15,7 @@ def get_ndtuple(value, *, n, allow_zero=True):
:type allow_zero: bool
:param allow_zero: whether to allow zero tuple value"""
if not isinstance(value, collections.Iterable):
if not isinstance(value, collections.abc.Iterable):
value = int(value)
value = tuple([value for i in range(n)])
else:
......
......@@ -5,3 +5,4 @@ requests
tabulate
tqdm
redispy
deprecated
......@@ -38,7 +38,7 @@ class Simple2(Module):
def test_advance_indexing():
net = Simple()
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
optim = optimizer.SGD(net.parameters(), lr=1.0)
optim.clear_grad()
......@@ -48,7 +48,7 @@ def test_advance_indexing():
data = tensor(raw_data)
mask = tensor(raw_mask)
answer = 1.0 - raw_data[raw_mask].sum()
with gm.record():
with gm:
loss = net(data, mask).sum()
gm.backward(loss)
optim.step()
......@@ -58,7 +58,7 @@ def test_advance_indexing():
def test_advance_indexing_with_subtensor():
net = Simple2()
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
optim = optimizer.SGD(net.parameters(), lr=1.0)
optim.clear_grad()
......@@ -66,7 +66,7 @@ def test_advance_indexing_with_subtensor():
raw_data = np.arange(576).reshape(dshape).astype(np.float32)
data = tensor(raw_data)
answer = 1.0 - raw_data[1, ..., :, 0:4:2, 0:2].sum()
with gm.record():
with gm:
loss = net(data).sum()
gm.backward(loss)
optim.step()
......
......@@ -28,13 +28,13 @@ class Simple(Module):
def test_ai():
net = Simple()
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
optim = optimizer.SGD(net.parameters(), lr=1.0)
optim.clear_grad()
dshape = (10, 10)
data = tensor(np.ones(dshape).astype(np.float32))
with gm.record():
with gm:
loss = net(data).sum()
gm.backward(loss)
optim.step()
......
......@@ -25,12 +25,12 @@ def test_frozen_bn():
saved_wt = m.weight.numpy()
saved_bias = m.bias.numpy()
gm = ad.GradManager().register(m.parameters())
gm = ad.GradManager().attach(m.parameters())
optim = optimizer.SGD(m.parameters(), lr=1.0)
optim.clear_grad()
data = np.random.random((6, nchannel, 2, 2)).astype("float32")
with gm.record():
with gm:
loss = m(data).mean()
gm.backward(loss)
optim.step()
......@@ -46,12 +46,12 @@ def test_bn_no_track_stat():
nchannel = 3
m = BatchNorm2d(nchannel, track_running_stats=False)
gm = ad.GradManager().register(m.parameters())
gm = ad.GradManager().attach(m.parameters())
optim = optimizer.SGD(m.parameters(), lr=1.0)
optim.clear_grad()
data = np.random.random((6, nchannel, 2, 2)).astype("float32")
with gm.record():
with gm:
loss = m(data).sum()
gm.backward(loss)
optim.step()
......@@ -68,12 +68,12 @@ def test_bn_no_track_stat2():
saved_mean = m.running_mean.numpy()
assert saved_mean is not None
gm = ad.GradManager().register(m.parameters())
gm = ad.GradManager().attach(m.parameters())
optim = optimizer.SGD(m.parameters(), lr=1.0)
optim.clear_grad()
data = np.random.random((6, nchannel, 2, 2)).astype("float32")
with gm.record():
with gm:
loss = m(data).sum()
gm.backward(loss)
optim.step()
......
......@@ -74,13 +74,11 @@ class XORNet(Module):
def test_training_converge():
net = XORNet()
opt = SGD(
net.parameters(requires_grad=True), lr=0.01, momentum=0.9, weight_decay=5e-4
)
gm = ad.GradManager().register(net.parameters())
opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
gm = ad.GradManager().attach(net.parameters())
def train(data, label):
with gm.record():
with gm:
pred = net(data)
loss = F.cross_entropy_with_softmax(pred, label)
gm.backward(loss)
......
......@@ -91,7 +91,7 @@ class MnistNet(Module):
def train(data, label, net, opt, gm):
with gm.record():
with gm:
pred = net(data)
loss = F.cross_entropy_with_softmax(pred, label)
gm.backward(loss)
......@@ -117,7 +117,7 @@ def update_model(model_path):
net.load_state_dict(checkpoint["net_init"])
lr = checkpoint["sgd_lr"]
opt = SGD(net.parameters(), lr=lr)
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
data = Tensor(checkpoint["data"], dtype=np.float32)
label = Tensor(checkpoint["label"], dtype=np.int32)
......@@ -152,7 +152,7 @@ def run_train(
net.load_state_dict(checkpoint["net_init"])
lr = checkpoint["sgd_lr"]
opt = SGD(net.parameters(), lr=lr)
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
data = Tensor(checkpoint["data"], dtype=np.float32)
label = Tensor(checkpoint["label"], dtype=np.int32)
......
......@@ -32,11 +32,11 @@ def test_detach():
optim = optimizer.SGD(net.parameters(), lr=1.0)
optim.clear_grad()
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
dshape = (10, 10)
data = tensor(np.ones(dshape).astype(np.float32))
with gm.record():
with gm:
loss = net(data).sum()
gm.backward(loss)
optim.step()
......
......@@ -97,7 +97,7 @@ class MnistNet(Module):
def train(data, label, net, opt, gm):
opt.clear_grad()
with gm.record():
with gm:
pred = net(data)
loss = F.cross_entropy_with_softmax(pred, label)
gm.backward(loss)
......@@ -125,8 +125,7 @@ def update_model(model_path):
lr = checkpoint["sgd_lr"]
opt = SGD(net.parameters(), lr=lr)
gm = ad.GradManager()
gm.register(
gm = ad.GradManager().attach(
net.parameters(), callbacks=[dist.make_allreduce_cb("MEAN", dist.WORLD)]
)
......@@ -171,8 +170,7 @@ def run_test(
lr = checkpoint["sgd_lr"]
opt = SGD(net.parameters(), lr=lr)
gm = ad.GradManager()
gm.register(
gm = ad.GradManager().attach(
net.parameters(), callbacks=[dist.make_allreduce_cb("MEAN", dist.WORLD)]
)
......
......@@ -33,10 +33,10 @@ def test_hello_world():
optim = optimizer.SGD(net.parameters(), lr=1.0)
optim.clear_grad()
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
data = tensor([2.34])
with gm.record():
with gm:
loss = net(data)
gm.backward(loss)
optim.step()
......
......@@ -13,7 +13,7 @@ import megengine.functional as F
from megengine import Parameter, optimizer
from megengine.jit import trace
from megengine.module import Linear, Module
from megengine.tensor import TensorDict, tensor
from megengine.tensor import tensor
class MLP(Module):
......@@ -44,7 +44,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
net = Simple()
opt = getattr(optimizer, opt_str)(net.parameters(), **test_case)
check_func = check_class(net, **test_case)
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
step = 0
data_shape = (2, 28)
......@@ -57,12 +57,12 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
data = tensor(np.random.random(data_shape).astype(np.float32))
opt.clear_grad()
with gm.record():
with gm:
pred = net(data)
loss = pred.sum()
gm.backward(loss)
ori_params = TensorDict()
ori_params = {}
for param in net.parameters():
ori_params[param] = np.copy(param.numpy())
opt.step()
......@@ -75,7 +75,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
@trace(symbolic=symbolic)
def train_func(data, *, opt=None, gm=None):
opt.clear_grad()
with gm.record():
with gm:
pred = net(data)
loss = pred.sum()
gm.backward(loss)
......@@ -84,7 +84,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
# reset net and opt
net = Simple()
opt = getattr(optimizer, opt_str)(net.parameters(), **test_case)
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
check_func = check_class(net, **test_case)
step = 0
for i in range(iter_num):
......@@ -93,7 +93,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
group["lr"] += 0.01
check_func.lr += 0.01
ori_params = TensorDict()
ori_params = {}
for param in net.parameters():
ori_params[param] = np.copy(param.numpy())
......@@ -105,7 +105,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
def test_sgd():
class CheckValue:
def __init__(self, net, **kwarg):
self.slots = TensorDict()
self.slots = {}
for param in net.parameters():
self.slots[param] = np.zeros(param.shape).astype(np.float32)
for k, v in kwarg.items():
......@@ -134,8 +134,8 @@ def test_sgd():
def test_adam():
class CheckValue:
def __init__(self, net, **kwarg):
self.m_slots = TensorDict()
self.v_slots = TensorDict()
self.m_slots = {}
self.v_slots = {}
for param in net.parameters():
self.m_slots[param] = np.zeros(param.shape).astype(np.float32)
self.v_slots[param] = np.zeros(param.shape).astype(np.float32)
......@@ -175,7 +175,7 @@ def test_adam():
def test_adagrad():
class CheckValue:
def __init__(self, net, **kwarg):
self.s_slots = TensorDict()
self.s_slots = {}
for param in net.parameters():
self.s_slots[param] = np.zeros(param.shape).astype(np.float32)
for k, v in kwarg.items():
......@@ -207,8 +207,8 @@ def test_adagrad():
def test_adadelta():
class CheckValue:
def __init__(self, net, **kwarg):
self.s_slots = TensorDict()
self.a_slots = TensorDict()
self.s_slots = {}
self.a_slots = {}
for param in net.parameters():
self.s_slots[param] = np.zeros(param.shape).astype(np.float32)
self.a_slots[param] = np.zeros(param.shape).astype(np.float32)
......
......@@ -23,11 +23,11 @@ def test_save_load():
optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
optim.clear_grad()
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
data = tensor([2.34])
with gm.record():
with gm:
loss = net(data)
gm.backward(loss)
......@@ -55,7 +55,7 @@ def test_save_load():
optim.load_state_dict(checkpoint["opt_state"])
print("load done")
with gm.record():
with gm:
loss = net([1.23])
gm.backward(loss)
......
......@@ -31,12 +31,12 @@ def test_sgd_momentum():
optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
optim.clear_grad()
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
data = tensor([2.34])
# do a step of train
with gm.record():
with gm:
loss = net(data)
gm.backward(loss)
optim.step()
......@@ -51,7 +51,7 @@ def test_sgd_momentum():
# do a step of train
optim.clear_grad()
with gm.record():
with gm:
loss = net(data)
gm.backward(loss)
optim.step()
......@@ -69,7 +69,7 @@ def test_sgd_momentum_trace():
@trace(symbolic=symbolic)
def train_func(data, *, model=None, optim=None, gm=None):
optim.clear_grad()
with gm.record():
with gm:
loss = net(data)
gm.backward(loss)
optim.step()
......@@ -82,7 +82,7 @@ def test_sgd_momentum_trace():
net = Simple()
optim = optimizer.SGD(net.parameters(), lr=1.0, momentum=0.9)
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
data = tensor([2.34])
train_func(data, model=net, optim=optim, gm=gm)
np.testing.assert_almost_equal(
......
......@@ -61,15 +61,15 @@ class XORNet(M.Module):
def test_xornet_trace_dump():
net = XORNet()
opt = optim.SGD(net.parameters(requires_grad=True), lr=0.01, momentum=0.9)
gm = GradManager().register(net.parameters(requires_grad=True))
opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
gm = GradManager().attach(net.parameters())
batch_size = 64
train_dataset = minibatch_generator(batch_size)
val_dataset = minibatch_generator(batch_size)
@trace
def train_fun(data, label):
with gm.record():
with gm:
net.train()
pred = net(data)
loss = F.cross_entropy_with_softmax(pred, label)
......
......@@ -14,7 +14,7 @@ import pytest
import megengine.core.ops.builtin as builtin
import megengine.core.tensor.dtype as dtype
import megengine.functional as F
from megengine import Buffer, Parameter, is_cuda_available, tensor
from megengine import Parameter, Tensor, is_cuda_available, tensor
from megengine.core._trace_option import use_tensor_shape
from megengine.core.autodiff.grad import Grad
from megengine.core.tensor.utils import make_shape_tuple
......@@ -330,7 +330,7 @@ def test_roi_pooling():
def test_add_update():
shape = (2, 3)
v = np.random.random(shape).astype(np.float32)
b = Buffer(v)
b = Tensor(v)
u = F.add_update(b, 1)
assertTensorClose(u.numpy(), v + 1)
......@@ -347,7 +347,7 @@ def test_add_update():
def test_add_update_params():
b = np.random.random((2, 3)).astype(np.float32)
y = Buffer(b)
y = Tensor(b)
# @jit.trace
def f(x):
......@@ -355,7 +355,7 @@ def test_add_update_params():
f(np.zeros((2, 3)).astype(np.float32))
z = Buffer(np.zeros((2, 3)).astype(np.float32))
z = Tensor(np.zeros((2, 3)).astype(np.float32))
F.add_update(y, z, beta=0.1)
res = f(np.ones((2, 3)).astype(np.float32))
......
......@@ -12,7 +12,7 @@ import numpy as np
import pytest
import megengine.functional as F
from megengine import Buffer, Parameter, is_cuda_available, tensor
from megengine import tensor
from megengine.core._trace_option import use_tensor_shape
from megengine.core.tensor.utils import astensor1d
from megengine.distributed.helper import get_device_count_by_fork
......
......@@ -14,10 +14,9 @@ import pytest
import megengine as mge
import megengine.distributed as dist
from megengine import tensor
from megengine import Tensor
from megengine.core._trace_option import use_tensor_shape
from megengine.module import BatchNorm1d, BatchNorm2d, SyncBatchNorm
from megengine.tensor import Tensor
from megengine.test import assertTensorClose
......@@ -45,10 +44,8 @@ def test_syncbn():
return
dist.init_process_group("localhost", port, nr_ranks, rank, rank)
bn = SyncBatchNorm(nr_chan, momentum=momentum, eps=eps)
data_tensor = tensor([])
for i in range(steps):
data_tensor.set_value(data[i])
yv = bn(data_tensor)
yv = bn(Tensor(data[i]))
assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
assertTensorClose(running_mean, bn.running_mean.numpy(), max_err=5e-6)
......@@ -105,7 +102,6 @@ def test_batchnorm():
bn = BatchNorm1d(nr_chan, momentum=momentum)
running_mean = np.zeros((1, nr_chan, 1), dtype=np.float32)
running_var = np.ones((1, nr_chan, 1), dtype=np.float32)
data = tensor([])
for i in range(3):
xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
mean = np.mean(np.mean(xv, axis=0, keepdims=True), axis=2, keepdims=True)
......@@ -120,8 +116,7 @@ def test_batchnorm():
running_mean = running_mean * momentum + mean * (1 - momentum)
running_var = running_var * momentum + var_unbiased * (1 - momentum)
data.set_value(xv)
yv = bn(data)
yv = bn(Tensor(xv))
yv_expect = (xv - mean) / sd
assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
......@@ -137,7 +132,7 @@ def test_batchnorm():
var_backup = bn.running_var.numpy()
bn.training = False
xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
data.set_value(xv)
data = Tensor(xv)
yv1 = bn(data)
yv2 = bn(data)
assertTensorClose(yv1.numpy(), yv2.numpy(), max_err=0)
......@@ -161,7 +156,6 @@ def test_syncbn1d():
bn = SyncBatchNorm(nr_chan, momentum=momentum)
running_mean = np.zeros((1, nr_chan, 1), dtype=np.float32)
running_var = np.ones((1, nr_chan, 1), dtype=np.float32)
data = tensor([])
for i in range(3):
xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
mean = np.mean(np.mean(xv, axis=0, keepdims=True), axis=2, keepdims=True)
......@@ -176,8 +170,7 @@ def test_syncbn1d():
running_mean = running_mean * momentum + mean * (1 - momentum)
running_var = running_var * momentum + var_unbiased * (1 - momentum)
data.set_value(xv)
yv = bn(data)
yv = bn(Tensor(xv))
yv_expect = (xv - mean) / sd
assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
......@@ -193,7 +186,7 @@ def test_syncbn1d():
var_backup = bn.running_var.numpy()
bn.training = False
xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
data.set_value(xv)
data = Tensor(xv)
yv1 = bn(data)
yv2 = bn(data)
assertTensorClose(yv1.numpy(), yv2.numpy(), max_err=0)
......@@ -210,7 +203,6 @@ def test_batchnorm2d():
bn = BatchNorm2d(nr_chan, momentum=momentum)
running_mean = np.zeros((1, nr_chan, 1, 1), dtype=np.float32)
running_var = np.ones((1, nr_chan, 1, 1), dtype=np.float32)
data = tensor([])
for i in range(3):
xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
xv_transposed = np.transpose(xv, [0, 2, 3, 1]).reshape(
......@@ -226,8 +218,7 @@ def test_batchnorm2d():
running_mean = running_mean * momentum + mean * (1 - momentum)
running_var = running_var * momentum + var_unbiased * (1 - momentum)
data.set_value(xv)
yv = bn(data)
yv = bn(Tensor(xv))
yv_expect = (xv - mean) / sd
assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
......@@ -239,7 +230,7 @@ def test_batchnorm2d():
var_backup = bn.running_var.numpy()
bn.training = False
xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
data.set_value(xv)
data = Tensor(xv)
yv1 = bn(data)
yv2 = bn(data)
assertTensorClose(yv1.numpy(), yv2.numpy(), max_err=0)
......@@ -263,7 +254,6 @@ def test_syncbn2d():
bn = SyncBatchNorm(nr_chan, momentum=momentum)
running_mean = np.zeros((1, nr_chan, 1, 1), dtype=np.float32)
running_var = np.ones((1, nr_chan, 1, 1), dtype=np.float32)
data = tensor([])
for i in range(3):
xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
xv_transposed = np.transpose(xv, [0, 2, 3, 1]).reshape(
......@@ -279,8 +269,7 @@ def test_syncbn2d():
running_mean = running_mean * momentum + mean * (1 - momentum)
running_var = running_var * momentum + var_unbiased * (1 - momentum)
data.set_value(xv)
yv = bn(data)
yv = bn(Tensor(xv))
yv_expect = (xv - mean) / sd
assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
......@@ -292,7 +281,7 @@ def test_syncbn2d():
var_backup = bn.running_var.numpy()
bn.training = False
xv = np.random.normal(loc=2.3, size=data_shape).astype(np.float32)
data.set_value(xv)
data = Tensor(xv)
yv1 = bn(data)
yv2 = bn(data)
assertTensorClose(yv1.numpy(), yv2.numpy(), max_err=0)
......@@ -306,7 +295,6 @@ def test_batchnorm_no_stats():
nr_chan = 8
data_shape = (3, nr_chan, 4)
bn = BatchNorm1d(8, track_running_stats=False)
data = tensor([])
for i in range(4):
if i == 2:
bn.training = False
......@@ -320,8 +308,7 @@ def test_batchnorm_no_stats():
).reshape((1, nr_chan, 1))
sd = np.sqrt(var + bn.eps)
data.set_value(xv)
yv = bn(data)
yv = bn(Tensor(xv))
yv_expect = (xv - mean) / sd
assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
......@@ -338,7 +325,6 @@ def test_syncbn_no_stats():
nr_chan = 8
data_shape = (3, nr_chan, 4)
bn = SyncBatchNorm(8, track_running_stats=False)
data = tensor([])
for i in range(4):
if i == 2:
bn.training = False
......@@ -352,8 +338,7 @@ def test_syncbn_no_stats():
).reshape((1, nr_chan, 1))
sd = np.sqrt(var + bn.eps)
data.set_value(xv)
yv = bn(data)
yv = bn(Tensor(xv))
yv_expect = (xv - mean) / sd
assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
......@@ -363,7 +348,6 @@ def test_batchnorm2d_no_stats():
nr_chan = 8
data_shape = (3, nr_chan, 16, 16)
bn = BatchNorm2d(8, track_running_stats=False)
data = tensor([])
for i in range(4):
if i == 2:
bn.training = False
......@@ -376,8 +360,7 @@ def test_batchnorm2d_no_stats():
var = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1, 1))
sd = np.sqrt(var + bn.eps)
data.set_value(xv)
yv = bn(data)
yv = bn(Tensor(xv))
yv_expect = (xv - mean) / sd
assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
......@@ -394,7 +377,6 @@ def test_syncbn2d_no_stats():
nr_chan = 8
data_shape = (3, nr_chan, 16, 16)
bn = SyncBatchNorm(8, track_running_stats=False)
data = tensor([])
for i in range(4):
if i == 2:
bn.training = False
......@@ -407,8 +389,7 @@ def test_syncbn2d_no_stats():
var = np.var(xv_transposed, axis=0).reshape((1, nr_chan, 1, 1))
sd = np.sqrt(var + bn.eps)
data.set_value(xv)
yv = bn(data)
yv = bn(Tensor(xv))
yv_expect = (xv - mean) / sd
assertTensorClose(yv_expect, yv.numpy(), max_err=5e-6)
......@@ -12,7 +12,7 @@ import numpy as np
import pytest
import megengine as mge
from megengine import tensor
from megengine import Tensor
from megengine.module import Module
......@@ -35,12 +35,12 @@ def test_cambricon_module():
with open(model, "rb") as f:
data = f.read()
m = MyModule(data)
inputs = []
inputs.append(tensor(data=[], dtype=np.float16, device="cambricon0"))
inputs[0].set_value(np.random.normal(size=(1, 64, 32, 32)).astype(np.float16))
inp = Tensor(
np.random.normal((1, 64, 32, 32)).astype(np.float16), device="cambricon0"
)
def inference(inps):
pred = m(inps)
return pred
pred = inference(inputs)
pred = inference([inp])
......@@ -16,7 +16,7 @@ import pytest
import megengine as mge
import megengine.functional as F
from megengine import Buffer, Parameter, Tensor, tensor
from megengine import Parameter, Tensor, tensor
from megengine.module import (
BatchNorm1d,
BatchNorm2d,
......@@ -196,7 +196,7 @@ class MyModule(Module):
self.i = self.InnerModule()
self.bn = BatchNorm2d(4)
self.param = Parameter(np.ones(1, dtype=np.float32))
self.buff = Buffer(np.ones(1, dtype=np.float32))
self.buff = Tensor(np.ones(1, dtype=np.float32))
def forward(self, x):
x = self.i(x)
......@@ -464,8 +464,7 @@ def test_sequential_named_children():
def test_state_dict():
data_shape = (2, 28)
data = tensor([])
data.set_value(np.random.random(data_shape))
data = tensor(np.random.random(data_shape))
mlp = MLP()
pred0 = mlp(data)
......@@ -542,8 +541,7 @@ def test_shared_param():
def test_pickle_module():
data_shape = (2, 28)
data = tensor([])
data.set_value(np.random.random(data_shape))
data = tensor(np.random.random(data_shape))
mlp = MLP()
# pickle before forward
with BytesIO() as fout:
......@@ -568,8 +566,7 @@ def test_pickle_module():
@pytest.mark.skip(reason="under development")
def test_dump_model():
data_shape = (2, 28)
data = tensor([])
data.set_value(np.random.random(data_shape))
data = Tensor(np.random.random(data_shape))
mlp = MLP()
pred = mlp(data)
f = tempfile.NamedTemporaryFile(delete=False)
......
......@@ -13,7 +13,7 @@ import pytest
import megengine as mge
import megengine.functional as F
from megengine import Buffer, Parameter
from megengine import Parameter, Tensor
from megengine.module import Conv2d
from megengine.test import assertTensorClose
......@@ -33,7 +33,7 @@ def test_set_value():
@pytest.mark.skip(reason="fill unsupported")
def test_fill():
a = Buffer(np.zeros((2, 3), dtype=np.float32))
a = Tensor(np.zeros((2, 3), dtype=np.float32))
a.fill(3)
assertTensorClose(a.numpy(), np.full((2, 3), 3, dtype=np.float32))
a.fill(124.568)
......@@ -80,7 +80,7 @@ def test_fill():
# def test_shape_warning():
# with Graph() as cg:
# cg.set_option("eager_evaluation", False)
# b = Buffer(np.ones((2, 3)).astype(np.float32))
# b = Tensor(np.ones((2, 3)).astype(np.float32))
# with pytest.warns(None) as record:
# print(b.shape)
# if len(record) != 0:
......
......@@ -42,11 +42,11 @@ def test_single_input():
return x
net = Simple(av)
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
opt = optimizer.SGD(net.parameters(), lr=1.0)
opt.clear_grad()
with gm.record():
with gm:
loss = net()
gm.backward(loss.sum())
opt.step()
......@@ -81,11 +81,11 @@ def test_multi_input():
return x
net = Simple(av, bv)
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
opt = optimizer.SGD(net.parameters(), lr=1.0)
opt.clear_grad()
with gm.record():
with gm:
loss = net()
gm.backward(loss.sum())
opt.step()
......@@ -121,11 +121,11 @@ def test_multi_output():
return x + y
net = Simple(av, bv)
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
opt = optimizer.SGD(net.parameters(), lr=1.0)
opt.clear_grad()
with gm.record():
with gm:
loss = net()
gm.backward(loss.sum())
opt.step()
......@@ -163,9 +163,9 @@ def test_skip_invalid_grad():
net = Simple(av, bv)
optim = optimizer.SGD(net.parameters(), lr=1.0)
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
optim.clear_grad()
with gm.record():
with gm:
loss = net().sum()
gm.backward(loss)
optim.step()
......@@ -198,10 +198,10 @@ def test_ste():
av = np.random.random(data_shape).astype(np.float32)
net = Simple(av)
optim = optimizer.SGD(net.parameters(), lr=1.0)
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
optim.clear_grad()
with gm.record():
with gm:
loss = net()
gm.backward(loss.sum())
optim.step()
......@@ -256,9 +256,9 @@ def test_none_in_out_grad():
b = tensor(np.array([2.0], dtype=np.float32))
net = Simple(a, b)
optim = optimizer.SGD(net.parameters(), lr=1.0)
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
optim.clear_grad()
with gm.record():
with gm:
loss, _ = net()
gm.backward(loss)
optim.step()
......@@ -293,10 +293,10 @@ def test_zero_grad():
a = tensor(np.array([1.0], dtype=np.float32))
net = Simple(a)
optim = optimizer.SGD(net.parameters(), lr=1.0)
gm = ad.GradManager().register(net.parameters())
gm = ad.GradManager().attach(net.parameters())
optim.clear_grad()
with gm.record():
with gm:
loss = net()
gm.backward(loss.sum())
optim.step()
......
......@@ -38,7 +38,7 @@ def cvt_to_shape_desc(val, inpvar, config=None):
if isinstance(val, RawTensor):
return as_tensor(val, device)
if not isinstance(val, collections.Iterable):
if not isinstance(val, collections.abc.Iterable):
val = [val]
components = []
......
......@@ -12,19 +12,18 @@ from tempfile import TemporaryFile
import numpy as np
import megengine as mge
from megengine import Buffer, Parameter, tensor
from megengine import Parameter, Tensor
def test_tensor_serialization():
def tensor_eq(a, b):
assert a.dtype == b.dtype
assert a.device == b.device
assert a.requires_grad == b.requires_grad
np.testing.assert_equal(a.numpy(), b.numpy())
with TemporaryFile() as f:
data = np.random.randint(low=0, high=7, size=[233])
a = tensor(data, device="xpux", dtype=np.int32)
a = Tensor(data, device="xpux", dtype=np.int32)
pickle.dump(a, f)
f.seek(0)
b = pickle.load(f)
......@@ -39,19 +38,19 @@ def test_tensor_serialization():
np.testing.assert_equal(a.numpy(), b.numpy())
with TemporaryFile() as f:
a = Buffer(np.random.random(size=(2, 233)).astype(np.float32))
a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
pickle.dump(a, f)
f.seek(0)
b = pickle.load(f)
assert isinstance(b, Buffer)
assert type(b) is Tensor
np.testing.assert_equal(a.numpy(), b.numpy())
with TemporaryFile() as f:
a = Buffer(np.random.random(size=(2, 233)).astype(np.float32))
a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
mge.save(a, f)
f.seek(0)
b = mge.load(f, map_location="cpux")
assert isinstance(b, Buffer)
assert type(b) is Tensor
assert "cpu" in str(b.device)
np.testing.assert_equal(a.numpy(), b.numpy())
......@@ -59,12 +58,12 @@ def test_tensor_serialization():
if mge.is_cuda_available():
device_org = mge.get_default_device()
mge.set_default_device("gpu0")
a = Buffer(np.random.random(size=(2, 233)).astype(np.float32))
a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
mge.save(a, f)
f.seek(0)
mge.set_default_device("cpux")
b = mge.load(f, map_location={"gpu0": "cpu0"})
assert isinstance(b, Buffer)
assert type(b) is Tensor
assert "cpu0" in str(b.device)
np.testing.assert_equal(a.numpy(), b.numpy())
mge.set_default_device(device_org)
......@@ -66,7 +66,7 @@ def main():
mge.set_default_device("cpux")
net = XORNet()
opt = optim.SGD(net.parameters(requires_grad=True), lr=0.01, momentum=0.9)
opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
batch_size = 64
train_dataset = minibatch_generator(batch_size)
val_dataset = minibatch_generator(batch_size)
......