Commit cfae4096 authored by mindspore-ci-bot, committed by Gitee

!4058 modify parameter input

Merge pull request !4058 from lijiaqi/cell_inputs
......@@ -383,9 +383,13 @@ class Cell:
inputs (Function or Cell): inputs of construct method.
"""
parallel_inputs_run = []
if len(inputs) > self._construct_inputs_num:
raise ValueError('Len of inputs: {} is bigger than self._construct_inputs_num: {}.'.
format(len(inputs), self._construct_inputs_num))
# check whether *args is present in the construct signature
if self.argspec[1] is not None:
prefix = self.argspec[1]
for i in range(len(inputs)):
key = prefix + str(i)
self._construct_inputs_names = self._construct_inputs_names + (key,)
self._construct_inputs_num = self._construct_inputs_num + 1
for i, tensor in enumerate(inputs):
key = self._construct_inputs_names[i]
# if input is not used, self.parameter_layout_dict may not contain the key
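For readers skimming the hunk above: `self.argspec[1]` is the `varargs` field of `inspect.getfullargspec`, i.e. the name of the `*args` parameter (or None when the signature has none), so the loop synthesizes one positional name per runtime input. A minimal standalone sketch of that idea, with a hypothetical helper name (not the MindSpore implementation itself):

```python
import inspect

def expand_vararg_names(fn, inputs, base_names):
    # Hypothetical helper mirroring the hunk above: when fn declares *args,
    # synthesize one positional name per runtime input, e.g. 'inputs0', 'inputs1', ...
    argspec = inspect.getfullargspec(fn)
    names = tuple(base_names)
    # argspec.varargs (argspec[1]) is the name of the *args parameter, or None
    if argspec.varargs is not None:
        prefix = argspec.varargs
        for i in range(len(inputs)):
            names += (prefix + str(i),)
    return names

def construct(self, *inputs):
    return inputs

print(expand_vararg_names(construct, (1, 2, 3), ("self",)))
# ('self', 'inputs0', 'inputs1', 'inputs2')
```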
......@@ -412,7 +416,7 @@ class Cell:
from mindspore._extends.parse.parser import get_parse_method_of_class
fn = get_parse_method_of_class(self)
inspect.getfullargspec(fn)
self.argspec = inspect.getfullargspec(fn)
self._construct_inputs_num = fn.__code__.co_argcount
self._construct_inputs_names = fn.__code__.co_varnames
......
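Why the argspec is now stored instead of being thrown away: `co_argcount` counts only the named positional parameters, so a variadic `construct` is invisible to it, whereas `getfullargspec` reports the `*args` name through its `varargs` field. A small illustrative snippet in plain Python (no MindSpore required):

```python
import inspect

# Illustrative only: for a construct method declared with *args, co_argcount
# misses the variadic inputs; getfullargspec exposes the *args name via its
# varargs field (index 1 of the returned tuple), which is why it is kept.
def construct(self, x, *inputs):
    return x

print(construct.__code__.co_argcount)               # 2 -> 'self' and 'x' only
print(construct.__code__.co_varnames[:2])            # ('self', 'x')
print(inspect.getfullargspec(construct).varargs)     # 'inputs'
```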
......@@ -41,7 +41,7 @@ def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, moment,
class Momentum(Optimizer):
"""
r"""
Implements the Momentum algorithm.
Refer to the paper "On the importance of initialization and momentum in deep learning" for more details.
......@@ -56,13 +56,13 @@ class Momentum(Optimizer):
.. math::
v_{t} = v_{t-1} \ast u + gradients
If use_nesterov is True:
.. math::
    p_{t} = p_{t-1} - (grad \ast lr + v_{t} \ast u \ast lr)
If use_nesterov is False:
.. math::
    p_{t} = p_{t-1} - lr \ast v_{t}
Here, grad, lr, p, v and u denote the gradients, learning_rate, params, moments, and momentum respectively.
......
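A scalar Python sketch of the update rule spelled out in the docstring above; it only mirrors the documented math, not the fused kernel the optimizer actually dispatches to, and the helper name is invented for illustration:

```python
def momentum_step(p, v, grad, lr, u, use_nesterov=False):
    # Sketch of the documented rule:
    #   v_t = v_{t-1} * u + grad
    #   nesterov:     p_t = p_{t-1} - (grad * lr + v_t * u * lr)
    #   not nesterov: p_t = p_{t-1} - lr * v_t
    v = v * u + grad
    if use_nesterov:
        p = p - (grad * lr + v * u * lr)
    else:
        p = p - lr * v
    return p, v

# one step on a scalar parameter
p, v = momentum_step(p=1.0, v=0.0, grad=0.5, lr=0.1, u=0.9)
print(p, v)  # 0.95 0.5
```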
......@@ -32,7 +32,7 @@ def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, accum, s
class SGD(Optimizer):
"""
r"""
Implements stochastic gradient descent (optionally with momentum).
Introduction to SGD can be found at https://en.wikipedia.org/wiki/Stochastic_gradient_descent.
......@@ -47,15 +47,15 @@ class SGD(Optimizer):
To improve the performance of parameter groups, a customized order of parameters is supported.
.. math::
    v_{t+1} = u \ast v_{t} + gradient \ast (1-dampening)
If nesterov is True:
.. math::
    p_{t+1} = p_{t} - lr \ast (gradient + u \ast v_{t+1})
If nesterov is False:
.. math::
    p_{t+1} = p_{t} - lr \ast v_{t+1}
Note that for the first step, v_{t+1} = gradient.
......
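The same kind of scalar sketch for the SGD rule documented above, including the first-step special case; again this is illustrative only, with an invented helper name:

```python
def sgd_step(p, v, grad, lr, u, dampening=0.0, nesterov=False, first_step=False):
    # Sketch of the documented rule:
    #   v_{t+1} = u * v_t + grad * (1 - dampening)   (v_{t+1} = grad on the first step)
    #   nesterov:     p_{t+1} = p_t - lr * (grad + u * v_{t+1})
    #   not nesterov: p_{t+1} = p_t - lr * v_{t+1}
    v = grad if first_step else u * v + grad * (1 - dampening)
    if nesterov:
        p = p - lr * (grad + u * v)
    else:
        p = p - lr * v
    return p, v

p, v = sgd_step(p=1.0, v=0.0, grad=0.5, lr=0.1, u=0.9, first_step=True)
print(p, v)  # 0.95 0.5
```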
......@@ -82,7 +82,7 @@ class WithGradCell(Cell):
Wraps the network with backward cell to compute gradients. A network with a loss function is necessary
as argument. If the loss function is None, the network must be a wrapper of network and loss function. This
Cell accepts data and label as inputs and returns gradients for each trainable parameter.
Cell accepts *inputs as inputs and returns gradients for each trainable parameter.
Note:
Run in PyNative mode.
......@@ -95,8 +95,7 @@ class WithGradCell(Cell):
output value. Default: None.
Inputs:
- **data** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
- **label** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
- **(*inputs)** (Tuple(Tensor)) - Tuple of input tensors with shape :math:`(N, \ldots)`.
Outputs:
list, a list of Tensors with identical shapes as trainable weights.
......@@ -126,12 +125,12 @@ class WithGradCell(Cell):
self.network_with_loss = WithLossCell(self.network, self.loss_fn)
self.network_with_loss.set_train()
def construct(self, data, label):
def construct(self, *inputs):
weights = self.weights
if self.sens is None:
grads = self.grad(self.network_with_loss, weights)(data, label)
grads = self.grad(self.network_with_loss, weights)(*inputs)
else:
grads = self.grad(self.network_with_loss, weights)(data, label, self.sens)
grads = self.grad(self.network_with_loss, weights)(*inputs, self.sens)
return grads
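A hypothetical usage sketch of WithGradCell under the new variadic interface, assuming a small Dense backbone and MSE loss; the tensors and shapes are made up for illustration:

```python
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor, context

# Hypothetical usage under the new signature (PyNative mode, per the note above):
# any number of positional tensors is packed into *inputs and forwarded to the
# wrapped network-with-loss.
context.set_context(mode=context.PYNATIVE_MODE)
net = nn.Dense(3, 1)                                  # stand-in backbone
grad_net = nn.WithGradCell(net, nn.MSELoss())

data = Tensor(np.random.randn(4, 3).astype(np.float32))
label = Tensor(np.random.randn(4, 1).astype(np.float32))
grads = grad_net(data, label)                         # gradients of trainable weights
```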
......@@ -139,7 +138,7 @@ class TrainOneStepCell(Cell):
r"""
Network training package class.
Wraps the network with an optimizer. The resulting Cell be trained with input data and label.
Wraps the network with an optimizer. The resulting Cell is trained with input *inputs.
Backward graph will be created in the construct function to do parameter updating. Different
parallel modes are available to run the training.
......@@ -149,8 +148,7 @@ class TrainOneStepCell(Cell):
sens (Number): The scaling number to be filled as the input of backpropagation. Default value is 1.0.
Inputs:
- **data** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
- **label** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
- **(*inputs)** (Tuple(Tensor)) - Tuple of input tensors with shape :math:`(N, \ldots)`.
Outputs:
Tensor, a scalar Tensor with shape :math:`()`.
......@@ -181,11 +179,11 @@ class TrainOneStepCell(Cell):
degree = _get_device_num()
self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
def construct(self, data, label):
def construct(self, *inputs):
weights = self.weights
loss = self.network(data, label)
loss = self.network(*inputs)
sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
grads = self.grad(self.network, weights)(data, label, sens)
grads = self.grad(self.network, weights)(*inputs, sens)
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
......
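And a hypothetical end-to-end sketch for TrainOneStepCell with the same made-up network, showing that data and label now travel through *inputs:

```python
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

# Hypothetical usage under the variadic construct: the one-step trainer simply
# forwards however many tensors the wrapped loss network expects.
net = nn.Dense(3, 1)
loss_net = nn.WithLossCell(net, nn.MSELoss())
optim = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
train_step = nn.TrainOneStepCell(loss_net, optim)
train_step.set_train()

data = Tensor(np.random.randn(4, 3).astype(np.float32))
label = Tensor(np.random.randn(4, 1).astype(np.float32))
loss = train_step(data, label)                        # scalar loss for this step
```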