Commit cfae4096 authored by mindspore-ci-bot, committed by Gitee

!4058 modify parameter input

Merge pull request !4058 from lijiaqi/cell_inputs
......@@ -383,9 +383,13 @@ class Cell:
inputs (Function or Cell): inputs of construct method.
"""
parallel_inputs_run = []
if len(inputs) > self._construct_inputs_num:
raise ValueError('Len of inputs: {} is bigger than self._construct_inputs_num: {}.'.
format(len(inputs), self._construct_inputs_num))
# check whether *args is present in the construct signature
if self.argspec[1] is not None:
prefix = self.argspec[1]
for i in range(len(inputs)):
key = prefix + str(i)
self._construct_inputs_names = self._construct_inputs_names + (key,)
self._construct_inputs_num = self._construct_inputs_num + 1
for i, tensor in enumerate(inputs):
key = self._construct_inputs_names[i]
# if input is not used, self.parameter_layout_dict may not contain the key
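For readers skimming the hunk above: `self.argspec[1]` is the `varargs` field of `inspect.getfullargspec`, i.e. the name of the `*args` parameter (or None when the signature has none), so the loop synthesizes one positional name per runtime input. A minimal standalone sketch of that idea, with a hypothetical helper name (not the MindSpore implementation itself):

```python
import inspect

def expand_vararg_names(fn, inputs, base_names):
    # Hypothetical helper mirroring the hunk above: when fn declares *args,
    # synthesize one positional name per runtime input, e.g. 'inputs0', 'inputs1', ...
    argspec = inspect.getfullargspec(fn)
    names = tuple(base_names)
    # argspec.varargs (argspec[1]) is the name of the *args parameter, or None
    if argspec.varargs is not None:
        prefix = argspec.varargs
        for i in range(len(inputs)):
            names += (prefix + str(i),)
    return names

def construct(self, *inputs):
    return inputs

print(expand_vararg_names(construct, (1, 2, 3), ("self",)))
# ('self', 'inputs0', 'inputs1', 'inputs2')
```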
......@@ -412,7 +416,7 @@ class Cell:
from mindspore._extends.parse.parser import get_parse_method_of_class
fn = get_parse_method_of_class(self)
inspect.getfullargspec(fn)
self.argspec = inspect.getfullargspec(fn)
self._construct_inputs_num = fn.__code__.co_argcount
self._construct_inputs_names = fn.__code__.co_varnames
......
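Why the argspec is now stored instead of being thrown away: `co_argcount` counts only the named positional parameters, so a variadic `construct` is invisible to it, whereas `getfullargspec` reports the `*args` name through its `varargs` field. A small illustrative snippet in plain Python (no MindSpore required):

```python
import inspect

# Illustrative only: for a construct method declared with *args, co_argcount
# misses the variadic inputs; getfullargspec exposes the *args name via its
# varargs field (index 1 of the returned tuple), which is why it is kept.
def construct(self, x, *inputs):
    return x

print(construct.__code__.co_argcount)               # 2 -> 'self' and 'x' only
print(construct.__code__.co_varnames[:2])            # ('self', 'x')
print(inspect.getfullargspec(construct).varargs)     # 'inputs'
```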
......@@ -41,7 +41,7 @@ def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, moment,
class Momentum(Optimizer):
"""
r"""
Implements the Momentum algorithm.
Refer to the paper "On the importance of initialization and momentum in deep learning" for more details.
......@@ -56,13 +56,13 @@ class Momentum(Optimizer):
.. math::
v_{t} = v_{t-1} \ast u + gradients
If use_nesterov is True:
.. math::
    p_{t} = p_{t-1} - (grad \ast lr + v_{t} \ast u \ast lr)
If use_nesterov is False:
.. math::
    p_{t} = p_{t-1} - lr \ast v_{t}
Here, grad, lr, p, v and u denote the gradients, learning_rate, params, moments, and momentum respectively.
......
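A scalar Python sketch of the update rule spelled out in the docstring above; it only mirrors the documented math, not the fused kernel the optimizer actually dispatches to, and the helper name is invented for illustration:

```python
def momentum_step(p, v, grad, lr, u, use_nesterov=False):
    # Sketch of the documented rule:
    #   v_t = v_{t-1} * u + grad
    #   nesterov:     p_t = p_{t-1} - (grad * lr + v_t * u * lr)
    #   not nesterov: p_t = p_{t-1} - lr * v_t
    v = v * u + grad
    if use_nesterov:
        p = p - (grad * lr + v * u * lr)
    else:
        p = p - lr * v
    return p, v

# one step on a scalar parameter
p, v = momentum_step(p=1.0, v=0.0, grad=0.5, lr=0.1, u=0.9)
print(p, v)  # 0.95 0.5
```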
......@@ -32,7 +32,7 @@ def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, accum, s
class SGD(Optimizer):
"""
r"""
Implements stochastic gradient descent (optionally with momentum).
Introduction to SGD can be found at https://en.wikipedia.org/wiki/Stochastic_gradient_descent.
......@@ -47,15 +47,15 @@ class SGD(Optimizer):
To improve the performance of parameter groups, a customized order of parameters is supported.
.. math::
    v_{t+1} = u \ast v_{t} + gradient \ast (1-dampening)
If nesterov is True:
.. math::
    p_{t+1} = p_{t} - lr \ast (gradient + u \ast v_{t+1})
If nesterov is False:
.. math::
    p_{t+1} = p_{t} - lr \ast v_{t+1}
Note that for the first step, v_{t+1} = gradient.
......
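The same kind of scalar sketch for the SGD rule documented above, including the first-step special case; again this is illustrative only, with an invented helper name:

```python
def sgd_step(p, v, grad, lr, u, dampening=0.0, nesterov=False, first_step=False):
    # Sketch of the documented rule:
    #   v_{t+1} = u * v_t + grad * (1 - dampening)   (v_{t+1} = grad on the first step)
    #   nesterov:     p_{t+1} = p_t - lr * (grad + u * v_{t+1})
    #   not nesterov: p_{t+1} = p_t - lr * v_{t+1}
    v = grad if first_step else u * v + grad * (1 - dampening)
    if nesterov:
        p = p - lr * (grad + u * v)
    else:
        p = p - lr * v
    return p, v

p, v = sgd_step(p=1.0, v=0.0, grad=0.5, lr=0.1, u=0.9, first_step=True)
print(p, v)  # 0.95 0.5
```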
......@@ -82,7 +82,7 @@ class WithGradCell(Cell):
Wraps the network with backward cell to compute gradients. A network with a loss function is necessary
as argument. If the loss function is None, the network must be a wrapper of network and loss function. This
Cell accepts data and label as inputs and returns gradients for each trainable parameter.
Cell accepts *inputs as inputs and returns gradients for each trainable parameter.
Note:
Run in PyNative mode.
......@@ -95,8 +95,7 @@ class WithGradCell(Cell):
output value. Default: None.
Inputs:
- **data** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
- **label** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
- **(*inputs)** (Tuple(Tensor)) - Tuple of input tensors with shape :math:`(N, \ldots)`.
Outputs:
list, a list of Tensors with identical shapes as trainable weights.
......@@ -126,12 +125,12 @@ class WithGradCell(Cell):
self.network_with_loss = WithLossCell(self.network, self.loss_fn)
self.network_with_loss.set_train()
def construct(self, data, label):
def construct(self, *inputs):
weights = self.weights
if self.sens is None:
grads = self.grad(self.network_with_loss, weights)(data, label)
grads = self.grad(self.network_with_loss, weights)(*inputs)
else:
grads = self.grad(self.network_with_loss, weights)(data, label, self.sens)
grads = self.grad(self.network_with_loss, weights)(*inputs, self.sens)
return grads
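A hypothetical usage sketch of WithGradCell under the new variadic interface, assuming a small Dense backbone and MSE loss; the tensors and shapes are made up for illustration:

```python
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor, context

# Hypothetical usage under the new signature (PyNative mode, per the note above):
# any number of positional tensors is packed into *inputs and forwarded to the
# wrapped network-with-loss.
context.set_context(mode=context.PYNATIVE_MODE)
net = nn.Dense(3, 1)                                  # stand-in backbone
grad_net = nn.WithGradCell(net, nn.MSELoss())

data = Tensor(np.random.randn(4, 3).astype(np.float32))
label = Tensor(np.random.randn(4, 1).astype(np.float32))
grads = grad_net(data, label)                         # gradients of trainable weights
```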
......@@ -139,7 +138,7 @@ class TrainOneStepCell(Cell):
r"""
Network training package class.
Wraps the network with an optimizer. The resulting Cell be trained with input data and label.
Wraps the network with an optimizer. The resulting Cell is trained with input *inputs.
Backward graph will be created in the construct function to do parameter updating. Different
parallel modes are available to run the training.
......@@ -149,8 +148,7 @@ class TrainOneStepCell(Cell):
sens (Number): The scaling number to be filled as the input of backpropagation. Default value is 1.0.
Inputs:
- **data** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
- **label** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
- **(*inputs)** (Tuple(Tensor)) - Tuple of input tensors with shape :math:`(N, \ldots)`.
Outputs:
Tensor, a scalar Tensor with shape :math:`()`.
......@@ -181,11 +179,11 @@ class TrainOneStepCell(Cell):
degree = _get_device_num()
self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
def construct(self, data, label):
def construct(self, *inputs):
weights = self.weights
loss = self.network(data, label)
loss = self.network(*inputs)
sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
grads = self.grad(self.network, weights)(data, label, sens)
grads = self.grad(self.network, weights)(*inputs, sens)
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
......
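And a hypothetical end-to-end sketch for TrainOneStepCell with the same made-up network, showing that data and label now travel through *inputs:

```python
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

# Hypothetical usage under the variadic construct: the one-step trainer simply
# forwards however many tensors the wrapped loss network expects.
net = nn.Dense(3, 1)
loss_net = nn.WithLossCell(net, nn.MSELoss())
optim = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
train_step = nn.TrainOneStepCell(loss_net, optim)
train_step.set_train()

data = Tensor(np.random.randn(4, 3).astype(np.float32))
label = Tensor(np.random.randn(4, 1).astype(np.float32))
loss = train_step(data, label)                        # scalar loss for this step
```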