From 3c2bdaa8ceaa7ad725ebc7faead6cf7f29aaa40a Mon Sep 17 00:00:00 2001
From: levi131 <83750468+levi131@users.noreply.github.com>
Date: Wed, 13 Oct 2021 10:59:51 +0800
Subject: [PATCH] unify usage of tuple and list (#36368)

* modify format

* modify format
---
 python/paddle/autograd/functional.py          | 81 ++++++++-----------
 python/paddle/autograd/utils.py               | 24 +++---
 python/paddle/fluid/dygraph/base.py           |  2 +-
 .../tests/unittests/autograd/test_vjp_jvp.py  |  4 +-
 .../fluid/tests/unittests/autograd/utils.py   | 14 ++--
 5 files changed, 56 insertions(+), 69 deletions(-)

diff --git a/python/paddle/autograd/functional.py b/python/paddle/autograd/functional.py
index 688e04335e..4d7fcd733c 100644
--- a/python/paddle/autograd/functional.py
+++ b/python/paddle/autograd/functional.py
@@ -18,20 +18,7 @@
 from ..fluid import framework
 from ..fluid.dygraph import grad
 from ..nn.initializer import assign
 from ..tensor import reshape, zeros_like, to_tensor
-from .utils import _check_tensors, _stack_tensor_or_return_none, _replace_none_with_zero_tensor
-
-
-def to_tensorlist(tl):
-    if not isinstance(tl, list):
-        if isinstance(tl, tuple):
-            tl = list(tl)
-        else:
-            tl = [tl]
-    for t in tl:
-        assert isinstance(t, paddle.Tensor) or t is None, (
-            f'{t} is expected to be paddle.Tensor or None, but found {type(t)}.'
-        )
-    return tl
+from .utils import _tensors, _stack_tensor_or_return_none, _replace_none_with_zero_tensor
 
 
 @contextlib.contextmanager
@@ -98,19 +85,19 @@ def vjp(func, inputs, v=None, create_graph=False, allow_unused=False):
     reverse mode automatic differentiation.
 
     Args:
-        func(Callable): `func` takes as input a tensor or a list
-            of tensors and returns a tensor or a list of tensors.
-        inputs(list[Tensor]|Tensor): used as positional arguments
-            to evaluate `func`. `inputs` is accepted as one tensor
-            or a list of tensors.
-        v(list[Tensor]|Tensor, optional): the cotangent vector
-            invovled in the VJP computation. `v` matches the size
-            and shape of `func`'s output. Default value is None
+        func(Callable): `func` takes as input a tensor or a list/tuple
+            of tensors and returns a tensor or a list/tuple of tensors.
+        inputs(list[Tensor]|tuple[Tensor]|Tensor): used as positional
+            arguments to evaluate `func`. `inputs` is accepted as one
+            tensor or a list of tensors.
+        v(list[Tensor]|tuple[Tensor]|Tensor|None, optional): the
+            cotangent vector involved in the VJP computation. `v` matches
+            the size and shape of `func`'s output. Default value is None
             and in this case is equivalent to all ones the same size
             of `func`'s output.
-        create_graph(bool, optional): if `True`, gradients can
-            be evaluated on the results. If `False`, taking gradients
-            on the results is invalid. Default value is False.
+        create_graph(bool, optional): if `True`, gradients can be
+            evaluated on the results. If `False`, taking gradients on
+            the results is invalid. Default value is False.
         allow_unused(bool, optional): In case that some Tensors of `inputs`
            do not contribute to the computation of the output. If
            `allow_unused` is False, an error will be raised,
@@ -119,8 +106,9 @@ def vjp(func, inputs, v=None, create_graph=False, allow_unused=False):
 
     Returns:
         output(tuple):
-            func_out: the output of `func(inputs)`
-            vjp(list[Tensor]|Tensor): the pullback results of `v` on `func`
+            func_out(list[Tensor]|tuple[Tensor]|Tensor): the output of
+                `func(inputs)`
+            vjp(list[Tensor]): the pullback results of `v` on `func`
 
     Examples:
         .. code-block:: python
@@ -163,13 +151,13 @@ def vjp(func, inputs, v=None, create_graph=False, allow_unused=False):
             #        [[2., 1.],
             #         [1., 0.]]), None]
     """
-    xs, v = to_tensorlist(inputs), to_tensorlist(v)
+    xs, v = _tensors(inputs, "inputs"), _tensors(v, "v")
     with gradient_scope(
             xs, v, create_graph=create_graph,
             allow_unused=allow_unused) as [xs, v, grad_fn, return_fn]:
         outputs = func(*xs)
-        ys = to_tensorlist(outputs)
+        ys = _tensors(outputs, "outputs")
         grads = grad_fn(ys, xs, v)
         outputs, grads = return_fn(outputs), return_fn(grads)
 
@@ -186,16 +174,16 @@ def jvp(func, inputs, v=None, create_graph=False, allow_unused=False):
     **This API is ONLY available in imperative mode.**
 
     Args:
-        func(Callable): `func` takes as input a tensor or a list
-            of tensors and returns a tensor or a list of tensors.
-        inputs(list[Tensor]|Tensor): used as positional arguments
-            to evaluate `func`. `inputs` is accepted as one tensor
-            or a list of tensors.
-        v(list[Tensor]|Tensor, optional): the tangent vector
-            invovled in the JVP computation. `v` matches the size
-            and shape of `inputs`. `v` is Optional if `func` returns
-            a single tensor. Default value is None and in this case
-            is equivalent to all ones the same size of `inputs`.
+        func(Callable): `func` takes as input a tensor or a list/tuple
+            of tensors and returns a tensor or a list/tuple of tensors.
+        inputs(list[Tensor]|tuple[Tensor]|Tensor): used as positional
+            arguments to evaluate `func`. `inputs` is accepted as one
+            tensor or a list/tuple of tensors.
+        v(list[Tensor]|tuple[Tensor]|Tensor|None, optional): the
+            tangent vector involved in the JVP computation. `v` matches
+            the size and shape of `inputs`. `v` is Optional if `func`
+            returns a single tensor. Default value is None and in this
+            case is equivalent to all ones the same size of `inputs`.
         create_graph(bool, optional): if `True`, gradients can be
            evaluated on the results. If `False`, taking gradients on
            the results is invalid. Default value is False.
         allow_unused(bool, optional): In case that some Tensors of `inputs`
            do not contribute to the computation of the output. If
            `allow_unused` is False, an error will be raised,
@@ -207,8 +195,9 @@ def jvp(func, inputs, v=None, create_graph=False, allow_unused=False):
 
     Returns:
         output(tuple):
-            func_out: the output of `func(inputs)`
-            jvp(list[Tensor]|Tensor): the pullback results of `v` on `func`
+            func_out(list[Tensor]|tuple[Tensor]|Tensor): the output of
+                `func(inputs)`
+            jvp(list[Tensor]): the pushforward results of `v` on `func`
 
     Examples:
         .. code-block:: python
@@ -232,13 +221,13 @@ def jvp(func, inputs, v=None, create_graph=False, allow_unused=False):
             #         [0., 0.]])]
 
     """
-    xs, v = to_tensorlist(inputs), to_tensorlist(v)
+    xs, v = _tensors(inputs, "inputs"), _tensors(v, "v")
     with gradient_scope(
             xs, v, create_graph=create_graph,
             allow_unused=allow_unused) as [xs, v, grad_fn, return_fn]:
         outputs = func(*xs)
-        ys = to_tensorlist(outputs)
+        ys = _tensors(outputs, "outputs")
         ys_grad = [zeros_like(y) for y in ys]
         xs_grad = grad_fn(ys, xs, ys_grad, create_graph=True)
         ys_grad = grad_fn(xs_grad, ys_grad, v)
@@ -357,8 +346,8 @@ def jacobian(func, inputs, create_graph=False, allow_unused=False):
            #          [0., 0., 0., 2.]]), None))
 
    '''
-    inputs = _check_tensors(inputs, "inputs")
-    outputs = _check_tensors(func(*inputs), "outputs")
+    inputs = _tensors(inputs, "inputs")
+    outputs = _tensors(func(*inputs), "outputs")
     fin_size = len(inputs)
     fout_size = len(outputs)
     flat_outputs = tuple(reshape(output, shape=[-1]) for output in outputs)
@@ -494,7 +483,7 @@ def hessian(func, inputs, create_graph=False, allow_unused=False):
            #          [0., 1., 1., 2.]]), None), (None, None))
 
    '''
-    inputs = _check_tensors(inputs, "inputs")
+    inputs = _tensors(inputs, "inputs")
     outputs = func(*inputs)
     assert isinstance(outputs, paddle.Tensor) and outputs.shape == [
         1
diff --git a/python/paddle/autograd/utils.py b/python/paddle/autograd/utils.py
index d437f7d82d..81fe19c168 100644
--- a/python/paddle/autograd/utils.py
+++ b/python/paddle/autograd/utils.py
@@ -15,22 +15,20 @@
 import paddle
 
 
-def _check_tensors(in_out_list, name):
-    assert in_out_list is not None, "{} should not be None".format(name)
-
-    if isinstance(in_out_list, (list, tuple)):
-        assert len(in_out_list) > 0, "{} connot be empyt".format(name)
-        for each_var in in_out_list:
+def _tensors(ts, name):
+    if isinstance(ts, (list, tuple)):
+        assert len(ts) > 0, "{} cannot be empty".format(name)
+        for each_t in ts:
             assert isinstance(
-                each_var,
-                paddle.Tensor), "Elements of {} must be paddle.Tensor".format(
-                    name)
-        return list(in_out_list)
+                each_t, paddle.Tensor
+            ) or each_t is None, "Elements of {} must be paddle.Tensor or None".format(
+                name)
+        return list(ts)
     else:
         assert isinstance(
-            in_out_list,
-            paddle.Tensor), "{} must be Tensor or list of Tensor".format(name)
-        return [in_out_list]
+            ts, paddle.Tensor
+        ) or ts is None, "{} must be Tensor or list of Tensor".format(name)
+        return [ts]
 
 
 def _stack_tensor_or_return_none(origin_list):
diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py
index 18052fa7d4..460831f874 100644
--- a/python/paddle/fluid/dygraph/base.py
+++ b/python/paddle/fluid/dygraph/base.py
@@ -456,7 +456,7 @@ def grad(outputs,
            the Tensors whose gradients are not needed to compute. Default None.
 
     Returns:
-        tuple: a tuple of Tensors, whose length is the same as the Tensor number
+        list: a list of Tensors, whose length is the same as the Tensor number
            inside `inputs`, and the i-th returned Tensor is the sum of gradients of
            `outputs` with respect to the i-th `inputs`.
diff --git a/python/paddle/fluid/tests/unittests/autograd/test_vjp_jvp.py b/python/paddle/fluid/tests/unittests/autograd/test_vjp_jvp.py
index 86331d36a3..f3680ab2a6 100644
--- a/python/paddle/fluid/tests/unittests/autograd/test_vjp_jvp.py
+++ b/python/paddle/fluid/tests/unittests/autograd/test_vjp_jvp.py
@@ -15,7 +15,7 @@
 import unittest
 
 import paddle
-from paddle.autograd.functional import vjp, jvp, to_tensorlist
+from paddle.autograd.functional import vjp, jvp, _tensors
 from paddle import grad, ones_like, zeros_like
 
 
@@ -55,7 +55,7 @@ def nested(x):
 
 
 def make_v(f, inputs):
-    outputs = to_tensorlist(f(*inputs))
+    outputs = _tensors(f(*inputs), "outputs")
     return [ones_like(x) for x in outputs]
 
 
diff --git a/python/paddle/fluid/tests/unittests/autograd/utils.py b/python/paddle/fluid/tests/unittests/autograd/utils.py
index 0aadef4a80..3087e93205 100644
--- a/python/paddle/fluid/tests/unittests/autograd/utils.py
+++ b/python/paddle/fluid/tests/unittests/autograd/utils.py
@@ -14,7 +14,7 @@
 import numpy as np
 
 import paddle
-from paddle.autograd.functional import _check_tensors
+from paddle.autograd.functional import _tensors
 
 
 def _product(t):
@@ -42,8 +42,8 @@ def _set_item(t, idx, value):
 
 
 def _compute_numerical_jacobian(func, xs, delta, np_dtype):
-    xs = _check_tensors(xs, "xs")
-    ys = _check_tensors(func(*xs), "ys")
+    xs = _tensors(xs, "xs")
+    ys = _tensors(func(*xs), "ys")
     fin_size = len(xs)
     fout_size = len(ys)
     jacobian = list([] for _ in range(fout_size))
@@ -59,11 +59,11 @@ def _compute_numerical_jacobian(func, xs, delta, np_dtype):
             orig = _get_item(xs[j], q)
             x_pos = orig + delta
             xs[j] = _set_item(xs[j], q, x_pos)
-            ys_pos = _check_tensors(func(*xs), "ys_pos")
+            ys_pos = _tensors(func(*xs), "ys_pos")
 
             x_neg = orig - delta
             xs[j] = _set_item(xs[j], q, x_neg)
-            ys_neg = _check_tensors(func(*xs), "ys_neg")
+            ys_neg = _tensors(func(*xs), "ys_neg")
 
             xs[j] = _set_item(xs[j], q, orig)
 
@@ -76,8 +76,8 @@ def _compute_numerical_jacobian(func, xs, delta, np_dtype):
 
 
 def _compute_numerical_hessian(func, xs, delta, np_dtype):
-    xs = _check_tensors(xs, "xs")
-    ys = _check_tensors(func(*xs), "ys")
+    xs = _tensors(xs, "xs")
+    ys = _tensors(func(*xs), "ys")
     fin_size = len(xs)
     hessian = list([] for _ in range(fin_size))
     for i in range(fin_size):
-- 
GitLab
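
The core of this patch is the single `_tensors` helper that replaces both the old `_check_tensors` and `to_tensorlist`. As a reading aid only, here is a minimal standalone sketch of the normalization rule it applies; `_tensors_sketch` is a hypothetical name used for illustration and is not part of the patch:

```python
import paddle


def _tensors_sketch(ts, name):
    # Accept a single Tensor, a list, or a tuple (possibly containing None)
    # and always hand back a plain list, so vjp/jvp/jacobian/hessian only
    # ever deal with one container type internally. None entries are
    # tolerated; the functional API later swaps them for zero tensors via
    # _replace_none_with_zero_tensor.
    if isinstance(ts, (list, tuple)):
        assert len(ts) > 0, "{} cannot be empty".format(name)
        for each_t in ts:
            assert each_t is None or isinstance(
                each_t, paddle.Tensor
            ), "Elements of {} must be paddle.Tensor or None".format(name)
        return list(ts)
    assert ts is None or isinstance(
        ts, paddle.Tensor), "{} must be Tensor or list of Tensor".format(name)
    return [ts]
```

Compared with the removed `_check_tensors`, the unified helper additionally tolerates `None` (needed because `v` defaults to `None` in `vjp` and `jvp`), and compared with `to_tensorlist` it keeps a per-argument `name` so the assertion messages stay informative.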
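Because `inputs` (and `v`) are now routed through `_tensors`, tuples and lists become interchangeable at the public API. A small usage sketch, assuming the import path used by the updated unit test (`paddle.autograd.functional`); the two-input `func` below is made up for the example:

```python
import paddle
from paddle.autograd.functional import vjp, jvp


def func(x, y):
    # Any function of several tensors works; matmul keeps the example short.
    return paddle.matmul(x, y)


x = paddle.ones(shape=[2, 2], dtype='float32')
y = paddle.ones(shape=[2, 2], dtype='float32')

# With this patch, inputs may be passed as a tuple as well as a list; v is
# omitted here, which is equivalent to an all-ones cotangent/tangent vector.
output, vjp_result = vjp(func, (x, y))
output, jvp_result = jvp(func, [x, y])
```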
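The one-line change in `python/paddle/fluid/dygraph/base.py` only corrects the documented return type of `paddle.grad` to match its behaviour. A minimal sketch of what that means in dygraph mode (shapes and names chosen arbitrarily for illustration):

```python
import paddle

x = paddle.ones(shape=[2, 2], dtype='float32')
x.stop_gradient = False
y = paddle.sum(x * x)

# As the corrected docstring states, paddle.grad returns a list of Tensors,
# one entry per element of `inputs`.
grads = paddle.grad(outputs=[y], inputs=[x])
assert isinstance(grads, list) and len(grads) == 1
```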
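The test helpers in `python/paddle/fluid/tests/unittests/autograd/utils.py` cross-check `jacobian` and `hessian` against central finite differences. A self-contained NumPy sketch of that idea (function and variable names here are illustrative, not taken from the test file):

```python
import numpy as np


def numerical_jacobian(f, x, delta=1e-4):
    # Central differences: perturb one input entry at a time by +/- delta
    # and difference the flattened outputs, which is the same idea as
    # _compute_numerical_jacobian above (applied there per input tensor).
    y = np.asarray(f(x)).ravel()
    jac = np.zeros((y.size, x.size), dtype=np.float64)
    for j in range(x.size):
        x_pos = x.copy().ravel()
        x_neg = x.copy().ravel()
        x_pos[j] += delta
        x_neg[j] -= delta
        y_pos = np.asarray(f(x_pos.reshape(x.shape))).ravel()
        y_neg = np.asarray(f(x_neg.reshape(x.shape))).ravel()
        jac[:, j] = (y_pos - y_neg) / (2.0 * delta)
    return jac


# Example: the Jacobian of an elementwise square is the diagonal matrix 2*x.
x = np.array([1.0, 2.0, 3.0])
assert np.allclose(
    numerical_jacobian(lambda t: t**2, x), np.diag(2.0 * x), atol=1e-6)
```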