From 9e764d82036d91333e95a75348ba7c3b8f583005 Mon Sep 17 00:00:00 2001
From: Xiaoxu Chen
Date: Sat, 2 Apr 2022 06:51:55 +0800
Subject: [PATCH] Enhance vjp/jvp/Jacobian/Hessian API for supporting dynamic,
 static graph and batched, unbatched mode (#40692)

* modify vjp/jvp for both dynamic and static graph

* enforce jacobian class for supporting first/last batch

* add unittest for jvp, jacobian with last batch, jacobian with first batch

* fix the incorrect shape when multi-index Jacobian

* enforce Hessian class for supporting dynamic graph

* add Hessian class unittest

* bugfix, jvp double_backward_trick zeros_like return stop_gradient=True in static graph

* add API beta warnings

* add white_list for cuda11.x ci windows.

* optimize some code snippets and documents

* set unittest timeout to 100 seconds

* move vjp,jvp,Jacobian,Hessian to incubate

* fix vjp,jvp import path of sample code

* fix code style error of autograd/__init__ file
---
 python/paddle/autograd/__init__.py | 18 +-
 python/paddle/autograd/functional.py | 1081 +++++++++------
 python/paddle/autograd/utils.py | 45 -
 .../tests/unittests/autograd/CMakeLists.txt | 5 +-
 .../fluid/tests/unittests/autograd/config.py | 49 +
 .../test_autograd_functional_dynamic.py | 1233 +++++++++++++++++
 .../test_autograd_functional_static.py | 455 ++++++
 .../autograd/test_autograd_static.py | 308 ----
 .../tests/unittests/autograd/test_hessian.py | 263 ----
 .../tests/unittests/autograd/test_jacobian.py | 319 -----
 .../tests/unittests/autograd/test_vhp.py | 182 ---
 .../tests/unittests/autograd/test_vjp_jvp.py | 315 -----
 .../fluid/tests/unittests/autograd/utils.py | 231 ++-
 python/paddle/incubate/__init__.py | 1 +
 python/paddle/incubate/autograd/__init__.py | 18 +
 python/setup.py.in | 1 +
 tools/windows/run_unittests.sh | 114 +-
 17 files changed, 2731 insertions(+), 1907 deletions(-)
 delete mode 100644 python/paddle/autograd/utils.py
 create mode 100644 python/paddle/fluid/tests/unittests/autograd/config.py
 create mode 100644 python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py
 create mode 100644 python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_static.py
 delete mode 100644 python/paddle/fluid/tests/unittests/autograd/test_autograd_static.py
 delete mode 100644 python/paddle/fluid/tests/unittests/autograd/test_hessian.py
 delete mode 100644 python/paddle/fluid/tests/unittests/autograd/test_jacobian.py
 delete mode 100644 python/paddle/fluid/tests/unittests/autograd/test_vhp.py
 delete mode 100644 python/paddle/fluid/tests/unittests/autograd/test_vjp_jvp.py
 create mode 100644 python/paddle/incubate/autograd/__init__.py

diff --git a/python/paddle/autograd/__init__.py b/python/paddle/autograd/__init__.py
index 7aab7117de..b13a4591b4 100644
--- a/python/paddle/autograd/__init__.py
+++ b/python/paddle/autograd/__init__.py
@@ -1,11 +1,11 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 # http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -13,12 +13,18 @@
 # limitations under the License.
from ..fluid.dygraph.base import grad # noqa: F401 +from ..fluid.dygraph.base import no_grad_ as no_grad # noqa: F401 +from ..framework import is_grad_enabled, set_grad_enabled # noqa: F401 from . import backward_mode # noqa: F401 from .backward_mode import backward # noqa: F401 from .py_layer import PyLayer, PyLayerContext, EagerPyLayer, EagerPyLayerContext # noqa: F401 from ..framework import set_grad_enabled, is_grad_enabled # noqa: F401 from ..fluid.dygraph.base import no_grad_ as no_grad # noqa: F401 -from .functional import jacobian, hessian, batch_jacobian, batch_hessian # noqa: F401 -from .functional import vjp, jvp, vhp # noqa: F401 +from .functional import vjp, jvp, Jacobian, Hessian # noqa: F401 +from .functional import jacobian, hessian, batch_jacobian, batch_hessian, vhp # noqa: F401 -__all__ = ['backward', 'PyLayer', 'PyLayerContext'] +__all__ = [ # noqa + 'backward', + 'PyLayer', + 'PyLayerContext', +] diff --git a/python/paddle/autograd/functional.py b/python/paddle/autograd/functional.py index c663d37e7f..8e027c270b 100644 --- a/python/paddle/autograd/functional.py +++ b/python/paddle/autograd/functional.py @@ -12,236 +12,686 @@ # See the License for the specific language governing permissions and # limitations under the License. -import contextlib +import functools +import typing + import paddle -from paddle.static import gradients -from ..fluid import framework -from ..fluid.dygraph import grad -from ..tensor.creation import assign -from ..tensor import reshape, zeros_like, to_tensor -from .utils import _tensors, _stack_tensor_or_return_none, _replace_none_with_zero_tensor - - -@contextlib.contextmanager -def gradient_scope(*var_lists, create_graph=False, allow_unused=False): - def grad_fn(ys, xs, v=None, create_graph=create_graph): - if v is not None: - assert len(ys) == len(v), ( - f'The argument {v} is expected to be of the same size as the output. ' - f'Here the output is {ys}, and `v` is {v}.') - if allow_unused: - ys = [ - to_tensor( - [0.0], stop_gradient=False) if y is None else y for y in ys - ] - return grad( - ys, xs, v, create_graph=create_graph, allow_unused=allow_unused) - - def return_fn(out): - if isinstance(out, paddle.Tensor): - if not create_graph: - out = out.detach() - return out - if isinstance(out, list): - return list(return_fn(x) for x in out) - elif isinstance(out, tuple): - return tuple(return_fn(x) for x in out) - else: - assert out is None - return out - - def process(vl): - if vl is None: - return None - out = [] - # If v is treated as constant in the outer scope, its gradient is guaranteed - # not to be taken beyond this scope. Within this scope, however, v's gradient - # may be computed. We only need to detach v in this case. - # Otherwise, v's gradient is valid, and is subject to update beyond this scope. - # In this case we must not confuse the gradient in the outer scope with the - # inner one's. Moreover, we need to make sure that the result from the inner - # scope can flow back to the outer scope. This can be satisfied by extending - # the original variable with a duplication operation v1 = v so that v still - # maintains the complete lineage. 
- for v in vl: - if v is None: - out.append(v) - continue - if create_graph and not v.stop_gradient: - v = assign(v) - else: - v = v.detach() - v.stop_gradient = False - out.append(v) - return out - - try: - var_lists = [process(vl) for vl in var_lists] - bundle = var_lists + [grad_fn, return_fn] - yield bundle - finally: - pass +from paddle.fluid import framework -@framework.dygraph_only -def vjp(func, inputs, v=None, create_graph=False, allow_unused=False): +def vjp(func, xs, v=None): r"""Computes the Vector-Jacobian product, a functional form of reverse mode automatic differentiation. + Warning: + This API is in beta, the signatures could be changed in future version. + Args: - func(Callable): `func` takes as input a tensor or a list/tuple - of tensors and returns a tensor or a list/tuple of tensors. - inputs(list[Tensor]|tuple[Tensor]|Tensor): used as positional - arguments to evaluate `func`. `inputs` is accepted as one - tensor or a list of tensors. - v(list[Tensor]|tuple[Tensor]|Tensor|None, optional): the - cotangent vector invovled in the VJP computation. `v` matches - the size and shape of `func`'s output. Default value is None - and in this case is equivalent to all ones the same size - of `func`'s output. - create_graph(bool, optional): if `True`, gradients can be - evaluated on the results. If `False`, taking gradients on - the results is invalid. Default value is False. - allow_unused(bool, optional): In case that some Tensors of - `inputs` do not contribute to the computation of the output. - If `allow_unused` is False, an error will be raised, - Otherwise, the gradients of the said inputs are returned - None. Default value is False. + func(Callable): A function that takes ``xs`` as inputs parameter and + returns a sequence of Tensors or a Tensor. + xs(Tensor|Sequence[Tensor]): Used as positional arguments to evaluate + ``func``. ``xs`` is accepted as one Tensor or a sequence of Tensors. + v(Tensor|Sequence[Tensor]|None, optional): The cotangent vector invovled + in the VJP computation. ``v`` matches the size and shape of + ``func`` 's output. Defaults to None, which is equivalent to all + ones the same size of ``func`` 's output. Returns: output(tuple): - func_out(list[Tensor]|tuple[Tensor]|Tensor): the output of - `func(inputs)` - vjp(list[Tensor]): the pullback results of `v` on `func` + + - func_out(Tensor|tuple[Tensor]): The output of ``func(xs)`` . + - vjp(Tensor|tuple[Tensor]): The vjp result. Examples: - .. code-block:: python - - def func(x): - return paddle.matmul(x, x) - - x = paddle.ones(shape=[2, 2], dtype='float32') - output, inputs_grad = vjp(func, x) - print(inputs_grad) - # [Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=True, - # [[4., 4.], - # [4., 4.]])] - - v = paddle.to_tensor([[1.0, 0.0], [0.0, 0.0]]) - output, inputs_grad = vjp(func, x, v) - print(inputs_grad) - # [Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=True, - # [[2., 1.], - # [1., 0.]])] - - output, inputs_grad = vjp(func, x, v, create_graph=True) - print(inputs_grad) - # [Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=False, - # [[2., 1.], - # [1., 0.]])] - - y = paddle.ones(shape=[2, 2], dtype='float32') - def func_unused(x, y): - return paddle.matmul(x, x) - - output, inputs_grad = vjp(func, [x, y], v) - # ValueError: (InvalidArgument) The 1-th input does not appear in the backward graph. - # Please check the input variable or set allow_unused=True to get None result. 
-            # [Hint: Expected allow_unused_ == true, but received allow_unused_:0 != true:1.]
-
-            output, inputs_grad = vjp(func, [x, y], v, allow_unused=True)
-            print(inputs_grad)
-            # [Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
-            #        [[2., 1.],
-            #         [1., 0.]]), None]
+
+        .. code-block:: python
+
+            import paddle
+
+            def func(x):
+                return paddle.matmul(x, x)
+
+            x = paddle.ones(shape=[2, 2], dtype='float32')
+            _, vjp_result = paddle.incubate.autograd.vjp(func, x)
+            print(vjp_result)
+            # Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
+            #        [[4., 4.],
+            #         [4., 4.]])
+
+            v = paddle.to_tensor([[1.0, 0.0], [0.0, 0.0]])
+            _, vjp_result = paddle.incubate.autograd.vjp(func, x, v)
+            print(vjp_result)
+            # Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
+            #        [[2., 1.],
+            #         [1., 0.]])
     """
-    xs = _tensors(inputs, "inputs")
-    if v is not None:
-        v = _tensors(v, "v")
+    _check_inputs(func, xs, v)
 
-    with gradient_scope(
-            xs, v, create_graph=create_graph,
-            allow_unused=allow_unused) as [xs, v, grad_fn, return_fn]:
-        outputs = func(*xs)
-        ys = _tensors(outputs, "outputs")
-        grads = grad_fn(ys, xs, v)
-        outputs, grads = return_fn(outputs), return_fn(grads)
+    # ``_separate`` breaks the dependencies between ``xs`` and other
+    # variables. See ``_separate`` for more details.
+    xs, v = _separate(xs), _separate(v)
+    ys = func(*xs) if isinstance(xs, typing.Sequence) else func(xs)
+    _check_v_shape(v, ys)
 
-    return outputs, grads
+    return ys, _grad(ys, xs, v)
 
 
-@framework.dygraph_only
-def jvp(func, inputs, v=None, create_graph=False, allow_unused=False):
+def jvp(func, xs, v=None):
     r"""
     Computes the Jacobian-Vector product for a function at the given
     inputs and a vector in the tangent space induced by the inputs.
 
-    .. note::
-        **This API is ONLY available in imperative mode.**
+    Warning:
+        This API is in beta, the signatures could be changed in future version.
 
     Args:
-        func(Callable): `func` takes as input a tensor or a list/tuple
-            of tensors and returns a tensor or a list/tuple of tensors.
-        inputs(list[Tensor]|tuple[Tensor]|Tensor): used as positional
-            arguments to evaluate `func`. `inputs` is accepted as one
-            tensor or a list/tuple of tensors.
-        v(list[Tensor]|tuple[Tensor]|Tensor|None, optional): the
-            tangent vector invovled in the JVP computation. `v` matches
-            the size and shape of `inputs`. `v` is Optional if `func`
-            returns a single tensor. Default value is None and in this
-            case is equivalent to all ones the same size of `inputs`.
-        create_graph(bool, optional): if `True`, gradients can
-            be evaluated on the results. If `False`, taking gradients
-            on the results is invalid. Default value is False.
-        allow_unused(bool, optional): In case that some Tensors of
-            `inputs` do not contribute to the computation of the output.
-            If `allow_unused` is False, an error will be raised,
-            Otherwise, the gradients of the said inputs are returned
-            None. Default value is False.
+        func(Callable): The ``func`` takes as input a Tensor or a Sequence
+            of Tensors and returns a Tensor or a Sequence of Tensors.
+        xs(Tensor|Sequence[Tensor]): Used as positional arguments to
+            evaluate ``func``. The ``xs`` is accepted as one Tensor or a
+            Sequence of Tensors.
+        v(Tensor|Sequence[Tensor]|None, optional): The tangent vector involved
+            in the JVP computation. The ``v`` matches the size and shape of
+            ``xs`` . Default value is None and in this case is equivalent to
+            all ones the same size of ``xs`` .
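+
+    For intuition, an illustrative sketch of the default ``v`` (it relies only
+    on the behavior documented above, not on any additional API): omitting
+    ``v`` is equivalent to passing an all-ones tangent vector shaped like ``xs``.
+
+    .. code-block:: python
+
+        import paddle
+
+        def func(x):
+            return paddle.matmul(x, x)
+
+        x = paddle.ones(shape=[2, 2], dtype='float32')
+        _, jvp_default = paddle.incubate.autograd.jvp(func, x)
+        _, jvp_ones = paddle.incubate.autograd.jvp(func, x, paddle.ones_like(x))
+        # Both results are expected to match, since the default ``v`` is all
+        # ones with the same shape as ``x``.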
 
     Returns:
         output(tuple):
-            func_out(list[Tensor]|tuple[Tensor]|Tensor): the output of
-                `func(inputs)`
-            jvp(list[Tensor]): the pullback results of `v` on `func`
+
+        - func_out(Tensor|tuple[Tensor]): The output of ``func(xs)`` .
+        - jvp(Tensor|tuple[Tensor]): The jvp result.
+
+    Examples:
+
+        .. code-block:: python
+
+            import paddle
+
+
+            def func(x):
+                return paddle.matmul(x, x)
+
+
+            x = paddle.ones(shape=[2, 2], dtype='float32')
+            _, jvp_result = paddle.incubate.autograd.jvp(func, x)
+            print(jvp_result)
+            # Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
+            #        [[4., 4.],
+            #         [4., 4.]])
+            v = paddle.to_tensor([[1.0, 0.0], [0.0, 0.0]])
+            _, jvp_result = paddle.incubate.autograd.jvp(func, x, v)
+            print(jvp_result)
+            # Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
+            #        [[2., 1.],
+            #         [1., 0.]])
+
+    """
+    _check_inputs(func, xs, v)
+    # ``_separate`` breaks the dependencies between ``xs`` and other
+    # variables. See ``_separate`` for more details.
+    xs, v = _separate(xs), _separate(v)
+    ys = func(*xs) if isinstance(xs, typing.Sequence) else func(xs)
+    _check_v_shape(v, xs)
+    return ys, _double_backward_trick(ys, xs, v)
+
+
+def _double_backward_trick(ys, xs, v):
+    """Double backward trick for computing ``jvp`` by ``vjp``.
+    See details: https://j-towns.github.io/2017/06/12/A-new-trick.html
+    """
+    # The value of ys_grad is not important, it can be any random value in
+    # theory, but it's required to set stop_gradient=False.
+    ys_grad = _zeros_like_with_grad(ys)
+    xs_grad = _grad(ys, xs, ys_grad)
+    return _grad(xs_grad, ys_grad, v)
+
+
+def _zeros_like_with_grad(xs):
+    """Create a zero Tensor or a sequence of zero Tensors like ``xs`` with
+    ``stop_gradient=False`` .
+    """
+    if not isinstance(xs, typing.Sequence):
+        ys = paddle.zeros_like(xs)
+        ys.stop_gradient = False
+    else:
+        ys = []
+        for x in xs:
+            y = paddle.zeros_like(x)
+            y.stop_gradient = False
+            ys.append(y)
+    return ys
+
+
+class Jacobian(object):
+    r"""
+    Computes the Jacobian matrix of a given function.
+
+    If the function has multiple inputs and multiple outputs, during internal
+    implementation, all input tensors are concatenated after being flattened,
+    the batch dimension is retained, and the output is subject to the same
+    processing rules.
+
+    Once the Jacobian ``J`` is constructed, you can use a multidimensional index
+    to retrieve the submatrix of ``J``, the same as slicing a Tensor. The
+    submatrix is lazily evaluated along the row axis, and will be cached once
+    evaluated.
+
+    For example, supposing ``is_batched=True``, you can retrieve the submatrix
+    by the following methods:
+
+        * J[:], retrieving the full matrix.
+        * J[:, :, j], retrieving the partial derivatives w.r.t. the j'th input
+          variable.
+        * J[:, i, :], retrieving the partial derivatives w.r.t. the i'th output
+          variable.
+        * J[:, i, j], retrieving the partial derivatives w.r.t. the i'th output
+          variable and the j'th input variable.
+
+    Notes:
+
+        Ellipsis index is not supported currently.
+
+    Warning:
+        This API is in beta, the signatures could be changed in future version.
+
+    Args:
+
+        func (Callable): A python function that takes a Tensor or a sequence of
+            Tensors as inputs (the first dimension is the batch size) and
+            returns a Tensor or a sequence of Tensors.
+        xs (Tensor|Sequence[Tensor]): The input to the function ``func`` .
+        is_batched (bool): If true, the first axis is batch axis. Defaults to
+            False.
+
+    Returns:
+
+        Jacobian (Object): A python object that retains the Jacobian matrix.
+
+    Examples:
+
+        ..
code-block:: python + + import paddle + + + def func(x, y): + return paddle.matmul(x, y) + + + x = paddle.to_tensor([[1., 2.], [3., 4.]]) + J = paddle.incubate.autograd.Jacobian(func, [x, x]) + print(J[:, :]) + # Tensor(shape=[4, 8], dtype=float32, place=Place(gpu:0), stop_gradient=False, + # [[1., 3., 0., 0., 1., 0., 2., 0.], + # [2., 4., 0., 0., 0., 1., 0., 2.], + # [0., 0., 1., 3., 3., 0., 4., 0.], + # [0., 0., 2., 4., 0., 3., 0., 4.]]) + + print(J[0, :]) + # Tensor(shape=[8], dtype=float32, place=Place(gpu:0), stop_gradient=False, + # [1., 3., 0., 0., 1., 0., 2., 0.]) + print(J[:, 0]) + # Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=False, + # [1., 2., 0., 0.]) + + """ + + def __init__(self, func, xs, is_batched=False): + if not is_batched: + self._jacobian = _JacobianNoBatch(func, xs) + else: + self._jacobian = _JacobianBatchFirst(func, xs) + + def __getitem__(self, indexes): + return self._jacobian[indexes] + + @property + def shape(self): + """The shape of flattened Jacobian matrix. + """ + return self._jacobian.shape + + +class Hessian(object): + """ + Computes the Hessian matrix with a given ``func`` with respect to ``xs`` . + + If the function has multiple inputs, during internal implementation, + all input tensors are concatenated after being flatten, the batch dimension + is retained. + + The Hessian submatrix is lazily evaluated, and can be retrieved with a + multidimensional indexes. See details ``Jacobian`` . + + Warning: + This API is in beta, the signatures could be changed in future version. + + Args: + func (Callable): A python function that takes a Tensor or a Tensor + sequence as inputs and returns a Tensor with shape + ``[batch_size, 1]`` with batch or ``[1]`` without batch. + xs (Tensor|Sequence(Tensor)): The input Tensor or Tensor sequence of + the function ``func``. + is_batched (bool): If true, the first axis is batch axis. Defaults to + False. + + Returns: + + Hessian (Object): A python object retains the Hessian matrix. + Examples: + .. 
code-block:: python - def func(x): - return paddle.matmul(x, x) + import paddle - x = paddle.ones(shape=[2, 2], dtype='float32') - output, inputs_grad = jvp(func, x) - print(inputs_grad) - # [Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=True, - # [[2., 2.], - # [2., 2.]])] + def reducer(x): + return paddle.sum(x * x) - v = paddle.to_tensor([[1.0, 0.0], [0.0, 0.0]]) - output, inputs_grad = vjp(func, x, v) - print(inputs_grad) - # [Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=True, - # [[1., 1.], - # [0., 0.]])] + x = paddle.rand([2, 2]) + h = paddle.incubate.autograd.Hessian(reducer, x) + print(h[:]) + # Tensor(shape=[4, 4], dtype=float32, place=Place(gpu:0), stop_gradient=False, + # [[2., 0., 0., 0.], + # [0., 2., 0., 0.], + # [0., 0., 2., 0.], + # [0., 0., 0., 2.]]) """ - xs = _tensors(inputs, "inputs") - if v is not None: - v = _tensors(v, "v") - with gradient_scope( - xs, v, create_graph=create_graph, - allow_unused=allow_unused) as [xs, v, grad_fn, return_fn]: - outputs = func(*xs) - ys = _tensors(outputs, "outputs") - ys_grad = [zeros_like(y) for y in ys] - xs_grad = grad_fn(ys, xs, ys_grad, create_graph=True) - ys_grad = grad_fn(xs_grad, ys_grad, v) - outputs, ys_grad = return_fn(outputs), return_fn(ys_grad) + def __init__(self, func, xs, is_batched=False): + def _jac_func(*xs): + jac = Jacobian(func, xs, is_batched=is_batched) + if (is_batched and jac.shape[1] != 1) or (not is_batched and + jac.shape[0] != 1): + raise RuntimeError( + "The function given to Hessian shoud return as single element Tensor or batched single element Tensor." + ) + return jac[:, 0, :] if is_batched else jac[0, :] + + self.symbolic = Jacobian(_jac_func, xs, is_batched=is_batched) + + def __getitem__(self, indexes): + return self.symbolic[indexes] - return outputs, ys_grad + @property + def shape(self): + """The shape of flattened Hessian matrix. + """ + return self.symbolic.shape + + +class _Jacobian(object): + """The base class for computing Jacobian matrix. + + ``_Jacobian`` implementes the core logic of multidimensional index and lazy + evaluation for Jacobian matrix, subclass only need to overwrite following + methods: + + * ``_lazy_axis()``, return the axis along which will be lazy + evaluating. + * ``_flatten(xs)``, flattens the inputs ``xs``. + * ``_evaluate(index)``, evaluates one slice along ``_lazy_axis`` . + + Notes: + + Because currently PaddlePaddle only support reverse differentiation by + ``paddle.grad``, so lazy evaluation is only supported along the row of + Jacobian matrix, which means that slicing along row will get better + performance. 
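+
+        As an illustrative sketch of this note (it uses only the public
+        ``Jacobian`` wrapper documented above): retrieving a row evaluates a
+        single lazy slice, while retrieving a column forces every row to be
+        evaluated first.
+
+        .. code-block:: python
+
+            import paddle
+
+            def func(x):
+                return paddle.matmul(x, x)
+
+            x = paddle.ones(shape=[2, 2], dtype='float32')
+            J = paddle.incubate.autograd.Jacobian(func, x)
+            row = J[0, :]  # evaluates (and caches) only the 0'th row
+            col = J[:, 0]  # needs all rows evaluated before slicing the column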
+ + """ + + def __init__(self, func, xs): + self._xs = _separate(xs) + self._ys = func(*_as_tensors(self._xs)) + self._flatten_xs = self._flatten(_as_tensors(self._xs)) + self._flatten_ys = self._flatten(_as_tensors(self._ys)) + self._cache = {} + + @property + def shape(self): + raise NotImplementedError + + @property + def _lazy_axis(self): + """"The axis of lazily evaluated.""" + raise NotImplementedError + + def _lazy_indexes(self, indexes): + idx = indexes[self._lazy_axis] + return (idx, ) if isinstance( + idx, int) else tuple(range(idx.start, idx.stop, idx.step)) + + def _flatten(self, xs): + raise NotImplementedError + + def _shifted_indexes(self, indexes, lazy_axis_size=0): + idx = indexes[self._lazy_axis] + shifted_lazy_axis_idx = 0 if isinstance( + idx, int) else slice(0, lazy_axis_size, 1) + return indexes[:self._lazy_axis] + (shifted_lazy_axis_idx, + ) + indexes[self._lazy_axis + 1:] + + def __getitem__(self, indexes): + indexes = _multi_index(indexes, self.shape) + + if isinstance(indexes[self._lazy_axis], int): + other_indexes = indexes[:self._lazy_axis] + \ + indexes[self._lazy_axis+1:] + return self._cached_evaluate(indexes[self._lazy_axis])[ + other_indexes] + lazy_indexes = self._lazy_indexes(indexes) + part_jac = paddle.stack( + [self._cached_evaluate(i) for i in lazy_indexes], + axis=self._lazy_axis) + return part_jac[self._shifted_indexes(indexes, len(lazy_indexes))] + + def _cached_evaluate(self, k): + v = self._cache.get(k) + if v is None: + v = self._evaluate(k) + self._cache[k] = v + return v + + def _evaluate(self, index): + """Evaluate one slice at along lazy axis.""" + raise NotImplementedError + + +class _JacobianNoBatch(_Jacobian): + """Compute Jacobian matrix without batch dimension. + Suppose the mapping is :math:`f: R^M \to R^N`, the output shape is + ``(N, M)`` . + """ + + def __init__(self, func, xs): + super(_JacobianNoBatch, self).__init__(func, xs) + + @property + def shape(self): + return (self._flatten_ys.shape[0], self._flatten_xs.shape[0]) + + @property + def _lazy_axis(self): + return 0 + + def _flatten(self, xs): + return paddle.concat(tuple(x.reshape((-1, )) for x in xs)) + + def _evaluate(self, row_index): + return self._flatten(_grad( + self._flatten_ys[row_index], + self._xs, )) + + +class _JacobianBatchLast(_Jacobian): + """Compute Jacobian matrix with batch at last axis. + Suppose the mapping is :math:`f: R^{M,B} \to R^{N,B}`, the output shape is + ``(N, M, B)`` . + """ + + def __init__(self, func, xs): + super(_JacobianBatchLast, self).__init__(func, xs) + + @property + def shape(self): + return (self._flatten_ys.shape[0], self._flatten_xs.shape[0], + self._flatten_xs.shape[1]) + + @property + def _lazy_axis(self): + return 0 + + def _flatten(self, xs): + return paddle.concat( + tuple(x.reshape((-1, x.shape[-1])) for x in _as_tensors(xs)), 0) + + def _evaluate(self, row): + return self._flatten(_grad(self._flatten_ys[row, :], self._xs)) + + +class _JacobianBatchFirst(_Jacobian): + """Compute Jacobian matrix with batch at first axis. + Suppose the mapping is :math:`f: R^{B,M} \to R^{B,N}`, the output shape is + ``(B, N, M)`` . 
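+
+    For an illustrative sketch (through the public ``Jacobian`` wrapper with
+    ``is_batched=True``, which delegates to this class): a function mapping
+    ``[B, M]`` inputs to ``[B, N]`` outputs yields a ``(B, N, M)`` Jacobian.
+
+    .. code-block:: python
+
+        import paddle
+
+        def func(x):  # maps [B, 2] -> [B, 3]
+            w = paddle.ones((2, 3), dtype='float32')
+            return paddle.matmul(x, w)
+
+        x = paddle.rand((4, 2))
+        J = paddle.incubate.autograd.Jacobian(func, x, is_batched=True)
+        # J.shape is expected to be (4, 3, 2), i.e. (B, N, M).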
+ """ + + def __init__(self, func, xs): + super(_JacobianBatchFirst, self).__init__(func, xs) + + @property + def shape(self): + return (self._flatten_xs.shape[0], self._flatten_ys.shape[1], + self._flatten_xs.shape[1]) + + @property + def _lazy_axis(self): + return 1 + + def _flatten(self, xs): + return paddle.concat( + tuple(x.reshape((x.shape[0], -1)) for x in _as_tensors(xs)), 1) + + def _evaluate(self, row_index): + return self._flatten(_grad(self._flatten_ys[:, row_index], self._xs)) + + +def _multi_index(indexes, shape): + """A tool for parsing N-dimensional index into a standard format. + + Currently supporting following input format: + * ([positive|negative|slice], ...), the right-most elements can be + omited. + + The standard format after converted is slice tuple which contains N elements: + * ([positive|slice], ..., [positive|slice]) + + Notes: + Ellipsis indexes such as ``(..., i), (i, ...)`` is not supported. + + Args: + indexes (tuple): The input indexes. + shape (tuple): The input shape. + + Returns: + tuple: The standard format index as the above description. + """ + indexes = indexes if isinstance(indexes, typing.Sequence) else (indexes, ) + if any(isinstance(i, type(Ellipsis)) for i in indexes): + raise IndexError('Ellipsis index currently is not supported.') + # Fill the right-most elements. + indexes = indexes + (slice(0, None, None), ) * (len(shape) - len(indexes)) + # Convert to positive index. + positive_indexes = [] + for i, index in enumerate(indexes): + if isinstance(index, slice): + index = slice(index.start or 0, index.stop or shape[i], + index.step or 1) + positive_indexes.append( + slice( + index.start + shape[i] if index.start < 0 else index.start, + index.stop + shape[i] if index.stop < 0 else index.stop, + # Negative step means index backward, no need to convert to + # positive interger. + index.step)) + elif isinstance(index, int): + positive_indexes.append(index + shape[i] if index < 0 else index) + else: + raise TypeError(f'Not supported index type {index}.') + return tuple(positive_indexes) + + +def _as_tensors(xs): + return (xs, ) if isinstance(xs, framework.Variable) else xs + + +def _stack_tensor_or_return_none(origin_list): + assert len(origin_list) > 0, "Can't not stack an empty list" + return paddle.stack( + origin_list, axis=0) if isinstance( + origin_list[0], paddle.fluid.framework.Variable) else None + + +def _replace_none_with_zero_tensor(xs, refs): + if xs is None: + xs = paddle.zeros_like(refs) + xs.stop_gradient = refs.stop_gradient + return xs + elif isinstance(xs, typing.Sequence): + return tuple( + _replace_none_with_zero_tensor(x, refs[i]) + for i, x in enumerate(xs)) + else: + return xs + + +def _grad(ys, xs, v=None): + """A gradient function that can be used in dynamic graph and static graph. + + The ``grad`` combines ``paddle.grad`` used in dynamic graph and + ``paddle.static.gradients`` used in static graph, and do following changes: + + * The ``allow_unused`` flag is removed and set defaults to true internally, + none in outputs will be replaced by zero tensor. + * The ``create_graph`` flag is removed and set defaults to true internally, + only makes sense in dynamic graph. + * When xs is a single Tensor, ``paddle.grad`` returns a list which only + contains one Tensor. It may confuse users, thus in this case we improve + to return a single Tensor in _grad interface. + + Args: + ys (Tensor|Sequence[Tensor]): The output tensor or tensor sequence of + the graph to compute gradients. 
+        xs (Tensor|Sequence[Tensor]): The input tensor or tensor sequence of the graph to
+            compute gradients. The returned values of this API are the
+            gradients of inputs .
+        v (Tensor|Sequence[Tensor]|None, optional): The initial gradient values
+            of outputs . If grad_outputs is None, the initial gradient values of
+            outputs would be Tensors filled with 1; if grad_outputs is not None,
+            it must have the same length as outputs , and in this case, the
+            initial gradient value of the i-th outputs would be: (1) a Tensor
+            filled with 1 when the i-th element of grad_outputs is None;
+            (2) the i-th element of grad_outputs when the i-th element of
+            grad_outputs is a Tensor. Default None.
+
+    Returns:
+        Tensor|tuple[Tensor]: Tensor or a tuple of Tensors, whose length is the
+            same as the Tensor number inside inputs, and the i-th returned
+            Tensor is the sum of gradients of outputs with respect to the i-th
+            inputs.
+    """
+    if paddle.fluid._non_static_mode():
+        xs_grad = paddle.grad(ys, xs, v, create_graph=True, allow_unused=True)
+    else:
+        xs_grad = paddle.static.gradients(ys, xs, v)
+
+    if isinstance(xs, paddle.fluid.framework.Variable):
+        xs_grad = xs_grad[0]
+
+    return _replace_none_with_zero_tensor(xs_grad, xs)
+
+
+def _separate(xs):
+    """
+    ``_separate`` separates ``xs`` from the computation graph through ``clone``
+    or ``detach`` .
+
+    Internally, ``paddle.grad(ys, xs)`` is a stateful API implemented on top of
+    the computational graph, which will reduce gradients along all paths from
+    ys to xs.
+
+    However, functional autograd APIs such as ``vjp`` and ``jvp`` are stateless,
+    and only compute gradients with a given ``func`` .
+
+    For example, given a ``func`` :math:`y0=f(x0)`, supposing the forward paths
+    are ``x0 -> y0`` and ``x0 -> x1 -> y0`` .
+    ``paddle.grad(y0, x0)`` will reduce gradients along both ``y0->x0`` and
+    ``y0->x1->x0``, while ``vjp`` only needs to reduce along ``y0->x0``.
+
+    So it is necessary to clone or detach ``xs`` to break the dependencies with
+    other variables.
+
+    Examples:
+
+        .. code-block:: python
+
+            import paddle
+            from paddle.autograd.functional import _separate
+
+
+            def func(x, y):
+                return x * y
+
+
+            x = paddle.ones((1,))
+            x.stop_gradient = False
+
+            y = func(x, x)
+            print(paddle.grad(y, x))
+            # [Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True,
+            #        [2.])]
+
+            x1, x2 = _separate((x, x))
+            y = func(x1, x2)
+            print(paddle.grad(y, x1))
+            # [Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True,
+            #        [1.])]
+
+    """
+    if isinstance(xs, typing.Sequence):
+        return tuple(_single_separate(x) for x in xs)
+    else:
+        return _single_separate(xs)
+
+
+def _single_separate(x):
+    if x is None:  # x may be None because the grad input v defaults to None.
+        return x
+    if not x.stop_gradient:
+        return paddle.clone(x)
+    else:  # use detach to share memory when no gradients are needed.
+        x = x.detach()
+        x.stop_gradient = False
+        return x
+    return x
+
+
+def _check_inputs(func, xs, v=None):
+    if not callable(func):
+        raise TypeError(f"Expected 'func' is Callable, but got {type(func)}.")
+
+    if not isinstance(xs, (framework.Variable, typing.Sequence)):
+        raise TypeError(f"Expected 'xs' is a Tensor|Sequence[Tensor], "
+                        f"but got {type(xs)}.")
+    if isinstance(xs, typing.Sequence) and not all(
+            isinstance(x, framework.Variable) for x in xs):
+        raise TypeError("All elements of 'xs' should be Tensor.")
+
+    if not isinstance(v, (framework.Variable, typing.Sequence, type(None))):
+        raise TypeError(
+            f"Expected 'v' is Tensor|Sequence[Tensor]|None, but got {type(v)}.")
+
+    if isinstance(v, typing.Sequence) and not all(
+            isinstance(e, framework.Variable) for e in v):
+        raise TypeError("All elements of 'v' should be Tensor.")
+
+
+def _check_v_shape(v, refs):
+    if v is None:
+        return
+
+    v, refs = _as_tensors(v), _as_tensors(refs)
+    if len(refs) != len(v):
+        raise RuntimeError(f"The argument v is a tuple of invalid length: "
+                           f"should be {len(refs)} but got {len(v)}.")
+
+    for index, (element_v, element_ref) in enumerate(zip(v, refs)):
+        if element_v.shape != element_ref.shape:
+            raise RuntimeError(
+                f"The v[{index}] has invalid shape: should "
+                f"be {element_ref.shape} but got {element_v.shape}.")
 
 
 @framework.dygraph_only
@@ -354,16 +804,18 @@ def jacobian(func, inputs, create_graph=False, allow_unused=False):
             #         [0., 0., 0., 2.]]), None))
     '''
-    inputs = _tensors(inputs, "inputs")
-    outputs = _tensors(func(*inputs), "outputs")
+    inputs = _as_tensors(inputs)
+    outputs = _as_tensors(func(*inputs))
     fin_size = len(inputs)
     fout_size = len(outputs)
-    flat_outputs = tuple(reshape(output, shape=[-1]) for output in outputs)
+    flat_outputs = tuple(
+        paddle.reshape(
+            output, shape=[-1]) for output in outputs)
     jacobian = tuple()
     for i, flat_output in enumerate(flat_outputs):
         jac_i = list([] for _ in range(fin_size))
         for k in range(len(flat_output)):
-            row_k = grad(
+            row_k = paddle.grad(
                 flat_output[k],
                 inputs,
                 create_graph=create_graph,
@@ -371,7 +823,7 @@ def jacobian(func, inputs, create_graph=False, allow_unused=False):
                 allow_unused=allow_unused)
             for j in range(fin_size):
                 jac_i[j].append(
-                    reshape(
+                    paddle.reshape(
                         row_k[j], shape=[-1])
                     if isinstance(row_k[j], paddle.Tensor) else None)
         jacobian += (tuple(
@@ -419,7 +871,7 @@ def batch_jacobian(func, inputs, create_graph=False, allow_unused=False):
         be a tuple of Tensors. If both of inputs and outputs are Tensor
         list/tuple, then the Jacobian will be a tuple of tuple of
         Tensors. Noted that the first dimension of inputs is batch size.
- + For example, the inputs shape and outputs shape of function ``func` is [batch_size, num] and [batch_size, num] respectively, then the Jacobian will be a Tensor with @@ -489,10 +941,10 @@ def batch_jacobian(func, inputs, create_graph=False, allow_unused=False): # [0., 1., 0., 1., 0., 1., 0., 1.]]), Tensor(shape=[2, 8], dtype=float64, place=CUDAPlace(0), stop_gradient=True, # [[1., 0., 1., 0., 1., 0., 1., 0.], # [0., 1., 0., 1., 0., 1., 0., 1.]])) - + ''' - inputs = _tensors(inputs, "inputs") - outputs = _tensors(func(*inputs), "outputs") + inputs = _as_tensors(inputs) + outputs = _as_tensors(func(*inputs)) batch_size = inputs[0].shape[0] for input in inputs: assert input.shape[ @@ -503,13 +955,13 @@ def batch_jacobian(func, inputs, create_graph=False, allow_unused=False): fin_size = len(inputs) fout_size = len(outputs) flat_outputs = tuple( - reshape( + paddle.reshape( output, shape=[batch_size, -1]) for output in outputs) jacobian = tuple() for i, flat_output in enumerate(flat_outputs): jac_i = list([] for _ in range(fin_size)) for k in range(flat_output.shape[1]): - row_k = grad( + row_k = paddle.grad( flat_output[:, k], inputs, create_graph=create_graph, @@ -517,7 +969,7 @@ def batch_jacobian(func, inputs, create_graph=False, allow_unused=False): allow_unused=allow_unused) for j in range(fin_size): jac_i[j].append( - reshape( + paddle.reshape( row_k[j], shape=[-1]) if isinstance(row_k[j], paddle.Tensor) else None) jacobian += (tuple( @@ -569,7 +1021,7 @@ def batch_hessian(func, inputs, create_graph=False, allow_unused=False): the inputs shape and outputs shape of function ``func` is [batch_size, num] and [batch_size, 1] respectively, then the batched Hessian will be a Tensor with a shape of [num, batch_size * num]. - + Why the final shape in this case is that? because batch_hessian will create a inner func(the wrapper of paddle.grad() func) to computes the sum of gradients of `outputs` with respect to each `inputs`, @@ -579,7 +1031,7 @@ def batch_hessian(func, inputs, create_graph=False, allow_unused=False): matrix of the ``i``th column output(Noted that this output means the first order differentiation) and the ``j``th input and will have same dtype and device as the corresponding input. Other situations can be deduced by analogy. - + Examples 1: .. code-block:: python @@ -592,8 +1044,8 @@ def batch_hessian(func, inputs, create_graph=False, allow_unused=False): def func(x): return paddle.matmul(x * x, weight)[:, 0:1] - - + + x.stop_gradient = False batch_hessian = paddle.autograd.batch_hessian(func, x) print(batch_hessian) @@ -612,7 +1064,7 @@ def batch_hessian(func, inputs, create_graph=False, allow_unused=False): def func(x, y): return paddle.matmul(x * x * y * y, weight)[:, 0:1] - + x.stop_gradient = False y.stop_gradient = False batch_hessian = paddle.autograd.batch_hessian(func, [x, y]) @@ -629,7 +1081,7 @@ def batch_hessian(func, inputs, create_graph=False, allow_unused=False): # Tensor(shape=[2, 8], dtype=float64, place=CUDAPlace(0), stop_gradient=True, # [[2., 0., 2., 0., 2., 0., 2., 0.], # [0., 2., 0., 2., 0., 2., 0., 2.]]))) - + Examples 3: .. 
code-block:: python @@ -639,7 +1091,7 @@ def batch_hessian(func, inputs, create_graph=False, allow_unused=False): x = paddle.ones(shape=(4, 2), dtype='float64') weight = paddle.ones(shape=(2, 4), dtype='float64') y = paddle.ones(shape=(4, 2), dtype='float64') - + def func(x, y): return paddle.matmul(x * x, weight)[:, 0:1] @@ -652,7 +1104,7 @@ def batch_hessian(func, inputs, create_graph=False, allow_unused=False): # [0., 2., 0., 2., 0., 2., 0., 2.]]), None), (None, None)) ''' - inputs = _tensors(inputs, "inputs") + inputs = _as_tensors(inputs) outputs = func(*inputs) batch_size = inputs[0].shape[0] for input in inputs: @@ -663,7 +1115,7 @@ def batch_hessian(func, inputs, create_graph=False, allow_unused=False): ], "The function to compute batched Hessian matrix should return a Tensor of shape [batch_size, 1]" def jac_func(*ins): - grad_inputs = grad( + grad_inputs = paddle.grad( outputs, ins, create_graph=True, @@ -715,7 +1167,7 @@ def hessian(func, inputs, create_graph=False, allow_unused=False): def func(x): return paddle.sum(paddle.matmul(x, x)) - + x = paddle.ones(shape=[2, 2], dtype='float32') x.stop_gradient = False hessian = paddle.autograd.hessian(func, x) @@ -733,7 +1185,7 @@ def hessian(func, inputs, create_graph=False, allow_unused=False): def func(x, y): return paddle.sum(paddle.matmul(x, y)) - + x = paddle.ones(shape=[2, 2], dtype='float32') y = paddle.ones(shape=[2, 2], dtype='float32') x.stop_gradient = False @@ -768,7 +1220,7 @@ def hessian(func, inputs, create_graph=False, allow_unused=False): def func(x, y): return paddle.sum(paddle.matmul(x, x)) - + x = paddle.ones(shape=[2, 2], dtype='float32') y = paddle.ones(shape=[2, 2], dtype='float32') x.stop_gradient = False @@ -782,14 +1234,14 @@ def hessian(func, inputs, create_graph=False, allow_unused=False): # [0., 1., 1., 2.]]), None), (None, None)) ''' - inputs = _tensors(inputs, "inputs") + inputs = _as_tensors(inputs) outputs = func(*inputs) assert isinstance(outputs, paddle.Tensor) and outputs.shape == [ 1 ], "The function to compute Hessian matrix should return a Tensor with a single element" def jac_func(*ins): - grad_inputs = grad( + grad_inputs = paddle.grad( outputs, ins, create_graph=True, @@ -803,7 +1255,6 @@ def hessian(func, inputs, create_graph=False, allow_unused=False): jac_func, inputs, create_graph=create_graph, allow_unused=allow_unused) -@framework.dygraph_only def vhp(func, inputs, v=None, create_graph=False, allow_unused=False): ''' .. 
note:: @@ -839,7 +1290,7 @@ def vhp(func, inputs, v=None, create_graph=False, allow_unused=False): import paddle def func(x): return paddle.sum(paddle.matmul(x, x)) - + x = paddle.ones(shape=[2, 2], dtype='float32') x.stop_gradient = False vx = paddle.ones(shape=[2, 2], dtype='float32') * 2 @@ -856,7 +1307,7 @@ def vhp(func, inputs, v=None, create_graph=False, allow_unused=False): import paddle def func(x): return paddle.sum(paddle.matmul(x, x)) - + x = paddle.ones(shape=[2, 2], dtype='float32') x.stop_gradient = False vhp_rslt = paddle.autograd.vhp(func, x) @@ -872,7 +1323,7 @@ def vhp(func, inputs, v=None, create_graph=False, allow_unused=False): import paddle def func(x, y): return paddle.sum(paddle.matmul(x, x)) - + x = paddle.ones(shape=[2, 2], dtype='float32') x.stop_gradient = False y = paddle.ones(shape=[2, 2], dtype='float32') @@ -887,177 +1338,17 @@ def vhp(func, inputs, v=None, create_graph=False, allow_unused=False): # [[8., 8.], # [8., 8.]]), None]) ''' - xs = _tensors(inputs, "inputs") + xs = _as_tensors(inputs) if v is not None: - v = _tensors(v, "v") - - with gradient_scope( - xs, v, create_graph=create_graph, - allow_unused=allow_unused) as [xs, v, grad_fn, return_fn]: - outputs = func(*xs) - ys = _tensors(outputs, "outputs") - assert len(ys) == 1 and isinstance( - ys[0], paddle.Tensor - ) and ys[0].shape == [ - 1 - ], "The function to compute vhp should return a Tensor with a single element" - jac = grad_fn(ys, xs, create_graph=True) - vhp = grad_fn(jac, xs, v) - outputs, vhp = return_fn(outputs), return_fn(vhp) + v = _as_tensors(v) + xs, v = _separate(xs), _separate(v) + outputs = func(*xs) + ys = _as_tensors(outputs) + assert len(ys) == 1 and isinstance( + ys[0], framework.Variable + ) and ys[0].shape == [ + 1 + ], "The function to compute vhp should return a Tensor with a single element" + jac = _grad(ys, xs) + vhp = _grad(jac, xs, v) return outputs, vhp - - -class Jacobian(object): - r""" - Computes the Jacobian matrix of function `func`, which may take as input - single or multiple tensor typed arguments and output a single tensor or - multiple tensors. - - In case `func` is multi-input and multi-output, i.e., - - func: Callable[[Tensor, ...], [Tensor, ...]] - - `func` is treated as a vector valued function with all its inputs flattened - into a single one dimensional tensor, or a two dimensional tensor with the - first dimension retained as the batching dimension. The same rule applies to - the function outputs. - - Once the Jacobian J is constructed, there are four ways to retrieve the - partial derivatives. - - - J[:], retrieving the full matrix. - - - J[:, j], retrieving the partial derivatives w.r.t. the j'th input - variable. - - - J[i, :], retrieving the partial derivatives w.r.t. the i'th output - variable. - - - J[i, j], retrieving the partial derivatives w.r.t. the i'th output - variable and the j'th input variable. - - Examples: - .. code-block:: python - import paddle - import numpy as np - - def func(xs): - x, y = xs - return paddle.matmul(x, y) - - main = fluid.Program() - startup = fluid.Program() - with fluid.program_guard(main, startup): - x = paddle.static.data(name='x', shape=[2, 2], dtype='float32') - JJ = paddle.autograd.functional.Jacobian(func, [x, x]) - nrow, ncol = JJ.shape() - full_jacobian = JJ[:] - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - exe.run(startup) - - feeds = {'x': np.array([[2., 2.], [2., 1.]]).astype('float32')} - jacobian = exe.run(main, feed=feeds, fetch_list=[full_jacobian])[0] - print(jacobian) - # [[4. 2. 
2. 0. 4. 2. 2. 0.] - # [2. 3. 0. 2. 2. 3. 0. 2.] - # [2. 0. 3. 2. 2. 0. 3. 2.] - # [0. 2. 2. 2. 0. 2. 2. 2.]] - """ - - def __init__(self, func, inputs, batch=False): - r"""Constructing a Jacobian matrix. - - Parameters: - func (Callable): a Python function that takes as input a Tensor - or a Tensor list and outputs a Tensor or a Tensor list. - inputs (Tensor|list[Tensor]): a Tensor or a list of Tensors as - `func`'s input. - batch (bool): if True the 0'th axis is considered the batch - dimension, both on input and output. - """ - - def enable_grads(inputs): - if isinstance(inputs, (list, tuple)): - for x in inputs: - x.stop_gradient = False - else: - assert isinstance(inputs, paddle.fluid.framework.Variable), ( - f"Expecting {inputs} to be paddle.fluid.framework.Variable," - f" however it's found to be a(n) {type(inputs)}.") - inputs.stop_gradient = False - return inputs - - self.batch = batch - self.xs = enable_grads(inputs) - ys = func(inputs) - if not isinstance(ys, list): - ys = [ys] - self.y = self.flatten_all(ys) - self.ydim = self.y.shape[-1] - self.xdim = self.flatten_all(inputs).shape[-1] - self.bdim = self.y.shape[0] - self.jacobian = {} - - def flatten(self, x): - to = [x.shape[0], -1] if self.batch else [-1] - return x.reshape(to) - - def flatten_all(self, xs): - if isinstance(xs, (list, tuple)): - return paddle.concat([self.flatten(x) for x in xs], axis=-1) - else: - return self.flatten(xs) - - def shape(self): - return (self.ydim, self.xdim) - - def __getitem__(self, tup): - if hasattr(tup, '__iter__'): - i, j = tup - else: - i, j = tup, None - - full = isinstance(i, slice) - - if full: - if 'full' not in self.jacobian: - rows = [ - self.flatten_all(gradients(self.y[..., i], self.xs)) - for i in range(self.ydim) - ] - self.jacobian['full'] = full_jacobian = paddle.stack(rows) - else: - full_jacobian = self.jacobian['full'] - - return full_jacobian[i] if j is None else full_jacobian[i][..., j] - - assert 0 <= i < self.ydim, f"Jacobian index i={i} is not valid." - assert j is None or isinstance(j, slice) or (0 <= j < self.xdim), ( - f"Jacobian index j={j} is not valid.") - if 'full' in self.jacobian: - JJ = self.jacobian['full'] - else: - JJ = self.jacobian - if i not in self.jacobian: - self.jacobian[i] = self.flatten_all( - gradients(self.y[..., i], self.xs)) - - if j is None: - return JJ[i] - else: - return JJ[i][..., j] - - -class Hessian(object): - def __init__(self, func, inputs, batch=False): - f_x = lambda xs: Jacobian(func, xs, batch=batch)[0] - self.symbolic = Jacobian(f_x, inputs, batch=batch) - self.xs = inputs - self.batch = batch - - def __getitem__(self, tup): - return self.symbolic[tup] - - def shape(self): - return self.symbolic.shape() diff --git a/python/paddle/autograd/utils.py b/python/paddle/autograd/utils.py deleted file mode 100644 index 710c9ee18d..0000000000 --- a/python/paddle/autograd/utils.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import paddle - - -def _tensors(ts, name): - if isinstance(ts, (list, tuple)): - assert len(ts) > 0, "{} connot be empty".format(name) - for each_t in ts: - assert isinstance( - each_t, paddle.Tensor - ) or each_t is None, "Elements of {} must be paddle.Tensor or None".format( - name) - return list(ts) - else: - assert isinstance(ts, paddle.Tensor), "{} must be Tensor".format(name) - return [ts] - - -def _stack_tensor_or_return_none(origin_list): - assert len(origin_list) > 0, "Can't not stack an empty list" - return paddle.stack( - origin_list, axis=0) if isinstance(origin_list[0], - paddle.Tensor) else None - - -def _replace_none_with_zero_tensor(t, spec_t): - if t is None: - zero_t = paddle.zeros(shape=spec_t.shape, dtype=spec_t.dtype) - zero_t.stop_gradient = spec_t.stop_gradient - return zero_t - else: - return t diff --git a/python/paddle/fluid/tests/unittests/autograd/CMakeLists.txt b/python/paddle/fluid/tests/unittests/autograd/CMakeLists.txt index 6d9625483e..1f69abac01 100644 --- a/python/paddle/fluid/tests/unittests/autograd/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/autograd/CMakeLists.txt @@ -6,6 +6,5 @@ foreach(TEST_OP ${TEST_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${GC_ENVS}) endforeach(TEST_OP) -set_tests_properties(test_jacobian PROPERTIES TIMEOUT 50) -set_tests_properties(test_hessian PROPERTIES TIMEOUT 50) -set_tests_properties(test_vhp PROPERTIES TIMEOUT 50) +set_tests_properties(test_autograd_functional_dynamic PROPERTIES TIMEOUT 100) +set_tests_properties(test_autograd_functional_static PROPERTIES TIMEOUT 100) diff --git a/python/paddle/fluid/tests/unittests/autograd/config.py b/python/paddle/fluid/tests/unittests/autograd/config.py new file mode 100644 index 0000000000..311ca49d39 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/autograd/config.py @@ -0,0 +1,49 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import paddle + +DEVICES = [paddle.CPUPlace()] +if paddle.is_compiled_with_cuda(): + DEVICES.append(paddle.CUDAPlace(0)) + +DEFAULT_DTYPE = 'float64' + +# The numerical tolerance of different dtype of different order different +# derivative. It's a empirical value provided by Paddle Science team. +TOLERANCE = { + "float32": { + "first_order_grad": { + "rtol": 1e-3, + "atol": 1e-3, + "eps": 1e-4 + }, + "second_order_grad": { + "rtol": 1e-2, + "atol": 1e-2, + "eps": 1e-2 + } + }, + "float64": { + "first_order_grad": { + "rtol": 1e-7, + "atol": 1e-7, + "eps": 1e-7 + }, + "second_order_grad": { + "rtol": 1e-5, + "atol": 1e-5, + "eps": 1e-5 + } + } +} diff --git a/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py b/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py new file mode 100644 index 0000000000..e46c532eb0 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_dynamic.py @@ -0,0 +1,1233 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import typing +import unittest + +import numpy as np +import paddle +import paddle.compat as cpt +import paddle.nn.functional as F +from paddle.autograd.functional import _as_tensors + +import config +import utils +from utils import (_compute_numerical_batch_hessian, _compute_numerical_hessian, + _compute_numerical_vhp, _compute_numerical_jacobian, + _compute_numerical_batch_jacobian) +from utils import matmul, mul, nested, o2, pow, reduce, reduce_dim, unuse + + +def make_v(f, inputs): + outputs = _as_tensors(f(*inputs)) + return [paddle.ones_like(x) for x in outputs] + + +class TestAutogradFunctional(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.RAW_INPUTS = { + 'a': [1.0], + 'b': [1.0, 2.0], + 'c': [3.0, 4.0], + 'd': [[2.0], [3.0]], + 'A': [[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]], + 'B': [[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]], + } + + def setUp(self): + pass + + def gen_input(self, inp, stop_gradient=False): + if isinstance(inp, paddle.Tensor): + return inp + return paddle.to_tensor( + self.RAW_INPUTS[inp], stop_gradient=stop_gradient) + + def gen_inputs(self, inputs): + if isinstance(inputs, list): + inputs = [self.gen_input(x) for x in inputs] + else: + inputs = [self.gen_input(inputs)] + return inputs + + def gen_test_pairs(self, + func, + inputs, + v=None, + create_graph=False, + allow_unused=False): + def vjp_test(): + nonlocal v + xs = self.gen_inputs(inputs) + if v is not None: + v = self.gen_inputs(v) + outputs, inputs_grad = paddle.autograd.vjp(func, xs, v) + else: + outputs, inputs_grad = paddle.autograd.vjp(func, xs) + return outputs, inputs_grad + + def grad_test(): + nonlocal v + xs = self.gen_inputs(inputs) + if v is not None: + v = self.gen_inputs(v) + outputs = func(*xs) + if v is not None: + inputs_grad = paddle.grad( + outputs, + xs, + v, + create_graph=create_graph, + allow_unused=allow_unused) + else: + inputs_grad = paddle.grad( + outputs, + xs, + create_graph=create_graph, + allow_unused=allow_unused) + return outputs, inputs_grad + + return vjp_test, grad_test + + def gen_jvp_tests(self, + func, + inputs, + v=None, + create_graph=False, + allow_unused=False): + def jvp_test(): + nonlocal v + xs = self.gen_inputs(inputs) + if v is not None: + v = self.gen_inputs(v) + outputs, outputs_grad = paddle.autograd.jvp( + func, + xs, + v, + create_graph=create_graph, + allow_unused=allow_unused) + else: + outputs, outputs_grad = paddle.autograd.jvp( + func, + xs, + create_graph=create_graph, + allow_unused=allow_unused) + return outputs, outputs_grad + + return jvp_test + + def check_results(self, ref, res): + type_error = 'Result is different than expected in shape or type' + value_error = 'Result is different than expected values' + if ref is None: + self.assertTrue(res is None, type_error) + elif isinstance(ref, paddle.Tensor): + self.assertTrue(isinstance(res, paddle.Tensor), type_error) + np.testing.assert_allclose(res, ref) + else: + self.assertTrue(len(res) == len(ref), type_error) + for i in 
range(len(ref)): + self.check_results(ref[i], res[i]) + return True + + +class TestVJP(TestAutogradFunctional): + def test_vjp_i1o1(self): + test_cases = [ + [reduce, 'A'], # noqa + [reduce_dim, 'A'], # noqa + ] # noqa + for f, inputs in test_cases: + vjp, grad = self.gen_test_pairs(f, inputs) + vjp_result, grad_result = vjp(), grad() + self.check_results(grad_result, vjp_result) + + def test_vjp_i2o1(self): + test_cases = [ + [matmul, ['A', 'B']], # noqa + [mul, ['b', 'c']], # noqa + ] # noqa + for f, inputs in test_cases: + vjp, grad = self.gen_test_pairs(f, inputs) + vjp_result, grad_result = vjp(), grad() + self.check_results(grad_result, vjp_result) + + def test_vjp_i2o2(self): + test_cases = [ + [o2, ['A', 'A']], # noqa + ] # noqa + for f, inputs in test_cases: + inputs = self.gen_inputs(inputs) + v = make_v(f, inputs) + vjp, grad = self.gen_test_pairs(f, inputs, v=v) + vjp_result, grad_result = vjp(), grad() + self.check_results(grad_result, vjp_result) + + def test_vjp_i2o2_omitting_v(self): + test_cases = [ + [o2, ['A', 'A']], # noqa + ] # noqa + for f, inputs in test_cases: + inputs = self.gen_inputs(inputs) + vjp, grad = self.gen_test_pairs(f, inputs) + vjp_result, grad_result = vjp(), grad() + self.check_results(grad_result, vjp_result) + + def test_vjp_nested(self): + x = self.gen_input('a') + test_cases = [ + [nested(x), 'a'], # noqa + ] + for f, inputs in test_cases: + vjp, grad = self.gen_test_pairs(f, inputs) + vjp_result, grad_result = vjp(), grad() + self.check_results(grad_result, vjp_result) + + def test_vjp_aliased_input(self): + x = self.gen_input('a') + ref = self.gen_test_pairs(nested(x), 'a')[0] + aliased = self.gen_test_pairs(nested(x), x)[0] + ref_result, aliased_result = ref(), aliased() + self.check_results(ref_result, aliased_result) + + +@utils.place(config.DEVICES) +@utils.parameterize( + (utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'expected_exception'), ( + ('v_shape_not_equal_ys', utils.square, np.random.rand(3), + np.random.rand(1), RuntimeError), )) +class TestVJPException(unittest.TestCase): + def test_vjp(self): + with self.assertRaises(self.expected_exception): + paddle.autograd.vjp(self.fun, + paddle.to_tensor(self.xs), + paddle.to_tensor(self.v)) + + +def jac(grad_fn, f, inputs): + assert grad_fn in [paddle.autograd.vjp, paddle.autograd.jvp] + if grad_fn is paddle.autograd.jvp: + vs = [paddle.zeros_like(x) for x in inputs] + else: + outputs = f(*inputs) + if isinstance(outputs, paddle.Tensor): + outputs = [outputs] + vs = [paddle.zeros_like(y) for y in outputs] + JJ_cols = [] + for i, v in enumerate(vs): + v = v.flatten() + for j in range(len(v)): + _v = paddle.zeros_like(v).detach() + _v[j] = 1.0 + _v = _v.reshape(vs[i].shape) + _vs = vs.copy() + _vs[i] = _v + _, grads = grad_fn(f, inputs, _vs) + d_outs = paddle.concat([d_out.flatten() for d_out in grads]) + JJ_cols.append(d_outs) + # JJ is the fully unrolled jacobian + JJ = paddle.stack(JJ_cols) + if grad_fn is paddle.autograd.vjp: + JJ = JJ.t() + return JJ + + +class TestJVP(TestAutogradFunctional): + def test_jvp_i1o1(self): + test_cases = [ + [reduce, 'A'], # noqa + [reduce_dim, 'A'], # noqa + ] # noqa + for f, inputs in test_cases: + inputs = self.gen_inputs(inputs) + forward_jac = jac(paddle.autograd.jvp, f, inputs) + reverse_jac = jac(paddle.autograd.vjp, f, inputs) + self.check_results(forward_jac, reverse_jac) + + def test_jvp_i2o1(self): + test_cases = [ # noqa + [matmul, ['A', 'B']], # noqa + ] # noqa + for f, inputs in test_cases: + inputs = self.gen_inputs(inputs) + forward_jac = 
jac(paddle.autograd.jvp, f, inputs) + reverse_jac = jac(paddle.autograd.vjp, f, inputs) + self.check_results(forward_jac, reverse_jac) + + def test_jvp_i2o2(self): + test_cases = [ # noqa + [o2, ['A', 'A']], # noqa + ] # noqa + for f, inputs in test_cases: + inputs = self.gen_inputs(inputs) + forward_jac = jac(paddle.autograd.jvp, f, inputs) + reverse_jac = jac(paddle.autograd.vjp, f, inputs) + self.check_results(forward_jac, reverse_jac) + + def test_jvp_i2o2_omitting_v(self): + test_cases = [ # noqa + [o2, ['A', 'A']], # noqa + ] # noqa + for f, inputs in test_cases: + inputs = self.gen_inputs(inputs) + results_omitting_v = paddle.autograd.jvp(f, inputs) + v = [paddle.ones_like(x) for x in inputs] + results_with_v = paddle.autograd.jvp(f, inputs, v) + self.check_results(results_omitting_v, results_with_v) + + +@utils.place(config.DEVICES) +@utils.parameterize((utils.TEST_CASE_NAME, 'func', 'xs'), ( + ('1d_in_1d_out', utils.square, np.array([2., 3.])), + ('3d_in_3d_out', utils.square, np.random.rand(2, 3, 4)), + ('single_in_single_out', utils.square, np.random.rand(2, 3)), + ('multi_in_single_out', paddle.matmul, + (np.random.rand(2, 2), np.random.rand(2, 2))), )) +class TestJacobianClassNoBatch(unittest.TestCase): + def setUp(self): + self._dtype = self.xs[0].dtype if isinstance( + self.xs, typing.Sequence) else self.xs.dtype + self._eps = config.TOLERANCE.get(str(self._dtype)).get( + "first_order_grad").get("eps") + self._rtol = config.TOLERANCE.get(str(self._dtype)).get( + "first_order_grad").get("rtol") + self._atol = config.TOLERANCE.get(str(self._dtype)).get( + "first_order_grad").get("atol") + + self.xs = [paddle.to_tensor(x) for x in self.xs] if isinstance( + self.xs, typing.Sequence) else paddle.to_tensor(self.xs) + self._actual = paddle.autograd.Jacobian(self.func, self.xs, False) + self._expected = self._expected() + + def test_jacobian(self): + Index = collections.namedtuple('Index', ('type', 'value')) + indexes = (Index('all', (slice(0, None, None), slice(0, None, None))), + Index('row', (0, slice(0, None, None))), + Index('col', (slice(0, None, None), 0)), + Index('multi-row', (slice(0, 2, 1), slice(0, None, None)))) + self.assertEqual(self._actual[:].numpy().dtype, self._expected.dtype) + for index in indexes: + np.testing.assert_allclose( + self._actual.__getitem__(index.value), + self._expected.__getitem__(index.value), + rtol=self._rtol, + atol=self._atol, + err_msg=f'Testcase {index.type} index not passed, value is {index.value}' + ) + + def _expected(self): + jac = utils._compute_numerical_jacobian(self.func, self.xs, self._eps, + self._dtype) + return utils._np_concat_matrix_sequence(jac, utils.MatrixFormat.NM) + + +@utils.place(config.DEVICES) +@utils.parameterize((utils.TEST_CASE_NAME, 'func', 'xs'), ( + ('1d_in_1d_out', utils.square, np.array([[1., 2., 3.], [3., 4., 3.]])), + ('3d_in_3d_out', utils.square, np.random.rand(2, 3, 4)), + ('multi_in_single_out', utils.square, np.random.rand(2, 3)), )) +class TestJacobianClassBatchFirst(unittest.TestCase): + def setUp(self): + self._dtype = self.xs[0].dtype if isinstance( + self.xs, typing.Sequence) else self.xs.dtype + self._eps = config.TOLERANCE.get(str(self._dtype)).get( + "first_order_grad").get("eps") + self._rtol = config.TOLERANCE.get(str(self._dtype)).get( + "first_order_grad").get("rtol") + self._atol = config.TOLERANCE.get(str(self._dtype)).get( + "first_order_grad").get("atol") + + self.xs = [paddle.to_tensor(x) for x in self.xs] if isinstance( + self.xs, typing.Sequence) else paddle.to_tensor(self.xs) + 
self._actual = paddle.autograd.Jacobian(self.func, self.xs, True) + self._expected = self._expected() + + def test_jacobian(self): + Index = collections.namedtuple('Index', ('type', 'value')) + indexes = ( + Index('all', (slice(0, None, None), slice(0, None, None), + slice(0, None, None))), + Index('row', (slice(0, None, None), 0, slice(0, None, None))), + Index('col', + (slice(0, None, None), slice(0, None, None), 0)), Index( + 'batch', (slice(0, 2, None), slice(0, None, None), + slice(0, None, None))), + Index('multi_row', + (slice(0, 1, None), slice(0, 2, 1), slice(0, None, None)))) + self.assertEqual(self._actual[:].numpy().dtype, self._expected.dtype) + for index in indexes: + np.testing.assert_allclose( + self._actual.__getitem__(index.value), + self._expected.__getitem__(index.value), + rtol=self._rtol, + atol=self._atol, + err_msg=f'Testcase {index.type} index not passed, value is {index.value}' + ) + + def _expected(self): + jac = utils._compute_numerical_batch_jacobian( + self.func, self.xs, self._eps, self._dtype, False) + jac = utils._np_concat_matrix_sequence(jac, utils.MatrixFormat.NBM) + return utils._np_transpose_matrix_format(jac, utils.MatrixFormat.NBM, + utils.MatrixFormat.BNM) + + +class TestHessianClassNoBatch(unittest.TestCase): + @classmethod + def setUpClass(self): + self.shape = (2, 2) + self.dtype = 'float32' + self.np_dtype = np.float32 + self.numerical_delta = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("eps") + self.rtol = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("rtol") + self.atol = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("atol") + self.x = paddle.rand(shape=self.shape, dtype=self.dtype) + self.y = paddle.rand(shape=self.shape, dtype=self.dtype) + + def test_single_input(self): + def func(x): + return paddle.sum(paddle.matmul(x, x)) + + numerical_hessian = utils._compute_numerical_hessian( + func, self.x, self.numerical_delta, self.np_dtype) + numerical_hessian = utils._np_concat_matrix_sequence(numerical_hessian) + + self.x.stop_gradient = False + hessian = paddle.autograd.Hessian(func, self.x) + np.testing.assert_allclose(hessian[:].numpy(), numerical_hessian, + self.rtol, self.atol) + + def test_multi_input(self): + def func(x, y): + return paddle.sum(paddle.matmul(x, y)) + + numerical_hessian = utils._compute_numerical_hessian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + numerical_hessian = utils._np_concat_matrix_sequence(numerical_hessian) + self.x.stop_gradient = False + self.y.stop_gradient = False + hessian = paddle.autograd.Hessian(func, [self.x, self.y]) + np.testing.assert_allclose( + hessian[:].numpy(), + numerical_hessian, + rtol=self.rtol, + atol=self.atol) + + def test_allow_unused_true(self): + def func(x, y): + return paddle.sum(paddle.matmul(x, x)) + + numerical_hessian = utils._compute_numerical_hessian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + numerical_hessian = utils._np_concat_matrix_sequence(numerical_hessian) + self.x.stop_gradient = False + self.y.stop_gradient = False + hessian = paddle.autograd.Hessian(func, [self.x, self.y]) + np.testing.assert_allclose(hessian[:].numpy(), numerical_hessian, + self.rtol, self.atol) + + def test_create_graph_true(self): + def func(x): + return paddle.sum(F.sigmoid(x)) + + numerical_hessian = utils._compute_numerical_hessian( + func, self.x, self.numerical_delta, self.np_dtype) + numerical_hessian = utils._np_concat_matrix_sequence(numerical_hessian) + self.x.stop_gradient = False + 
hessian = paddle.autograd.Hessian(func, self.x) + assert hessian[:].stop_gradient == False + np.testing.assert_allclose(hessian[:].numpy(), numerical_hessian, + self.rtol, self.atol) + + def test_out_not_single(self): + def func(x): + return x * x + + with self.assertRaises(RuntimeError): + paddle.autograd.Hessian(func, paddle.ones([3])) + + +class TestHessianClassBatchFirst(unittest.TestCase): + @classmethod + def setUpClass(self): + self.x_shape = (5, 2) + self.weight_shape = (2, 4) + self.y_shape = (5, 2) + self.nbatch, self.nrow = 5, 2 + self.dtype = 'float32' + self.np_dtype = np.float32 + self.numerical_delta = config.TOLERANCE.get(self.dtype).get( + 'second_order_grad').get('eps') + self.rtol = config.TOLERANCE.get(self.dtype).get( + 'second_order_grad').get('rtol') + self.atol = config.TOLERANCE.get(self.dtype).get( + 'second_order_grad').get('atol') + self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype) + self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype) + self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype) + + def test_single_input(self): + def func(x): + return paddle.matmul(x * x, self.weight)[:, 0:1] + + expected = utils._compute_numerical_batch_hessian( + func, self.x, self.numerical_delta, self.np_dtype) + + H = paddle.autograd.Hessian(func, self.x, is_batched=True) + actual = utils._np_transpose_matrix_format( + H[:].numpy(), utils.MatrixFormat.BNM, utils.MatrixFormat.NBM) + actual = actual.reshape((H.shape[1], -1)) + + np.testing.assert_allclose(actual, expected, self.rtol, self.atol) + + def test_multi_input(self): + def func(x, y): + return paddle.matmul(x * x * y * y, self.weight)[:, 0:1] + + xs_len = 2 + expected = utils._compute_numerical_batch_hessian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + expected = np.reshape( + np.array(expected), + (xs_len, xs_len, self.nrow, self.nbatch, self.nrow)) + expected = [[n for n in row] for row in expected] + expected = utils._np_concat_matrix_sequence(expected) + + self.x.stop_gradient = False + self.y.stop_gradient = False + H = paddle.autograd.Hessian(func, [self.x, self.y], is_batched=True) + actual = utils._np_transpose_matrix_format( + H[:].numpy(), utils.MatrixFormat.BNM, utils.MatrixFormat.NBM) + + np.testing.assert_allclose(actual, expected, self.rtol, self.atol) + + def test_allow_unused(self): + def func(x, y): + return paddle.matmul(x * x, self.weight)[:, 0:1] + + xs_len = 2 + expected = utils._compute_numerical_batch_hessian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + expected = np.reshape( + np.array(expected), + (xs_len, xs_len, self.nrow, self.nbatch, self.nrow)) + expected = [[n for n in row] for row in expected] + expected = utils._np_concat_matrix_sequence(expected) + expected = utils._np_transpose_matrix_format( + expected, utils.MatrixFormat.NBM, utils.MatrixFormat.BNM) + + actual = paddle.autograd.Hessian( + func, [self.x, self.y], is_batched=True)[:] + + np.testing.assert_allclose( + actual, expected, rtol=self.rtol, atol=self.atol) + + def test_stop_gradient(self): + def func(x): + return paddle.matmul(x * x, self.weight)[:, 0:1] + + expected = utils._compute_numerical_batch_hessian( + func, self.x, self.numerical_delta, self.np_dtype) + + x = self.x.clone() + x.stop_gradient = True + H = paddle.autograd.Hessian(func, self.x, is_batched=True)[:] + actual = utils._np_transpose_matrix_format( + H[:].numpy(), utils.MatrixFormat.BNM, utils.MatrixFormat.NBM) + actual = actual.reshape((H.shape[1], -1)) + + 
np.testing.assert_allclose(actual, expected, self.rtol, self.atol) + + def test_out_not_single(self): + def func(x): + return (x * x) + + with self.assertRaises(RuntimeError): + paddle.autograd.Hessian(func, paddle.ones((3, 3)), is_batched=True) + + +class TestHessian(unittest.TestCase): + @classmethod + def setUpClass(self): + self.shape = (2, 2) + self.dtype = 'float32' + self.np_dtype = np.float32 + self.numerical_delta = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("eps") + self.rtol = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("rtol") + self.atol = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("atol") + self.x = paddle.rand(shape=self.shape, dtype=self.dtype) + self.y = paddle.rand(shape=self.shape, dtype=self.dtype) + + def test_single_input(self): + def func(x): + return paddle.sum(paddle.matmul(x, x)) + + numerical_hessian = _compute_numerical_hessian( + func, self.x, self.numerical_delta, self.np_dtype) + + self.x.stop_gradient = False + hessian = paddle.autograd.hessian(func, self.x) + np.testing.assert_allclose(hessian.numpy(), numerical_hessian[0][0], + self.rtol, self.atol) + + def test_multi_input(self): + def func(x, y): + return paddle.sum(paddle.matmul(x, y)) + + numerical_hessian = _compute_numerical_hessian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + + self.x.stop_gradient = False + self.y.stop_gradient = False + hessian = paddle.autograd.hessian(func, [self.x, self.y]) + for i in range(len(hessian)): + for j in range(len(hessian[0])): + np.testing.assert_allclose(hessian[i][j].numpy(), + numerical_hessian[i][j], self.rtol, + self.atol) + + def test_allow_unused_false(self): + def func(x, y): + return paddle.sum(paddle.matmul(x, x)) + + try: + self.x.stop_gradient = False + self.y.stop_gradient = False + hessian = paddle.autograd.hessian(func, [self.x, self.y]) + except ValueError as e: + error_msg = cpt.get_exception_message(e) + assert error_msg.find("allow_unused") > 0 + + def test_allow_unused_true(self): + def func(x, y): + return paddle.sum(paddle.matmul(x, x)) + + numerical_hessian = _compute_numerical_hessian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + self.y.stop_gradient = False + hessian = paddle.autograd.hessian( + func, [self.x, self.y], allow_unused=True) + for i in range(len(hessian)): + for j in range(len(hessian[0])): + if i == j == 0: + np.testing.assert_allclose(hessian[i][j].numpy(), + numerical_hessian[i][j], + self.rtol, self.atol) + else: + assert hessian[i][j] is None + + def test_create_graph_false(self): + def func(x): + return paddle.sum(paddle.matmul(x, x)) + + numerical_hessian = _compute_numerical_hessian( + func, self.x, self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + hessian = paddle.autograd.hessian(func, self.x) + assert hessian.stop_gradient == True + np.testing.assert_allclose(hessian.numpy(), numerical_hessian[0][0], + self.rtol, self.atol) + try: + paddle.grad(hessian, self.x) + except RuntimeError as e: + error_msg = cpt.get_exception_message(e) + assert error_msg.find("has no gradient") > 0 + + def test_create_graph_true(self): + def func(x): + return paddle.sum(F.sigmoid(x)) + + numerical_hessian = _compute_numerical_hessian( + func, self.x, self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + hessian = paddle.autograd.hessian(func, self.x, create_graph=True) + assert hessian.stop_gradient == False + np.testing.assert_allclose(hessian.numpy(), 
numerical_hessian[0][0], + self.rtol, self.atol) + triple_grad = paddle.grad(hessian, self.x) + assert triple_grad is not None + + +class TestHessianFloat64(TestHessian): + @classmethod + def setUpClass(self): + self.shape = (2, 2) + self.dtype = 'float64' + self.np_dtype = np.float64 + self.numerical_delta = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("eps") + self.rtol = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("rtol") + self.atol = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("atol") + self.x = paddle.rand(shape=self.shape, dtype=self.dtype) + self.y = paddle.rand(shape=self.shape, dtype=self.dtype) + + +class TestBatchHessian(unittest.TestCase): + @classmethod + def setUpClass(self): + self.x_shape = (5, 2) + self.weight_shape = (2, 4) + self.y_shape = (5, 2) + self.dtype = 'float32' + self.np_dtype = np.float32 + self.numerical_delta = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("eps") + self.rtol = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("rtol") + self.atol = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("atol") + self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype) + self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype) + self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype) + + def test_single_input(self): + def func(x): + return paddle.matmul(x * x, self.weight)[:, 0:1] + + numerical_hessian = _compute_numerical_batch_hessian( + func, self.x, self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + hessian = paddle.autograd.batch_hessian(func, self.x, create_graph=True) + np.testing.assert_allclose(hessian, numerical_hessian, self.rtol, + self.atol) + + def test_multi_input(self): + def func(x, y): + return paddle.matmul(x * x * y * y, self.weight)[:, 0:1] + + numerical_hessian = _compute_numerical_batch_hessian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + + self.x.stop_gradient = False + self.y.stop_gradient = False + hessian = paddle.autograd.batch_hessian(func, [self.x, self.y]) + + shape_tensor = paddle.to_tensor(numerical_hessian).astype("float64") + hessian_reshape = np.reshape(hessian, (shape_tensor.shape)) + np.testing.assert_allclose(hessian_reshape, numerical_hessian, + self.rtol, self.atol) + + def test_allow_unused_false(self): + def func(x, y): + return paddle.matmul(x * x, self.weight)[:, 0:1] + + try: + self.x.stop_gradient = False + self.y.stop_gradient = False + hessian = paddle.autograd.batch_hessian(func, [self.x, self.y]) + except ValueError as e: + error_msg = cpt.get_exception_message(e) + assert error_msg.find("allow_unused") > 0 + + def test_allow_unused_true(self): + def func(x, y): + return paddle.matmul(x * x, self.weight)[:, 0:1] + + numerical_hessian = _compute_numerical_batch_hessian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + self.y.stop_gradient = False + hessian = paddle.autograd.batch_hessian( + func, [self.x, self.y], allow_unused=True) + + for i in range(len(hessian)): + for j in range(len(hessian[0])): + if i == j == 0: + numerical_hessian = np.stack( + (numerical_hessian[i][j], numerical_hessian[i][j + 1]), + axis=0) + np.testing.assert_allclose(hessian[i][j], numerical_hessian, + self.rtol, self.atol) + else: + assert hessian[i][j] is None + + def test_create_graph_false(self): + def func(x): + return paddle.matmul(x * x, self.weight)[:, 0:1] + + numerical_hessian = _compute_numerical_batch_hessian( + 
func, self.x, self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + hessian = paddle.autograd.batch_hessian(func, self.x) + assert hessian.stop_gradient == True + np.testing.assert_allclose(hessian.numpy(), numerical_hessian, + self.rtol, self.atol) + try: + paddle.grad(hessian, self.x) + except RuntimeError as e: + error_msg = cpt.get_exception_message(e) + assert error_msg.find("has no gradient") > 0 + + def test_create_graph_true(self): + def func(x): + return paddle.matmul(x * x, self.weight)[:, 0:1] + + numerical_hessian = _compute_numerical_batch_hessian( + func, self.x, self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + hessian = paddle.autograd.batch_hessian(func, self.x, create_graph=True) + assert hessian.stop_gradient == False + np.testing.assert_allclose(hessian.numpy(), numerical_hessian, + self.rtol, self.atol) + triple_grad = paddle.grad(hessian, self.x) + assert triple_grad is not None + + +class TestBatchHessianFloat64(TestBatchHessian): + @classmethod + def setUpClass(self): + self.x_shape = (5, 2) + self.weight_shape = (2, 4) + self.y_shape = (5, 2) + self.dtype = 'float64' + self.np_dtype = np.float64 + self.numerical_delta = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("eps") + self.rtol = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("rtol") + self.atol = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("atol") + self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype) + self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype) + self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype) + + +class TestVHP(unittest.TestCase): + @classmethod + def setUpClass(self): + self.shape = (2, 2) + self.dtype = 'float32' + self.np_dtype = np.float32 + self.numerical_delta = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("eps") + self.rtol = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("rtol") + self.atol = config.TOLERANCE.get(self.dtype).get( + "second_order_grad").get("atol") + self.x = paddle.rand(shape=self.shape, dtype=self.dtype) + self.y = paddle.rand(shape=self.shape, dtype=self.dtype) + self.vx = paddle.rand(shape=self.shape, dtype=self.dtype) + self.vy = paddle.rand(shape=self.shape, dtype=self.dtype) + + def test_single_input(self): + def func(x): + return paddle.sum(paddle.matmul(x, x)) + + numerical_func_output = func(self.x).numpy() + numerical_vhp = _compute_numerical_vhp( + func, self.x, self.vx, self.numerical_delta, self.np_dtype) + + self.x.stop_gradient = False + func_output, vhp = paddle.autograd.vhp(func, self.x, self.vx) + np.testing.assert_allclose(func_output.numpy(), numerical_func_output, + self.rtol, self.atol) + np.testing.assert_allclose(vhp[0].numpy(), numerical_vhp[0], self.rtol, + self.atol) + + def test_multi_input(self): + def func(x, y): + return paddle.sum(paddle.matmul(x, y)) + + numerical_func_output = func(self.x, self.y).numpy() + numerical_vhp = _compute_numerical_vhp( + func, [self.x, self.y], [self.vx, self.vy], self.numerical_delta, + self.np_dtype) + + self.x.stop_gradient = False + self.y.stop_gradient = False + func_output, vhp = paddle.autograd.vhp(func, [self.x, self.y], + [self.vx, self.vy]) + np.testing.assert_allclose(func_output.numpy(), numerical_func_output, + self.rtol, self.atol) + for i in range(len(vhp)): + np.testing.assert_allclose(vhp[i].numpy(), numerical_vhp[i], + self.rtol, self.atol) + + def test_v_default(self): + def func(x, y): + return paddle.sum(paddle.matmul(x, y)) + + 
numerical_func_output = func(self.x, self.y).numpy() + vx = paddle.ones(self.vx.shape, dtype=self.vx.dtype) + vy = paddle.ones(self.vy.shape, dtype=self.vy.dtype) + numerical_vhp = _compute_numerical_vhp(func, [self.x, self.y], + [vx, vy], self.numerical_delta, + self.np_dtype) + + self.x.stop_gradient = False + self.y.stop_gradient = False + func_output, vhp = paddle.autograd.vhp(func, [self.x, self.y]) + np.testing.assert_allclose(func_output.numpy(), numerical_func_output, + self.rtol, self.atol) + for i in range(len(vhp)): + np.testing.assert_allclose(vhp[i].numpy(), numerical_vhp[i], + self.rtol, self.atol) + + def test_allow_unused_true(self): + def func(x, y): + return paddle.sum(paddle.matmul(x, x)) + + numerical_func_output = func(self.x, self.y).numpy() + numerical_vhp = _compute_numerical_vhp( + func, [self.x, self.y], [self.vx, self.vy], self.numerical_delta, + self.np_dtype) + + self.x.stop_gradient = False + self.y.stop_gradient = False + func_output, vhp = paddle.autograd.vhp(func, [self.x, self.y], + [self.vx, self.vy]) + np.testing.assert_allclose(func_output.numpy(), numerical_func_output, + self.rtol, self.atol) + np.testing.assert_allclose(vhp[0].numpy(), numerical_vhp[0], self.rtol, + self.atol) + + def test_create_graph_true(self): + def func(x): + return paddle.sum(F.sigmoid(x)) + + numerical_func_output = func(self.x).numpy() + numerical_vhp = _compute_numerical_vhp( + func, self.x, self.vx, self.numerical_delta, self.np_dtype) + + self.x.stop_gradient = False + func_output, vhp = paddle.autograd.vhp(func, self.x, self.vx) + np.testing.assert_allclose(func_output.numpy(), numerical_func_output, + self.rtol, self.atol) + assert vhp[0].stop_gradient == False + np.testing.assert_allclose(vhp[0].numpy(), numerical_vhp[0], self.rtol, + self.atol) + triple_grad = paddle.grad(vhp, self.x) + assert triple_grad is not None + + +class TestJacobian(unittest.TestCase): + @classmethod + def setUpClass(self): + self.shape = (4, 4) + self.dtype = 'float32' + self.np_dtype = np.float32 + self.numerical_delta = 1e-4 + self.rtol = 1e-3 + self.atol = 1e-3 + self.x = paddle.rand(shape=self.shape, dtype=self.dtype) + self.y = paddle.rand(shape=self.shape, dtype=self.dtype) + + def test_single_input_and_single_output(self): + def func(x): + return paddle.matmul(x, x) + + numerical_jacobian = _compute_numerical_jacobian( + func, self.x, self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + jacobian = paddle.autograd.jacobian(func, self.x) + np.testing.assert_allclose(jacobian.numpy(), numerical_jacobian[0][0], + self.rtol, self.atol) + + def test_single_input_and_multi_output(self): + def func(x): + return paddle.matmul(x, x), x * x + + numerical_jacobian = _compute_numerical_jacobian( + func, self.x, self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + jacobian = paddle.autograd.jacobian(func, self.x) + for i in range(len(jacobian)): + np.testing.assert_allclose(jacobian[i].numpy(), + numerical_jacobian[i][0], self.rtol, + self.atol) + + def test_multi_input_and_single_output(self): + def func(x, y): + return paddle.matmul(x, y) + + numerical_jacobian = _compute_numerical_jacobian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + self.y.stop_gradient = False + jacobian = paddle.autograd.jacobian(func, [self.x, self.y]) + for j in range(len(jacobian)): + np.testing.assert_allclose(jacobian[j].numpy(), + numerical_jacobian[0][j], self.rtol, + self.atol) + + def test_multi_input_and_multi_output(self): + def 
func(x, y): + return paddle.matmul(x, y), x * y + + numerical_jacobian = _compute_numerical_jacobian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + self.y.stop_gradient = False + jacobian = paddle.autograd.jacobian(func, [self.x, self.y]) + for i in range(len(jacobian)): + for j in range(len(jacobian[0])): + np.testing.assert_allclose(jacobian[i][j].numpy(), + numerical_jacobian[i][j], self.rtol, + self.atol) + + def test_allow_unused_false(self): + def func(x, y): + return paddle.matmul(x, x) + + try: + self.x.stop_gradient = False + self.y.stop_gradient = False + jacobian = paddle.autograd.jacobian(func, [self.x, self.y]) + except ValueError as e: + error_msg = cpt.get_exception_message(e) + assert error_msg.find("allow_unused") > 0 + + def test_allow_unused_true(self): + def func(x, y): + return paddle.matmul(x, x) + + numerical_jacobian = _compute_numerical_jacobian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + self.y.stop_gradient = False + jacobian = paddle.autograd.jacobian( + func, [self.x, self.y], allow_unused=True) + np.testing.assert_allclose( + jacobian[0].numpy(), numerical_jacobian[0][0], self.rtol, self.atol) + assert jacobian[1] is None + + def test_create_graph_false(self): + def func(x, y): + return paddle.matmul(x, y) + + numerical_jacobian = _compute_numerical_jacobian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + self.y.stop_gradient = False + jacobian = paddle.autograd.jacobian(func, [self.x, self.y]) + for j in range(len(jacobian)): + assert jacobian[j].stop_gradient == True + np.testing.assert_allclose(jacobian[j].numpy(), + numerical_jacobian[0][j], self.rtol, + self.atol) + try: + paddle.grad(jacobian[0], [self.x, self.y]) + except RuntimeError as e: + error_msg = cpt.get_exception_message(e) + assert error_msg.find("has no gradient") > 0 + + def test_create_graph_true(self): + def func(x, y): + return paddle.matmul(x, y) + + numerical_jacobian = _compute_numerical_jacobian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + self.y.stop_gradient = False + jacobian = paddle.autograd.jacobian( + func, [self.x, self.y], create_graph=True) + for j in range(len(jacobian)): + assert jacobian[j].stop_gradient == False + np.testing.assert_allclose(jacobian[j].numpy(), + numerical_jacobian[0][j], self.rtol, + self.atol) + double_grad = paddle.grad(jacobian[0], [self.x, self.y]) + assert double_grad is not None + + +class TestJacobianFloat64(TestJacobian): + @classmethod + def setUpClass(self): + self.shape = (4, 4) + self.dtype = 'float64' + self.np_dtype = np.float64 + self.numerical_delta = 1e-7 + self.rtol = 1e-7 + self.atol = 1e-7 + self.x = paddle.rand(shape=self.shape, dtype=self.dtype) + self.y = paddle.rand(shape=self.shape, dtype=self.dtype) + + +class TestJacobianBatch(unittest.TestCase): + @classmethod + def setUpClass(self): + self.x_shape = (4, 2) + self.weight_shape = (2, 4) + self.y_shape = (4, 2) + self.dtype = 'float32' + self.np_dtype = np.float32 + self.numerical_delta = 1e-4 + self.rtol = 1e-3 + self.atol = 1e-3 + self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype) + self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype) + self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype) + + def test_batch_single_input_and_batch_single_output(self): + def func(x): + return paddle.matmul(paddle.matmul(x, self.weight), self.y) + + 
numerical_jacobian = _compute_numerical_batch_jacobian( + func, [self.x], self.numerical_delta, self.np_dtype) + + self.x.stop_gradient = False + batch_jacobian = paddle.autograd.batch_jacobian( + func, + self.x, ) + + self.assertTrue( + np.allclose(batch_jacobian.numpy().all(), numerical_jacobian[0][0] + .all())) + + def test_batch_single_input_and_batch_multi_output(self): + def func(x): + return paddle.matmul(paddle.matmul(x, self.weight), self.y), x * x + + numerical_jacobian = _compute_numerical_batch_jacobian( + func, [self.x], self.numerical_delta, self.np_dtype) + + self.x.stop_gradient = False + batch_jacobian = paddle.autograd.batch_jacobian( + func, + self.x, ) + + for i in range(len(batch_jacobian)): + np.testing.assert_allclose(batch_jacobian[i].numpy(), + numerical_jacobian[i][0], self.rtol, + self.atol) + + def test_batch_multi_input_and_batch_single_output(self): + def func(x, y): + return x * y + + numerical_jacobian = _compute_numerical_batch_jacobian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + + self.x.stop_gradient = False + self.y.stop_gradient = False + batch_jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y]) + + for j in range(len(batch_jacobian)): + np.testing.assert_allclose(batch_jacobian[j].numpy(), + numerical_jacobian[0][j], self.rtol, + self.atol) + + def test_batch_multi_input_and_batch_multi_output(self): + def func(x, y): + return x * y, x * y + + numerical_jacobian = _compute_numerical_batch_jacobian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + + self.x.stop_gradient = False + self.y.stop_gradient = False + batch_jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y]) + + for i in range(len(batch_jacobian)): + np.testing.assert_allclose(batch_jacobian[i], numerical_jacobian[i], + self.rtol, self.atol) + + def test_allow_unused_false(self): + def func(x, y): + return x * x + + try: + self.x.stop_gradient = False + self.y.stop_gradient = False + jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y]) + except ValueError as e: + error_msg = cpt.get_exception_message(e) + assert error_msg.find("allow_unused") > 0 + + def test_allow_unused_true(self): + def func(x, y): + return x * x + + numerical_jacobian = _compute_numerical_batch_jacobian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + self.y.stop_gradient = False + jacobian = paddle.autograd.batch_jacobian( + func, [self.x, self.y], allow_unused=True) + + np.testing.assert_allclose( + jacobian[0].numpy(), numerical_jacobian[0][0], self.rtol, self.atol) + assert jacobian[1] is None + + def test_create_graph_false(self): + def func(x, y): + return x * y + + numerical_jacobian = _compute_numerical_batch_jacobian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + self.y.stop_gradient = False + jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y]) + for j in range(len(jacobian)): + assert jacobian[j].stop_gradient == True + np.testing.assert_allclose(jacobian[j].numpy(), + numerical_jacobian[0][j], self.rtol, + self.atol) + try: + paddle.grad(jacobian[0], [self.x, self.y]) + except RuntimeError as e: + error_msg = cpt.get_exception_message(e) + assert error_msg.find("has no gradient") > 0 + + def test_create_graph_true(self): + def func(x, y): + return x * y + + numerical_jacobian = _compute_numerical_batch_jacobian( + func, [self.x, self.y], self.numerical_delta, self.np_dtype) + self.x.stop_gradient = False + 
self.y.stop_gradient = False + jacobian = paddle.autograd.batch_jacobian( + func, [self.x, self.y], create_graph=True) + for j in range(len(jacobian)): + assert jacobian[j].stop_gradient == False + np.testing.assert_allclose(jacobian[j].numpy(), + numerical_jacobian[0][j], self.rtol, + self.atol) + double_grad = paddle.grad(jacobian[0], [self.x, self.y]) + assert double_grad is not None + + +class TestJacobianBatchFloat64(TestJacobianBatch): + @classmethod + def setUpClass(self): + self.x_shape = (12, 2) + self.weight_shape = (2, 12) + self.y_shape = (12, 2) + self.dtype = 'float64' + self.np_dtype = np.float64 + self.numerical_delta = config.TOLERANCE.get(self.dtype).get( + 'second_order_grad').get('eps') + self.rtol = config.TOLERANCE.get(self.dtype).get( + 'second_order_grad').get('rtol') + self.atol = config.TOLERANCE.get(self.dtype).get( + 'second_order_grad').get('atol') + self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype) + self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype) + self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_static.py b/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_static.py new file mode 100644 index 0000000000..8801664fdc --- /dev/null +++ b/python/paddle/fluid/tests/unittests/autograd/test_autograd_functional_static.py @@ -0,0 +1,455 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
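+# Unit tests for the functional autograd API (paddle.autograd.vjp, Jacobian,
+# Hessian) under static graph mode. Results are checked against
+# paddle.static.gradients and finite-difference approximations.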
+ +import typing +import unittest + +import numpy as np +import paddle +import paddle.fluid as fluid + +import config +import utils +from utils import (_compute_numerical_batch_jacobian, + _compute_numerical_jacobian) +from paddle.autograd.functional import _as_tensors + +paddle.enable_static() + + +@utils.place(config.DEVICES) +@utils.parameterize((utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'stop_gradient'), ( + ('tensor_input', utils.reduce, np.random.rand(2, 3), None, False), + ('tensor_sequence_input', utils.reduce, np.random.rand(2, 3), None, False), + ('v_not_none', utils.reduce, np.random.rand(2, 3), np.random.rand(1), + False), + ('xs_stop_gradient', utils.reduce, np.random.rand(2, 3), np.random.rand(1), + True), + ('func_mutmul', utils.matmul, (np.random.rand(3, 2), np.random.rand(2, 3)), + None, False), + ('func_mul', utils.mul, (np.random.rand(3, 3), np.random.rand(3, 3)), None, + False), + ('func_out_two', utils.o2, (np.random.rand(10), np.random.rand(10)), None, + False), )) +class TestVJP(unittest.TestCase): + def setUp(self): + self.dtype = str(self.xs[0].dtype) if isinstance( + self.xs, typing.Sequence) else str(self.xs.dtype) + self._rtol = config.TOLERANCE.get(str(self.dtype)).get( + "first_order_grad").get("rtol") + self._atol = config.TOLERANCE.get(str(self.dtype)).get( + "first_order_grad").get("atol") + + def _vjp(self): + exe = paddle.static.Executor() + sp = paddle.static.Program() + mp = paddle.static.Program() + with paddle.static.program_guard(mp, sp): + feed, static_xs, static_v = gen_static_data_and_feed( + self.xs, self.v, stop_gradient=self.stop_gradient) + ys, xs_grads = paddle.autograd.vjp(self.fun, static_xs, static_v) + exe.run(sp) + return exe.run(mp, feed=feed, fetch_list=[ys, xs_grads]) + + def _expected_vjp(self): + exe = paddle.static.Executor() + sp = paddle.static.Program() + mp = paddle.static.Program() + with paddle.static.program_guard(mp, sp): + feed, static_xs, static_v = gen_static_data_and_feed(self.xs, + self.v, False) + ys = self.fun(*static_xs) if isinstance( + static_xs, typing.Sequence) else self.fun(static_xs) + xs_grads = paddle.static.gradients(ys, static_xs, static_v) + exe.run(sp) + return exe.run(mp, feed=feed, fetch_list=[ys, xs_grads]) + + def test_vjp(self): + actual = self._vjp() + expected = self._expected_vjp() + self.assertEqual(len(actual), len(expected)) + for i in range(len(actual)): + np.testing.assert_allclose( + actual[i], expected[i], rtol=self._rtol, atol=self._atol) + + +@utils.place(config.DEVICES) +@utils.parameterize( + (utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'expected_exception'), ( + ('v_shape_not_equal_ys', utils.square, np.random.rand(3), + np.random.rand(1), RuntimeError), )) +class TestVJPException(unittest.TestCase): + def setUp(self): + self.exe = paddle.static.Executor() + + def _vjp(self): + sp = paddle.static.Program() + mp = paddle.static.Program() + with paddle.static.program_guard(mp, sp): + feed, static_xs, static_v = gen_static_data_and_feed(self.xs, + self.v) + ys, xs_grads = paddle.autograd.vjp(self.fun, static_xs, static_v) + self.exe.run(sp) + return self.exe.run(mp, feed, fetch_list=[ys, xs_grads]) + + def test_vjp(self): + with self.assertRaises(self.expected_exception): + self._vjp() + + +def gen_static_data_and_feed(xs, v, stop_gradient=True): + feed = {} + if isinstance(xs, typing.Sequence): + static_xs = [] + for i, x in enumerate(xs): + x = paddle.static.data(f"x{i}", x.shape, x.dtype) + x.stop_gradient = stop_gradient + static_xs.append(x) + feed.update({f'x{idx}': value for idx, value 
in enumerate(xs)})
+    else:
+        static_xs = paddle.static.data('x', xs.shape, xs.dtype)
+        static_xs.stop_gradient = stop_gradient
+        feed.update({'x': xs})
+
+    if isinstance(v, typing.Sequence):
+        static_v = []
+        for i, e in enumerate(v):
+            e = paddle.static.data(f'v{i}', e.shape, e.dtype)
+            e.stop_gradient = stop_gradient
+            static_v.append(e)
+        feed.update({f'v{idx}': value for idx, value in enumerate(v)})
+    elif v is not None:
+        static_v = paddle.static.data('v', v.shape, v.dtype)
+        static_v.stop_gradient = stop_gradient
+        feed.update({'v': v})
+    else:
+        static_v = v
+
+    return feed, static_xs, static_v
+
+
+def approx_jacobian(f, xs, dtype, eps=1e-5, batch=False):
+    r"""Computes an approximate Jacobian matrix of a multi-valued function
+    using finite differences.
+
+    The function input is required to be an np array or a list of np
+    arrays.
+    """
+
+    def flatten(x):
+        if len(x.shape) > 0:
+            to = [x.shape[0], -1] if batch else [-1]
+            return x.reshape(to)
+        else:
+            return x
+
+    def flatten_all(xs):
+        if isinstance(xs, list):
+            flattened = np.concatenate([flatten(x) for x in xs], axis=-1)
+        else:
+            flattened = flatten(xs)
+        return flattened
+
+    def x_like(x, orig_x):
+        return x.reshape(orig_x.shape)
+
+    def _f(x):
+        if multi_inps:
+            _xs = np.split(x, splits, axis=-1)
+            _xs = [x_like(_x, _o) for _x, _o in zip(_xs, xs)]
+            outs = f(_xs)
+        else:
+            outs = f(x)
+        return flatten_all(outs)
+
+    multi_inps = False if isinstance(xs, np.ndarray) else True
+    x = flatten_all(xs)
+    xdim = x.shape[-1]
+    splits = []
+
+    if multi_inps:
+        split = 0
+        for inp in xs:
+            split += flatten(inp).shape[-1]
+            splits.append(split)
+
+    ds = eps * np.eye(xdim, dtype=dtype)
+
+    fprimes_by_x = [(0.5 * (_f(x + d) - _f(x - d)) / eps) for d in ds]
+    fprimes_by_y = np.stack(fprimes_by_x, axis=-1)
+    return np.transpose(fprimes_by_y, [1, 0, 2]) if batch else fprimes_by_y
+
+
+def make_tensors(inps):
+    if isinstance(inps, list):
+        xs = [
+            paddle.static.data(
+                f'x{i}', inp.shape, dtype=inp.dtype)
+            for i, inp in enumerate(inps)
+        ]
+    else:
+        xs = paddle.static.data(name='x', shape=inps.shape, dtype=inps.dtype)
+    return xs
+
+
+all_data_shapes = {
+    'A': [[1., 2.]],
+    'B': [[1., 2.], [2., 1.]],
+    'C': [[2., 2.], [2., 1.]],
+    'D': [[[2., 2.], [2., 1.]], [[1., 2.], [2., 1.]]],
+    'E': [[[3., 4.], [2., 3.]], [[2., 1.], [1., 3.]]],
+}
+
+
+def prepare_data(test, input_shapes, dtype):
+    for name, shape in input_shapes.items():
+        setattr(test, name, np.array(shape, dtype=dtype))
+
+
+class TestJacobianFloat32(unittest.TestCase):
+    @classmethod
+    def setUpClass(self):
+        paddle.enable_static()
+        if fluid.core.is_compiled_with_cuda():
+            self.place = fluid.CUDAPlace(0)
+        else:
+            self.place = fluid.CPUPlace()
+        self.dtype = 'float32'
+        self.np_dtype = np.float32
+        prepare_data(self, all_data_shapes, self.dtype)
+        self.eps = config.TOLERANCE.get(self.dtype).get('first_order_grad').get(
+            'eps')
+        # self.rtol = config.TOLERANCE.get(self.dtype).get('first_order_grad').get('rtol')
+        # self.atol = config.TOLERANCE.get(self.dtype).get('first_order_grad').get('atol')
+        # Don't use the tolerances from config; they cause this test case to fail.
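+        # Looser, hand-picked tolerances are used for the float32 finite-difference checks.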
+ self.rtol = 1e-2 + self.atol = 1e-2 + + def run_test_by_fullmatrix(self, pd_f, np_f, inps, batch=False): + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + xs = make_tensors(inps) + JJ = paddle.autograd.functional.Jacobian(pd_f, xs, is_batched=batch) + if batch: + _, nrow, ncol = JJ.shape + else: + nrow, ncol = JJ.shape + full_jacobian = JJ[:] + exe = fluid.Executor(self.place) + exe.run(startup) + if isinstance(inps, list): + feeds = {f'x{i}': x for i, x in enumerate(inps)} + else: + feeds = {'x': inps} + pd_jacobians = exe.run(main, feed=feeds, fetch_list=[full_jacobian])[0] + np_jacobians = approx_jacobian( + np_f, inps, self.dtype, self.eps, batch=batch) + if batch: + np_jacobians = utils._np_transpose_matrix_format( + np_jacobians, utils.MatrixFormat.NBM, utils.MatrixFormat.BNM) + + np.testing.assert_allclose(pd_jacobians, np_jacobians, self.rtol, + self.atol) + + def run_test_by_rows(self, pd_f, np_f, inps, batch=False): + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + xs = make_tensors(inps) + JJ = paddle.autograd.functional.Jacobian(pd_f, xs, is_batched=batch) + if batch: + nbatch, nrow, ncol = JJ.shape + rows = [JJ[:, i, :] for i in range(nrow)] + else: + nrow, ncol = JJ.shape + rows = [JJ[i, :] for i in range(nrow)] + + exe = fluid.Executor(self.place) + exe.run(startup) + if isinstance(inps, list): + feeds = {f'x{i}': x for i, x in enumerate(inps)} + else: + feeds = {'x': inps} + pd_jac = exe.run(main, feed=feeds, fetch_list=[rows]) + np_jac = approx_jacobian(np_f, inps, self.dtype, self.eps, batch=batch) + for i in range(nrow): + np.testing.assert_allclose(pd_jac[i], np_jac[i], self.rtol, + self.atol) + + def run_test_by_entries(self, pd_f, np_f, inps, batch=False): + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + xs = make_tensors(inps) + JJ = paddle.autograd.functional.Jacobian(pd_f, xs, is_batched=batch) + if batch: + nbatch, nrow, ncol = JJ.shape + entries = [ + JJ[:, i, j] for i in range(nrow) for j in range(ncol) + ] + else: + nrow, ncol = JJ.shape + entries = [JJ[i, j] for i in range(nrow) for j in range(ncol)] + exe = fluid.Executor(self.place) + exe.run(startup) + if isinstance(inps, list): + feeds = {f'x{i}': x for i, x in enumerate(inps)} + else: + feeds = {'x': inps} + pd_entries = exe.run(main, feed=feeds, fetch_list=[entries]) + np_jac = approx_jacobian(np_f, inps, self.dtype, self.eps, batch=batch) + np_entries = [ + np_jac[i, ..., j] for i in range(nrow) for j in range(ncol) + ] + for pd_entry, np_entry in zip(pd_entries, np_entries): + np.testing.assert_allclose(pd_entry, np_entry, self.rtol, self.atol) + + def test_square(self): + def pd_f(x): + return paddle.multiply(x, x) + + def np_f(x): + return np.multiply(x, x) + + self.run_test_by_fullmatrix(pd_f, np_f, self.A) + self.run_test_by_rows(pd_f, np_f, self.A) + self.run_test_by_entries(pd_f, np_f, self.A) + + def test_mul(self): + def pd_f(x, y): + return paddle.multiply(x, y) + + def np_f(xs): + x, y = xs + return np.multiply(x, y) + + self.run_test_by_fullmatrix( + pd_f, + np_f, + [self.B, self.C], ) + self.run_test_by_rows(pd_f, np_f, [self.B, self.C]) + self.run_test_by_entries(pd_f, np_f, [self.B, self.C]) + + def test_matmul(self): + def pd_f(x, y): + return paddle.matmul(x, y) + + def np_f(xs): + x, y = xs + return np.matmul(x, y) + + self.run_test_by_fullmatrix(pd_f, np_f, [self.B, self.C]) + self.run_test_by_rows(pd_f, np_f, [self.B, self.C]) + 
self.run_test_by_entries(pd_f, np_f, [self.B, self.C]) + + def test_batch_matmul(self): + def pd_f(x, y): + return paddle.matmul(x, y) + + def np_f(xs): + x, y = xs + return np.matmul(x, y) + + self.run_test_by_fullmatrix(pd_f, np_f, [self.D, self.E], batch=True) + self.run_test_by_rows(pd_f, np_f, [self.D, self.E], batch=True) + self.run_test_by_entries(pd_f, np_f, [self.D, self.E], batch=True) + + +class TestJacobianFloat64(TestJacobianFloat32): + @classmethod + def setUpClass(self): + paddle.enable_static() + if fluid.core.is_compiled_with_cuda(): + self.place = fluid.CUDAPlace(0) + else: + self.place = fluid.CPUPlace() + self.dtype = 'float64' + prepare_data(self, all_data_shapes, self.dtype) + self.eps = config.TOLERANCE.get(self.dtype).get('first_order_grad').get( + 'eps') + self.rtol = config.TOLERANCE.get(self.dtype).get( + 'first_order_grad').get('rtol') + self.atol = config.TOLERANCE.get(self.dtype).get( + 'first_order_grad').get('atol') + + +class TestHessianFloat32(unittest.TestCase): + @classmethod + def setUpClass(self): + paddle.enable_static() + if fluid.core.is_compiled_with_cuda(): + self.place = fluid.CUDAPlace(0) + else: + self.place = fluid.CPUPlace() + self.dtype = 'float32' + prepare_data(self, all_data_shapes, self.dtype) + self.eps = config.TOLERANCE.get(self.dtype).get( + 'second_order_grad').get('eps') + self.rtol = config.TOLERANCE.get(self.dtype).get( + 'second_order_grad').get('rtol') + self.atol = config.TOLERANCE.get(self.dtype).get( + 'second_order_grad').get('atol') + + def run_test_by_fullmatrix(self, pd_f, inps, np_hess, batch=False): + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + xs = make_tensors(inps) + HH = paddle.autograd.functional.Hessian(pd_f, xs, is_batched=batch) + nrow, ncol = HH.shape + full_hessian = HH[:] + exe = fluid.Executor(self.place) + exe.run(startup) + if isinstance(inps, list): + feeds = {f'x{i}': x for i, x in enumerate(inps)} + else: + feeds = {'x': inps} + pd_hess = exe.run(main, feed=feeds, fetch_list=[full_hessian])[0] + np.testing.assert_allclose(pd_hess, np_hess, self.rtol, self.atol) + + def test_square(self): + def pd_f(x): + """Input is a square matrix.""" + return paddle.matmul(x, x.T).flatten().sum() + + def np_hess(x): + dim = x.shape[0] + upperleft = 2 * np.eye(dim, dtype=self.dtype) + upper = np.concatenate((upperleft, upperleft)) + return np.concatenate((upper, upper), axis=1) + + self.run_test_by_fullmatrix(pd_f, self.B, np_hess(self.B)) + + +class TestHessianFloat64(TestHessianFloat32): + @classmethod + def setUpClass(self): + paddle.enable_static() + if fluid.core.is_compiled_with_cuda(): + self.place = fluid.CUDAPlace(0) + else: + self.place = fluid.CPUPlace() + self.dtype = 'float64' + prepare_data(self, all_data_shapes, self.dtype) + self.eps = config.TOLERANCE.get(self.dtype).get( + 'second_order_grad').get('eps') + self.rtol = config.TOLERANCE.get(self.dtype).get( + 'second_order_grad').get('rtol') + self.atol = config.TOLERANCE.get(self.dtype).get( + 'second_order_grad').get('atol') + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/autograd/test_autograd_static.py b/python/paddle/fluid/tests/unittests/autograd/test_autograd_static.py deleted file mode 100644 index 60dc9d06b8..0000000000 --- a/python/paddle/fluid/tests/unittests/autograd/test_autograd_static.py +++ /dev/null @@ -1,308 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -import numpy as np -import paddle -import paddle.fluid as fluid -from utils import _compute_numerical_jacobian, _compute_numerical_batch_jacobian - - -def approx_jacobian(f, xs, dtype, eps=1e-5, batch=False): - r"""Computes an approximate Jacobian matrix of a multi-valued function - using finite differences. - - The function input is required to be an np array or a list of list of np - arrays. - """ - - def flatten(x): - if len(x.shape) > 0: - to = [x.shape[0], -1] if batch else [-1] - return x.reshape(to) - else: - return x - - def flatten_all(xs): - if isinstance(xs, list): - flattened = np.concatenate([flatten(x) for x in xs], axis=-1) - else: - flattened = flatten(xs) - return flattened - - def x_like(x, orig_x): - return x.reshape(orig_x.shape) - - def _f(x): - if multi_inps: - _xs = np.split(x, splits, axis=-1) - _xs = [x_like(_x, _o) for _x, _o in zip(_xs, xs)] - outs = f(_xs) - else: - outs = f(x) - return flatten_all(outs) - - multi_inps = False if isinstance(xs, np.ndarray) else True - x = flatten_all(xs) - xdim = x.shape[-1] - splits = [] - - if multi_inps: - split = 0 - for inp in xs: - split += flatten(inp).shape[-1] - splits.append(split) - - ds = eps * np.eye(xdim, dtype=dtype) - - fprimes_by_x = [(0.5 * (_f(x + d) - _f(x - d)) / eps) for d in ds] - fprimes_by_y = np.stack(fprimes_by_x, axis=-1) - return np.transpose(fprimes_by_y, [1, 0, 2]) if batch else fprimes_by_y - - -def make_tensors(inps): - if isinstance(inps, list): - xs = [ - paddle.static.data( - f'x{i}', inp.shape, dtype=inp.dtype) - for i, inp in enumerate(inps) - ] - else: - xs = paddle.static.data(name='x', shape=inps.shape, dtype=inps.dtype) - return xs - - -all_data_shapes = { - 'A': [[1., 2.]], - 'B': [[1., 2.], [2., 1.]], - 'C': [[2., 2.], [2., 1.]], - 'D': [[[2., 2.], [2., 1.]], [[1., 2.], [2., 1.]]], - 'E': [[[3., 4.], [2., 3.]], [[2., 1.], [1., 3.]]], -} - - -def prepare_data(test, input_shapes, dtype): - for name, shape in input_shapes.items(): - setattr(test, name, np.array(shape, dtype=dtype)) - - -class TestJacobianFloat32(unittest.TestCase): - @classmethod - def setUpClass(self): - paddle.enable_static() - if fluid.core.is_compiled_with_cuda(): - self.place = fluid.CUDAPlace(0) - else: - self.place = fluid.CPUPlace() - self.dtype = 'float32' - prepare_data(self, all_data_shapes, self.dtype) - self.eps = 1e-4 - self.rtol = 1e-2 - self.atol = 1e-2 - - def run_test_by_fullmatrix(self, pd_f, np_f, inps, batch=False): - main = fluid.Program() - startup = fluid.Program() - with fluid.program_guard(main, startup): - xs = make_tensors(inps) - JJ = paddle.autograd.functional.Jacobian(pd_f, xs, batch=batch) - nrow, ncol = JJ.shape() - full_jacobian = JJ[:] - exe = fluid.Executor(self.place) - exe.run(startup) - if isinstance(inps, list): - feeds = {f'x{i}': x for i, x in enumerate(inps)} - else: - feeds = {'x': inps} - pd_jacobians = exe.run(main, feed=feeds, fetch_list=[full_jacobian])[0] - np_jacobians = approx_jacobian( - np_f, inps, 
self.dtype, self.eps, batch=batch) - self.assertTrue( - np.allclose(pd_jacobians, np_jacobians, self.rtol, self.atol)) - - def run_test_by_rows(self, pd_f, np_f, inps, batch=False): - main = fluid.Program() - startup = fluid.Program() - with fluid.program_guard(main, startup): - xs = make_tensors(inps) - JJ = paddle.autograd.functional.Jacobian(pd_f, xs, batch=batch) - nrow, ncol = JJ.shape() - rows = [JJ[i] for i in range(nrow)] - exe = fluid.Executor(self.place) - exe.run(startup) - if isinstance(inps, list): - feeds = {f'x{i}': x for i, x in enumerate(inps)} - else: - feeds = {'x': inps} - pd_jac = exe.run(main, feed=feeds, fetch_list=[rows]) - np_jac = approx_jacobian(np_f, inps, self.dtype, self.eps, batch=batch) - for i in range(nrow): - self.assertTrue( - np.allclose(pd_jac[i], np_jac[i], self.rtol, self.atol)) - - def run_test_by_entries(self, pd_f, np_f, inps, batch=False): - main = fluid.Program() - startup = fluid.Program() - with fluid.program_guard(main, startup): - xs = make_tensors(inps) - JJ = paddle.autograd.functional.Jacobian(pd_f, xs, batch=batch) - nrow, ncol = JJ.shape() - entries = [JJ[i, j] for i in range(nrow) for j in range(ncol)] - exe = fluid.Executor(self.place) - exe.run(startup) - if isinstance(inps, list): - feeds = {f'x{i}': x for i, x in enumerate(inps)} - else: - feeds = {'x': inps} - pd_entries = exe.run(main, feed=feeds, fetch_list=[entries]) - np_jac = approx_jacobian(np_f, inps, self.dtype, self.eps, batch=batch) - np_entries = [ - np_jac[i, ..., j] for i in range(nrow) for j in range(ncol) - ] - for pd_entry, np_entry in zip(pd_entries, np_entries): - self.assertTrue( - np.allclose(pd_entry, np_entry, self.rtol, self.atol)) - - def test_square(self): - def pd_f(x): - return paddle.multiply(x, x) - - def np_f(x): - return np.multiply(x, x) - - self.run_test_by_fullmatrix(pd_f, np_f, self.A) - self.run_test_by_rows(pd_f, np_f, self.A) - self.run_test_by_entries(pd_f, np_f, self.A) - - def test_mul(self): - def pd_f(xs): - x, y = xs - return paddle.multiply(x, y) - - def np_f(xs): - x, y = xs - return np.multiply(x, y) - - self.run_test_by_fullmatrix( - pd_f, - np_f, - [self.B, self.C], ) - self.run_test_by_rows(pd_f, np_f, [self.B, self.C]) - self.run_test_by_entries(pd_f, np_f, [self.B, self.C]) - - def test_matmul(self): - def pd_f(xs): - x, y = xs - return paddle.matmul(x, y) - - def np_f(xs): - x, y = xs - return np.matmul(x, y) - - self.run_test_by_fullmatrix(pd_f, np_f, [self.B, self.C]) - self.run_test_by_rows(pd_f, np_f, [self.B, self.C]) - self.run_test_by_entries(pd_f, np_f, [self.B, self.C]) - - def test_batch_matmul(self): - def pd_f(xs): - x, y = xs - return paddle.matmul(x, y) - - def np_f(xs): - x, y = xs - return np.matmul(x, y) - - self.run_test_by_fullmatrix(pd_f, np_f, [self.D, self.E], batch=True) - self.run_test_by_rows(pd_f, np_f, [self.D, self.E], batch=True) - self.run_test_by_entries(pd_f, np_f, [self.D, self.E], batch=True) - - -class TestJacobianFloat64(TestJacobianFloat32): - @classmethod - def setUpClass(self): - paddle.enable_static() - if fluid.core.is_compiled_with_cuda(): - self.place = fluid.CUDAPlace(0) - else: - self.place = fluid.CPUPlace() - self.dtype = 'float64' - prepare_data(self, all_data_shapes, self.dtype) - self.eps = 1e-7 - self.rtol = 1e-6 - self.atol = 1e-6 - - -class TestHessianFloat64(unittest.TestCase): - @classmethod - def setUpClass(self): - paddle.enable_static() - if fluid.core.is_compiled_with_cuda(): - self.place = fluid.CUDAPlace(0) - else: - self.place = fluid.CPUPlace() - self.dtype = 
'float64' - prepare_data(self, all_data_shapes, self.dtype) - self.eps = 1e-7 - self.rtol = 1e-6 - self.atol = 1e-6 - - def run_test_by_fullmatrix(self, pd_f, inps, np_hess, batch=False): - main = fluid.Program() - startup = fluid.Program() - with fluid.program_guard(main, startup): - xs = make_tensors(inps) - HH = paddle.autograd.functional.Hessian(pd_f, xs, batch=batch) - nrow, ncol = HH.shape() - full_hessian = HH[:] - exe = fluid.Executor(self.place) - exe.run(startup) - if isinstance(inps, list): - feeds = {f'x{i}': x for i, x in enumerate(inps)} - else: - feeds = {'x': inps} - pd_hess = exe.run(main, feed=feeds, fetch_list=[full_hessian])[0] - self.assertTrue(np.allclose(pd_hess, np_hess, self.rtol, self.atol)) - - def test_square(self): - def pd_f(x): - """Input is a square matrix.""" - return paddle.matmul(x, x.T) - - def np_hess(x): - dim = x.shape[0] - f_xx_upperleft = 2 * np.eye(dim, dtype=self.dtype) - f_xx = np.zeros([dim * dim, dim * dim], dtype=self.dtype) - f_xx[:dim, :dim] = f_xx_upperleft - return f_xx - - self.run_test_by_fullmatrix(pd_f, self.B, np_hess(self.B)) - - def test_batch_square(self): - def pd_f(x): - """Input is a square matrix.""" - return paddle.matmul(x, paddle.transpose(x, [0, 2, 1])) - - def np_hess(x): - bat, dim, _ = x.shape - f_xx_upperleft = 2 * np.eye(dim, dtype=self.dtype) - f_xx = np.zeros([bat, dim * dim, dim * dim], dtype=self.dtype) - f_xx[..., :dim, :dim] = f_xx_upperleft - return f_xx - - self.run_test_by_fullmatrix( - pd_f, self.E, np_hess(self.E), batch=True) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/autograd/test_hessian.py b/python/paddle/fluid/tests/unittests/autograd/test_hessian.py deleted file mode 100644 index 7b3bd9fd55..0000000000 --- a/python/paddle/fluid/tests/unittests/autograd/test_hessian.py +++ /dev/null @@ -1,263 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest -import numpy as np -import paddle -import paddle.compat as cpt -import paddle.nn.functional as F -from utils import _compute_numerical_hessian, _compute_numerical_batch_hessian - - -class TestHessian(unittest.TestCase): - @classmethod - def setUpClass(self): - self.shape = (2, 2) - self.dtype = 'float32' - self.np_dtype = np.float32 - self.numerical_delta = 1e-2 - self.rtol = 1e-2 - self.atol = 1e-2 - self.x = paddle.rand(shape=self.shape, dtype=self.dtype) - self.y = paddle.rand(shape=self.shape, dtype=self.dtype) - - def test_single_input(self): - def func(x): - return paddle.sum(paddle.matmul(x, x)) - - numerical_hessian = _compute_numerical_hessian( - func, self.x, self.numerical_delta, self.np_dtype) - - self.x.stop_gradient = False - hessian = paddle.autograd.hessian(func, self.x) - assert np.allclose(hessian.numpy(), numerical_hessian[0][0], self.rtol, - self.atol) - - def test_multi_input(self): - def func(x, y): - return paddle.sum(paddle.matmul(x, y)) - - numerical_hessian = _compute_numerical_hessian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) - - self.x.stop_gradient = False - self.y.stop_gradient = False - hessian = paddle.autograd.hessian(func, [self.x, self.y]) - for i in range(len(hessian)): - for j in range(len(hessian[0])): - assert np.allclose(hessian[i][j].numpy(), - numerical_hessian[i][j], self.rtol, - self.atol) - - def test_allow_unused_false(self): - def func(x, y): - return paddle.sum(paddle.matmul(x, x)) - - try: - self.x.stop_gradient = False - self.y.stop_gradient = False - hessian = paddle.autograd.hessian(func, [self.x, self.y]) - except ValueError as e: - error_msg = cpt.get_exception_message(e) - assert error_msg.find("allow_unused") > 0 - - def test_allow_unused_true(self): - def func(x, y): - return paddle.sum(paddle.matmul(x, x)) - - numerical_hessian = _compute_numerical_hessian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - self.y.stop_gradient = False - hessian = paddle.autograd.hessian( - func, [self.x, self.y], allow_unused=True) - for i in range(len(hessian)): - for j in range(len(hessian[0])): - if i == j == 0: - assert np.allclose(hessian[i][j].numpy(), - numerical_hessian[i][j], self.rtol, - self.atol) - else: - assert hessian[i][j] is None - - def test_create_graph_false(self): - def func(x): - return paddle.sum(paddle.matmul(x, x)) - - numerical_hessian = _compute_numerical_hessian( - func, self.x, self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - hessian = paddle.autograd.hessian(func, self.x) - assert hessian.stop_gradient == True - assert np.allclose(hessian.numpy(), numerical_hessian[0][0], self.rtol, - self.atol) - try: - paddle.grad(hessian, self.x) - except RuntimeError as e: - error_msg = cpt.get_exception_message(e) - assert error_msg.find("has no gradient") > 0 - - def test_create_graph_true(self): - def func(x): - return paddle.sum(F.sigmoid(x)) - - numerical_hessian = _compute_numerical_hessian( - func, self.x, self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - hessian = paddle.autograd.hessian(func, self.x, create_graph=True) - assert hessian.stop_gradient == False - assert np.allclose(hessian.numpy(), numerical_hessian[0][0], self.rtol, - self.atol) - triple_grad = paddle.grad(hessian, self.x) - assert triple_grad is not None - - -class TestHessianFloat64(TestHessian): - @classmethod - def setUpClass(self): - self.shape = (2, 2) - self.dtype = 'float64' - self.np_dtype = np.float64 - 
self.numerical_delta = 1e-5 - self.rtol = 1e-5 - self.atol = 1e-5 - self.x = paddle.rand(shape=self.shape, dtype=self.dtype) - self.y = paddle.rand(shape=self.shape, dtype=self.dtype) - - -class TestBatchHessian(unittest.TestCase): - @classmethod - def setUpClass(self): - self.x_shape = (5, 2) - self.weight_shape = (2, 4) - self.y_shape = (5, 2) - self.dtype = 'float32' - self.np_dtype = np.float32 - self.numerical_delta = 1e-2 - self.rtol = 1e-3 - self.atol = 1e-3 - self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype) - self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype) - self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype) - - def test_single_input(self): - def func(x): - return paddle.matmul(x * x, self.weight)[:, 0:1] - - numerical_hessian = _compute_numerical_batch_hessian( - func, self.x, self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - hessian = paddle.autograd.batch_hessian(func, self.x, create_graph=True) - assert np.allclose(hessian, numerical_hessian, self.rtol, self.atol) - - def test_multi_input(self): - def func(x, y): - return paddle.matmul(x * x * y * y, self.weight)[:, 0:1] - - numerical_hessian = _compute_numerical_batch_hessian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) - - self.x.stop_gradient = False - self.y.stop_gradient = False - hessian = paddle.autograd.batch_hessian(func, [self.x, self.y]) - - shape_tensor = paddle.to_tensor(numerical_hessian).astype("float64") - hessian_reshape = np.reshape(hessian, (shape_tensor.shape)) - assert np.allclose(hessian_reshape, numerical_hessian, self.rtol, - self.atol) - - def test_allow_unused_false(self): - def func(x, y): - return paddle.matmul(x * x, self.weight)[:, 0:1] - - try: - self.x.stop_gradient = False - self.y.stop_gradient = False - hessian = paddle.autograd.batch_hessian(func, [self.x, self.y]) - except ValueError as e: - error_msg = cpt.get_exception_message(e) - assert error_msg.find("allow_unused") > 0 - - def test_allow_unused_true(self): - def func(x, y): - return paddle.matmul(x * x, self.weight)[:, 0:1] - - numerical_hessian = _compute_numerical_batch_hessian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - self.y.stop_gradient = False - hessian = paddle.autograd.batch_hessian( - func, [self.x, self.y], allow_unused=True) - - for i in range(len(hessian)): - for j in range(len(hessian[0])): - if i == j == 0: - numerical_hessian = np.stack( - (numerical_hessian[i][j], numerical_hessian[i][j + 1]), - axis=0) - assert np.allclose(hessian[i][j], numerical_hessian, - self.rtol, self.atol) - else: - assert hessian[i][j] is None - - def test_create_graph_false(self): - def func(x): - return paddle.matmul(x * x, self.weight)[:, 0:1] - - numerical_hessian = _compute_numerical_batch_hessian( - func, self.x, self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - hessian = paddle.autograd.batch_hessian(func, self.x) - assert hessian.stop_gradient == True - assert np.allclose(hessian.numpy(), numerical_hessian, self.rtol, - self.atol) - try: - paddle.grad(hessian, self.x) - except RuntimeError as e: - error_msg = cpt.get_exception_message(e) - assert error_msg.find("has no gradient") > 0 - - def test_create_graph_true(self): - def func(x): - return paddle.matmul(x * x, self.weight)[:, 0:1] - - numerical_hessian = _compute_numerical_batch_hessian( - func, self.x, self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - hessian = paddle.autograd.batch_hessian(func, self.x, 
create_graph=True) - assert hessian.stop_gradient == False - assert np.allclose(hessian.numpy(), numerical_hessian, self.rtol, - self.atol) - triple_grad = paddle.grad(hessian, self.x) - assert triple_grad is not None - - -class TestBatchHessianFloat64(TestBatchHessian): - @classmethod - def setUpClass(self): - self.x_shape = (5, 2) - self.weight_shape = (2, 4) - self.y_shape = (5, 2) - self.dtype = 'float64' - self.np_dtype = np.float64 - self.numerical_delta = 1e-4 - self.rtol = 1e-5 - self.atol = 1e-5 - self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype) - self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype) - self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/autograd/test_jacobian.py b/python/paddle/fluid/tests/unittests/autograd/test_jacobian.py deleted file mode 100644 index 335ea4e519..0000000000 --- a/python/paddle/fluid/tests/unittests/autograd/test_jacobian.py +++ /dev/null @@ -1,319 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -import numpy as np -import paddle -import paddle.compat as cpt -from utils import _compute_numerical_jacobian, _compute_numerical_batch_jacobian - - -class TestJacobian(unittest.TestCase): - @classmethod - def setUpClass(self): - self.shape = (4, 4) - self.dtype = 'float32' - self.np_dtype = np.float32 - self.numerical_delta = 1e-4 - self.rtol = 1e-3 - self.atol = 1e-3 - self.x = paddle.rand(shape=self.shape, dtype=self.dtype) - self.y = paddle.rand(shape=self.shape, dtype=self.dtype) - - def test_single_input_and_single_output(self): - def func(x): - return paddle.matmul(x, x) - - numerical_jacobian = _compute_numerical_jacobian( - func, self.x, self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - jacobian = paddle.autograd.jacobian(func, self.x) - assert np.allclose(jacobian.numpy(), numerical_jacobian[0][0], - self.rtol, self.atol) - - def test_single_input_and_multi_output(self): - def func(x): - return paddle.matmul(x, x), x * x - - numerical_jacobian = _compute_numerical_jacobian( - func, self.x, self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - jacobian = paddle.autograd.jacobian(func, self.x) - for i in range(len(jacobian)): - assert np.allclose(jacobian[i].numpy(), numerical_jacobian[i][0], - self.rtol, self.atol) - - def test_multi_input_and_single_output(self): - def func(x, y): - return paddle.matmul(x, y) - - numerical_jacobian = _compute_numerical_jacobian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - self.y.stop_gradient = False - jacobian = paddle.autograd.jacobian(func, [self.x, self.y]) - for j in range(len(jacobian)): - assert np.allclose(jacobian[j].numpy(), numerical_jacobian[0][j], - self.rtol, self.atol) - - def test_multi_input_and_multi_output(self): - def func(x, y): - return paddle.matmul(x, y), x * y - - numerical_jacobian 
= _compute_numerical_jacobian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - self.y.stop_gradient = False - jacobian = paddle.autograd.jacobian(func, [self.x, self.y]) - for i in range(len(jacobian)): - for j in range(len(jacobian[0])): - assert np.allclose(jacobian[i][j].numpy(), - numerical_jacobian[i][j], self.rtol, - self.atol) - - def test_allow_unused_false(self): - def func(x, y): - return paddle.matmul(x, x) - - try: - self.x.stop_gradient = False - self.y.stop_gradient = False - jacobian = paddle.autograd.jacobian(func, [self.x, self.y]) - except ValueError as e: - error_msg = cpt.get_exception_message(e) - assert error_msg.find("allow_unused") > 0 - - def test_allow_unused_true(self): - def func(x, y): - return paddle.matmul(x, x) - - numerical_jacobian = _compute_numerical_jacobian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - self.y.stop_gradient = False - jacobian = paddle.autograd.jacobian( - func, [self.x, self.y], allow_unused=True) - assert np.allclose(jacobian[0].numpy(), numerical_jacobian[0][0], - self.rtol, self.atol) - assert jacobian[1] is None - - def test_create_graph_false(self): - def func(x, y): - return paddle.matmul(x, y) - - numerical_jacobian = _compute_numerical_jacobian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - self.y.stop_gradient = False - jacobian = paddle.autograd.jacobian(func, [self.x, self.y]) - for j in range(len(jacobian)): - assert jacobian[j].stop_gradient == True - assert np.allclose(jacobian[j].numpy(), numerical_jacobian[0][j], - self.rtol, self.atol) - try: - paddle.grad(jacobian[0], [self.x, self.y]) - except RuntimeError as e: - error_msg = cpt.get_exception_message(e) - assert error_msg.find("has no gradient") > 0 - - def test_create_graph_true(self): - def func(x, y): - return paddle.matmul(x, y) - - numerical_jacobian = _compute_numerical_jacobian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - self.y.stop_gradient = False - jacobian = paddle.autograd.jacobian( - func, [self.x, self.y], create_graph=True) - for j in range(len(jacobian)): - assert jacobian[j].stop_gradient == False - assert np.allclose(jacobian[j].numpy(), numerical_jacobian[0][j], - self.rtol, self.atol) - double_grad = paddle.grad(jacobian[0], [self.x, self.y]) - assert double_grad is not None - - -class TestJacobianFloat64(TestJacobian): - @classmethod - def setUpClass(self): - self.shape = (4, 4) - self.dtype = 'float64' - self.np_dtype = np.float64 - self.numerical_delta = 1e-7 - self.rtol = 1e-7 - self.atol = 1e-7 - self.x = paddle.rand(shape=self.shape, dtype=self.dtype) - self.y = paddle.rand(shape=self.shape, dtype=self.dtype) - - -class TestJacobianBatch(unittest.TestCase): - @classmethod - def setUpClass(self): - self.x_shape = (4, 2) - self.weight_shape = (2, 4) - self.y_shape = (4, 2) - self.dtype = 'float32' - self.np_dtype = np.float32 - self.numerical_delta = 1e-4 - self.rtol = 1e-3 - self.atol = 1e-3 - self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype) - self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype) - self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype) - - def test_batch_single_input_and_batch_single_output(self): - def func(x): - return paddle.matmul(paddle.matmul(x, self.weight), self.y) - - numerical_jacobian = _compute_numerical_batch_jacobian( - func, [self.x], self.numerical_delta, self.np_dtype) - - 
self.x.stop_gradient = False - batch_jacobian = paddle.autograd.batch_jacobian( - func, - self.x, ) - - self.assertTrue( - np.allclose(batch_jacobian.numpy().all(), numerical_jacobian[0][0] - .all())) - - def test_batch_single_input_and_batch_multi_output(self): - def func(x): - return paddle.matmul(paddle.matmul(x, self.weight), self.y), x * x - - numerical_jacobian = _compute_numerical_batch_jacobian( - func, [self.x], self.numerical_delta, self.np_dtype) - - self.x.stop_gradient = False - batch_jacobian = paddle.autograd.batch_jacobian( - func, - self.x, ) - - for i in range(len(batch_jacobian)): - assert np.allclose(batch_jacobian[i].numpy(), - numerical_jacobian[i][0], self.rtol, self.atol) - - def test_batch_multi_input_and_batch_single_output(self): - def func(x, y): - return x * y - - numerical_jacobian = _compute_numerical_batch_jacobian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) - - self.x.stop_gradient = False - self.y.stop_gradient = False - batch_jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y]) - - for j in range(len(batch_jacobian)): - assert np.allclose(batch_jacobian[j].numpy(), - numerical_jacobian[0][j], self.rtol, self.atol) - - def test_batch_multi_input_and_batch_multi_output(self): - def func(x, y): - return x * y, x * y - - numerical_jacobian = _compute_numerical_batch_jacobian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) - - self.x.stop_gradient = False - self.y.stop_gradient = False - batch_jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y]) - - for i in range(len(batch_jacobian)): - assert np.allclose(batch_jacobian[i], numerical_jacobian[i], - self.rtol, self.atol) - - def test_allow_unused_false(self): - def func(x, y): - return x * x - - try: - self.x.stop_gradient = False - self.y.stop_gradient = False - jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y]) - except ValueError as e: - error_msg = cpt.get_exception_message(e) - assert error_msg.find("allow_unused") > 0 - - def test_allow_unused_true(self): - def func(x, y): - return x * x - - numerical_jacobian = _compute_numerical_batch_jacobian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - self.y.stop_gradient = False - jacobian = paddle.autograd.batch_jacobian( - func, [self.x, self.y], allow_unused=True) - - assert np.allclose(jacobian[0].numpy(), numerical_jacobian[0][0], - self.rtol, self.atol) - assert jacobian[1] is None - - def test_create_graph_false(self): - def func(x, y): - return x * y - - numerical_jacobian = _compute_numerical_batch_jacobian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - self.y.stop_gradient = False - jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y]) - for j in range(len(jacobian)): - assert jacobian[j].stop_gradient == True - assert np.allclose(jacobian[j].numpy(), numerical_jacobian[0][j], - self.rtol, self.atol) - try: - paddle.grad(jacobian[0], [self.x, self.y]) - except RuntimeError as e: - error_msg = cpt.get_exception_message(e) - assert error_msg.find("has no gradient") > 0 - - def test_create_graph_true(self): - def func(x, y): - return x * y - - numerical_jacobian = _compute_numerical_batch_jacobian( - func, [self.x, self.y], self.numerical_delta, self.np_dtype) - self.x.stop_gradient = False - self.y.stop_gradient = False - jacobian = paddle.autograd.batch_jacobian( - func, [self.x, self.y], create_graph=True) - for j in range(len(jacobian)): - assert 
jacobian[j].stop_gradient == False - assert np.allclose(jacobian[j].numpy(), numerical_jacobian[0][j], - self.rtol, self.atol) - double_grad = paddle.grad(jacobian[0], [self.x, self.y]) - assert double_grad is not None - - -class TestJacobianBatchFloat64(TestJacobianBatch): - @classmethod - def setUpClass(self): - self.x_shape = (12, 2) - self.weight_shape = (2, 12) - self.y_shape = (12, 2) - self.dtype = 'float64' - self.np_dtype = np.float64 - self.numerical_delta = 1e-7 - self.rtol = 1e-7 - self.atol = 1e-7 - self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype) - self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype) - self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/autograd/test_vhp.py b/python/paddle/fluid/tests/unittests/autograd/test_vhp.py deleted file mode 100644 index 09b25203e0..0000000000 --- a/python/paddle/fluid/tests/unittests/autograd/test_vhp.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -import numpy as np -import paddle -import paddle.compat as cpt -import paddle.nn.functional as F -from utils import _compute_numerical_vhp - - -class TestVHP(unittest.TestCase): - @classmethod - def setUpClass(self): - self.shape = (2, 2) - self.dtype = 'float32' - self.np_dtype = np.float32 - self.numerical_delta = 1e-2 - self.rtol = 1e-2 - self.atol = 1e-2 - self.x = paddle.rand(shape=self.shape, dtype=self.dtype) - self.y = paddle.rand(shape=self.shape, dtype=self.dtype) - self.vx = paddle.rand(shape=self.shape, dtype=self.dtype) - self.vy = paddle.rand(shape=self.shape, dtype=self.dtype) - - def test_single_input(self): - def func(x): - return paddle.sum(paddle.matmul(x, x)) - - numerical_func_output = func(self.x).numpy() - numerical_vhp = _compute_numerical_vhp( - func, self.x, self.vx, self.numerical_delta, self.np_dtype) - - self.x.stop_gradient = False - func_output, vhp = paddle.autograd.vhp(func, self.x, self.vx) - assert np.allclose(func_output.numpy(), numerical_func_output, - self.rtol, self.atol) - assert np.allclose(vhp[0].numpy(), numerical_vhp[0], self.rtol, - self.atol) - - def test_multi_input(self): - def func(x, y): - return paddle.sum(paddle.matmul(x, y)) - - numerical_func_output = func(self.x, self.y).numpy() - numerical_vhp = _compute_numerical_vhp( - func, [self.x, self.y], [self.vx, self.vy], self.numerical_delta, - self.np_dtype) - - self.x.stop_gradient = False - self.y.stop_gradient = False - func_output, vhp = paddle.autograd.vhp(func, [self.x, self.y], - [self.vx, self.vy]) - assert np.allclose(func_output.numpy(), numerical_func_output, - self.rtol, self.atol) - for i in range(len(vhp)): - assert np.allclose(vhp[i].numpy(), numerical_vhp[i], self.rtol, - self.atol) - - def test_v_default(self): - def func(x, y): - return paddle.sum(paddle.matmul(x, y)) - - numerical_func_output = func(self.x, self.y).numpy() - vx 
= paddle.ones(self.vx.shape, dtype=self.vx.dtype) - vy = paddle.ones(self.vy.shape, dtype=self.vy.dtype) - numerical_vhp = _compute_numerical_vhp(func, [self.x, self.y], - [vx, vy], self.numerical_delta, - self.np_dtype) - - self.x.stop_gradient = False - self.y.stop_gradient = False - func_output, vhp = paddle.autograd.vhp(func, [self.x, self.y]) - assert np.allclose(func_output.numpy(), numerical_func_output, - self.rtol, self.atol) - for i in range(len(vhp)): - assert np.allclose(vhp[i].numpy(), numerical_vhp[i], self.rtol, - self.atol) - - def test_allow_unused_false(self): - def func(x, y): - return paddle.sum(paddle.matmul(x, x)) - - try: - self.x.stop_gradient = False - self.y.stop_gradient = False - _ = paddle.autograd.vhp(func, [self.x, self.y]) - except ValueError as e: - error_msg = cpt.get_exception_message(e) - assert error_msg.find("allow_unused") > 0 - - def test_allow_unused_true(self): - def func(x, y): - return paddle.sum(paddle.matmul(x, x)) - - numerical_func_output = func(self.x, self.y).numpy() - numerical_vhp = _compute_numerical_vhp( - func, [self.x, self.y], [self.vx, self.vy], self.numerical_delta, - self.np_dtype) - - self.x.stop_gradient = False - self.y.stop_gradient = False - func_output, vhp = paddle.autograd.vhp(func, [self.x, self.y], - [self.vx, self.vy], - allow_unused=True) - assert np.allclose(func_output.numpy(), numerical_func_output, - self.rtol, self.atol) - assert np.allclose(vhp[0].numpy(), numerical_vhp[0], self.rtol, - self.atol) - assert vhp[1] is None - - def test_create_graph_false(self): - def func(x): - return paddle.sum(F.sigmoid(x)) - - numerical_func_output = func(self.x).numpy() - numerical_vhp = _compute_numerical_vhp( - func, self.x, self.vx, self.numerical_delta, self.np_dtype) - - self.x.stop_gradient = False - func_output, vhp = paddle.autograd.vhp(func, self.x, self.vx) - assert np.allclose(func_output.numpy(), numerical_func_output, - self.rtol, self.atol) - assert vhp[0].stop_gradient == True - assert np.allclose(vhp[0].numpy(), numerical_vhp[0], self.rtol, - self.atol) - try: - paddle.grad(vhp, self.x) - except RuntimeError as e: - error_msg = cpt.get_exception_message(e) - assert error_msg.find("has no gradient") > 0 - - def test_create_graph_true(self): - def func(x): - return paddle.sum(F.sigmoid(x)) - - numerical_func_output = func(self.x).numpy() - numerical_vhp = _compute_numerical_vhp( - func, self.x, self.vx, self.numerical_delta, self.np_dtype) - - self.x.stop_gradient = False - func_output, vhp = paddle.autograd.vhp(func, - self.x, - self.vx, - create_graph=True) - assert np.allclose(func_output.numpy(), numerical_func_output, - self.rtol, self.atol) - assert vhp[0].stop_gradient == False - assert np.allclose(vhp[0].numpy(), numerical_vhp[0], self.rtol, - self.atol) - triple_grad = paddle.grad(vhp, self.x) - assert triple_grad is not None - - -class TestVHPFloat64(TestVHP): - @classmethod - def setUpClass(self): - self.shape = (2, 2) - self.dtype = 'float64' - self.np_dtype = np.float64 - self.numerical_delta = 1e-5 - self.rtol = 1e-5 - self.atol = 1e-5 - self.x = paddle.rand(shape=self.shape, dtype=self.dtype) - self.y = paddle.rand(shape=self.shape, dtype=self.dtype) - self.vx = paddle.rand(shape=self.shape, dtype=self.dtype) - self.vy = paddle.rand(shape=self.shape, dtype=self.dtype) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/autograd/test_vjp_jvp.py b/python/paddle/fluid/tests/unittests/autograd/test_vjp_jvp.py deleted file mode 100644 index 
c228ad7932..0000000000 --- a/python/paddle/fluid/tests/unittests/autograd/test_vjp_jvp.py +++ /dev/null @@ -1,315 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -import paddle - -from paddle.autograd.functional import vjp, jvp, _tensors -from paddle import grad, ones_like, zeros_like - - -def reduce(x): - return paddle.sum(x) - - -def reduce_dim(x): - return paddle.sum(x, axis=0) - - -def matmul(x, y): - return paddle.matmul(x, y) - - -def mul(x, y): - return x * y - - -def pow(x, y): - return paddle.pow(x, y) - - -def o2(x, y): - return paddle.multiply(x, y), paddle.matmul(x, y.t()) - - -def unuse(x, y): - return paddle.sum(x) - - -def nested(x): - def inner(y): - return x * y - - return inner - - -def make_v(f, inputs): - outputs = _tensors(f(*inputs), "outputs") - return [ones_like(x) for x in outputs] - - -class TestAutogradFunctional(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.RAW_INPUTS = { - 'a': [1.0], - 'b': [1.0, 2.0], - 'c': [3.0, 4.0], - 'd': [[2.0], [3.0]], - 'A': [[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]], - 'B': [[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]], - } - - def setUp(self): - pass - - def gen_input(self, inp, stop_gradient=False): - if isinstance(inp, paddle.Tensor): - return inp - return paddle.to_tensor( - self.RAW_INPUTS[inp], stop_gradient=stop_gradient) - - def gen_inputs(self, inputs): - if isinstance(inputs, list): - inputs = [self.gen_input(x) for x in inputs] - else: - inputs = [self.gen_input(inputs)] - return inputs - - def gen_test_pairs(self, - func, - inputs, - v=None, - create_graph=False, - allow_unused=False): - def vjp_test(): - nonlocal v - xs = self.gen_inputs(inputs) - if v is not None: - v = self.gen_inputs(v) - outputs, inputs_grad = vjp(func, - xs, - v, - create_graph=create_graph, - allow_unused=allow_unused) - else: - outputs, inputs_grad = vjp(func, - xs, - create_graph=create_graph, - allow_unused=allow_unused) - return outputs, inputs_grad - - def grad_test(): - nonlocal v - xs = self.gen_inputs(inputs) - if v is not None: - v = self.gen_inputs(v) - outputs = func(*xs) - if v is not None: - inputs_grad = grad( - outputs, - xs, - v, - create_graph=create_graph, - allow_unused=allow_unused) - else: - inputs_grad = grad( - outputs, - xs, - create_graph=create_graph, - allow_unused=allow_unused) - return outputs, inputs_grad - - return vjp_test, grad_test - - def gen_jvp_tests(self, - func, - inputs, - v=None, - create_graph=False, - allow_unused=False): - def jvp_test(): - nonlocal v - xs = self.gen_inputs(inputs) - if v is not None: - v = self.gen_inputs(v) - outputs, outputs_grad = jvp(func, - xs, - v, - create_graph=create_graph, - allow_unused=allow_unused) - else: - outputs, outputs_grad = jvp(func, - xs, - create_graph=create_graph, - allow_unused=allow_unused) - return outputs, outputs_grad - - return jvp_test - - def check_results(self, ref, res): - type_error = 'Result is different than expected in shape or type' - value_error = 'Result is 
different than expected values' - if ref is None: - self.assertTrue(res is None, type_error) - elif isinstance(ref, paddle.Tensor): - self.assertTrue(isinstance(res, paddle.Tensor), type_error) - self.assertTrue(paddle.allclose(res, ref), value_error) - else: - self.assertTrue(len(res) == len(ref), type_error) - for i in range(len(ref)): - self.check_results(ref[i], res[i]) - return True - - -class TestVJP(TestAutogradFunctional): - def test_vjp_i1o1_no_create_graph(self): - test_cases = [ - [reduce, 'A'], #noqa - [reduce_dim, 'A'], #noqa - ] #noqa - for f, inputs in test_cases: - vjp, grad = self.gen_test_pairs(f, inputs) - vjp_result, grad_result = vjp(), grad() - self.check_results(grad_result, vjp_result) - - def test_vjp_i2o1_no_create_graph(self): - test_cases = [ - [matmul, ['A', 'B']], #noqa - [mul, ['b', 'c']], #noqa - ] #noqa - for f, inputs in test_cases: - vjp, grad = self.gen_test_pairs(f, inputs) - vjp_result, grad_result = vjp(), grad() - self.check_results(grad_result, vjp_result) - - def test_vjp_i2o2_no_create_graph(self): - test_cases = [ - [o2, ['A', 'A']], #noqa - ] #noqa - for f, inputs in test_cases: - inputs = self.gen_inputs(inputs) - v = make_v(f, inputs) - vjp, grad = self.gen_test_pairs(f, inputs, v=v) - vjp_result, grad_result = vjp(), grad() - self.check_results(grad_result, vjp_result) - - def test_vjp_i2o2_omitting_v_no_create_graph(self): - test_cases = [ - [o2, ['A', 'A']], #noqa - ] #noqa - for f, inputs in test_cases: - inputs = self.gen_inputs(inputs) - vjp, grad = self.gen_test_pairs(f, inputs) - vjp_result, grad_result = vjp(), grad() - self.check_results(grad_result, vjp_result) - - def test_vjp_nested_no_create_graph(self): - x = self.gen_input('a') - test_cases = [ - [nested(x), 'a'], #noqa - ] - for f, inputs in test_cases: - vjp, grad = self.gen_test_pairs(f, inputs) - vjp_result, grad_result = vjp(), grad() - self.check_results(grad_result, vjp_result) - - def test_vjp_aliased_input_no_create_graph(self): - x = self.gen_input('a') - ref = self.gen_test_pairs(nested(x), 'a')[0] - aliased = self.gen_test_pairs(nested(x), x)[0] - ref_result, aliased_result = ref(), aliased() - self.check_results(ref_result, aliased_result) - - def test_vjp_allowunused_no_create_graph(self): - x, y = self.gen_input('A'), self.gen_input('a') - vjp, grad = self.gen_test_pairs(unuse, [x, y], allow_unused=True) - vjp_result, grad_result = vjp(), grad() - self.check_results(grad_result, vjp_result) - - -def jac(grad_fn, f, inputs): - assert grad_fn in [vjp, jvp] - if grad_fn is jvp: - vs = [zeros_like(x) for x in inputs] - else: - outputs = f(*inputs) - if isinstance(outputs, paddle.Tensor): - outputs = [outputs] - vs = [zeros_like(y) for y in outputs] - JJ_cols = [] - for i, v in enumerate(vs): - v = v.flatten() - for j in range(len(v)): - _v = zeros_like(v).detach() - _v[j] = 1.0 - _v = _v.reshape(vs[i].shape) - _vs = vs.copy() - _vs[i] = _v - _, grads = grad_fn(f, inputs, vs) - d_outs = paddle.concat([d_out.flatten() for d_out in grads]) - JJ_cols.append(d_outs) - # JJ is the fully unrolled jacobian - JJ = paddle.stack(JJ_cols) - if grad_fn is vjp: - JJ = JJ.t() - return JJ - - -class TestJVP(TestAutogradFunctional): - def test_jvp_i1o1_no_create_graph(self): - test_cases = [ - [reduce, 'A'], #noqa - [reduce_dim, 'A'], #noqa - ] #noqa - for f, inputs in test_cases: - inputs = self.gen_inputs(inputs) - forward_jac = jac(jvp, f, inputs) - reverse_jac = jac(vjp, f, inputs) - self.check_results(forward_jac, reverse_jac) - - def test_jvp_i2o1_no_create_graph(self): - 
test_cases = [ #noqa - [matmul, ['A', 'B']], #noqa - ] #noqa - for f, inputs in test_cases: - inputs = self.gen_inputs(inputs) - forward_jac = jac(jvp, f, inputs) - reverse_jac = jac(vjp, f, inputs) - self.check_results(forward_jac, reverse_jac) - - def test_jvp_i2o2_no_create_graph(self): - test_cases = [ #noqa - [o2, ['A', 'A']], #noqa - ] #noqa - for f, inputs in test_cases: - inputs = self.gen_inputs(inputs) - forward_jac = jac(jvp, f, inputs) - reverse_jac = jac(vjp, f, inputs) - self.check_results(forward_jac, reverse_jac) - - def test_jvp_i2o2_omitting_v_no_create_graph(self): - test_cases = [ #noqa - [o2, ['A', 'A']], #noqa - ] #noqa - for f, inputs in test_cases: - inputs = self.gen_inputs(inputs) - results_omitting_v = jvp(f, inputs) - v = [ones_like(x) for x in inputs] - results_with_v = jvp(f, inputs, v) - self.check_results(results_omitting_v, results_with_v) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/autograd/utils.py b/python/paddle/fluid/tests/unittests/autograd/utils.py index b06ce6ed7c..0816b57fbf 100644 --- a/python/paddle/fluid/tests/unittests/autograd/utils.py +++ b/python/paddle/fluid/tests/unittests/autograd/utils.py @@ -1,22 +1,33 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import typing +import enum +import sys +import re +import inspect +import functools +import contextlib +import collections import numpy as np import paddle -from paddle.autograd.functional import _tensors +from paddle.autograd.functional import _as_tensors +########################################################## +# Finite Difference Utils +########################################################## def _product(t): if isinstance(t, int): return t @@ -25,7 +36,9 @@ def _product(t): def _get_item(t, idx): - assert isinstance(t, paddle.Tensor), "The first argument t must be Tensor." + assert isinstance( + t, + paddle.fluid.framework.Variable), "The first argument t must be Tensor." assert isinstance(idx, int), "The second argument idx must be an int number." flat_t = paddle.reshape(t, [-1]) @@ -33,7 +46,9 @@ def _get_item(t, idx): def _set_item(t, idx, value): - assert isinstance(t, paddle.Tensor), "The first argument t must be Tensor." + assert isinstance( + t, + paddle.fluid.framework.Variable), "The first argument t must be Tensor." assert isinstance(idx, int), "The second argument idx must be an int number." 
flat_t = paddle.reshape(t, [-1]) @@ -42,8 +57,8 @@ def _set_item(t, idx, value): def _compute_numerical_jacobian(func, xs, delta, np_dtype): - xs = _tensors(xs, "xs") - ys = _tensors(func(*xs), "ys") + xs = list(_as_tensors(xs)) + ys = list(_as_tensors(func(*xs))) fin_size = len(xs) fout_size = len(ys) jacobian = list([] for _ in range(fout_size)) @@ -59,11 +74,11 @@ def _compute_numerical_jacobian(func, xs, delta, np_dtype): orig = _get_item(xs[j], q) x_pos = orig + delta xs[j] = _set_item(xs[j], q, x_pos) - ys_pos = _tensors(func(*xs), "ys_pos") + ys_pos = _as_tensors(func(*xs)) x_neg = orig - delta xs[j] = _set_item(xs[j], q, x_neg) - ys_neg = _tensors(func(*xs), "ys_neg") + ys_neg = _as_tensors(func(*xs)) xs[j] = _set_item(xs[j], q, orig) @@ -76,8 +91,8 @@ def _compute_numerical_jacobian(func, xs, delta, np_dtype): def _compute_numerical_hessian(func, xs, delta, np_dtype): - xs = _tensors(xs, "xs") - ys = _tensors(func(*xs), "ys") + xs = list(_as_tensors(xs)) + ys = list(_as_tensors(func(*xs))) fin_size = len(xs) hessian = list([] for _ in range(fin_size)) for i in range(fin_size): @@ -107,10 +122,22 @@ def _compute_numerical_hessian(func, xs, delta, np_dtype): return hessian -def _compute_numerical_batch_jacobian(func, xs, delta, np_dtype): +def concat_to_matrix(xs, is_batched=False): + """Concats a tuple of tuple of Jacobian/Hessian matrix into one matrix""" + rows = [] + for i in range(len(xs)): + rows.append(np.concatenate([x for x in xs[i]], -1)) + return np.concatenate(rows, 1) if is_batched else np.concatenate(rows, 0) + + +def _compute_numerical_batch_jacobian(func, + xs, + delta, + np_dtype, + merge_batch=True): no_batch_jacobian = _compute_numerical_jacobian(func, xs, delta, np_dtype) - xs = _tensors(xs, "xs") - ys = _tensors(func(*xs), "ys") + xs = list(_as_tensors(xs)) + ys = list(_as_tensors(func(*xs))) fin_size = len(xs) fout_size = len(ys) bs = xs[0].shape[0] @@ -128,7 +155,8 @@ def _compute_numerical_batch_jacobian(func, xs, delta, np_dtype): for b in range(bs): for q in range(in_size): batch_jac_i_j[p][b][q] = jac[b][p][b][q] - batch_jac_i_j = np.reshape(batch_jac_i_j, (out_size, -1)) + if merge_batch: + batch_jac_i_j = np.reshape(batch_jac_i_j, (out_size, -1)) batch_jac_i.append(batch_jac_i_j) bat_jac.append(batch_jac_i) @@ -136,7 +164,7 @@ def _compute_numerical_batch_jacobian(func, xs, delta, np_dtype): def _compute_numerical_batch_hessian(func, xs, delta, np_dtype): - xs = _tensors(xs, "xs") + xs = list(_as_tensors(xs)) batch_size = xs[0].shape[0] fin_size = len(xs) hessian = [] @@ -175,8 +203,10 @@ def _compute_numerical_batch_hessian(func, xs, delta, np_dtype): def _compute_numerical_vjp(func, xs, v, delta, np_dtype): - xs = _tensors(xs, "xs") + xs = _as_tensors(xs) jacobian = np.array(_compute_numerical_jacobian(func, xs, delta, np_dtype)) + if v is None: + v = [paddle.ones_like(x) for x in xs] flat_v = np.array([v_el.numpy().reshape(-1) for v_el in v]) vjp = [np.zeros((_product(x.shape)), dtype=np_dtype) for x in xs] for j in range(len(xs)): @@ -188,7 +218,7 @@ def _compute_numerical_vjp(func, xs, v, delta, np_dtype): def _compute_numerical_vhp(func, xs, v, delta, np_dtype): - xs = _tensors(xs, "xs") + xs = list(_as_tensors(xs)) hessian = np.array(_compute_numerical_hessian(func, xs, delta, np_dtype)) flat_v = np.array([v_el.numpy().reshape(-1) for v_el in v]) vhp = [np.zeros((_product(x.shape)), dtype=np_dtype) for x in xs] @@ -198,3 +228,166 @@ def _compute_numerical_vhp(func, xs, v, delta, np_dtype): flat_v) vhp = [vhp[j].reshape(xs[j].shape) for j in 
range(len(xs))] return vhp + + +########################################################## +# TestCases of different function. +########################################################## +def reduce(x): + return paddle.sum(x) + + +def reduce_dim(x): + return paddle.sum(x, axis=0) + + +def matmul(x, y): + return paddle.matmul(x, y) + + +def mul(x, y): + return x * y + + +def pow(x, y): + return paddle.pow(x, y) + + +def o2(x, y): + return paddle.multiply(x, y), paddle.matmul(x, y.t()) + + +def unuse(x, y): + return paddle.sum(x) + + +def nested(x): + def inner(y): + return x * y + + return inner + + +def square(x): + return x * x + + +########################################################## +# Parameterized Test Utils. +########################################################## + +TEST_CASE_NAME = 'suffix' + + +def place(devices, key='place'): + """A Decorator for a class which will make the class running on different + devices . + + Args: + devices (Sequence[Paddle.CUDAPlace|Paddle.CPUPlace]): Device list. + key (str, optional): Defaults to 'place'. + """ + + def decorate(cls): + module = sys.modules[cls.__module__].__dict__ + raw_classes = { + k: v + for k, v in module.items() if k.startswith(cls.__name__) + } + + for raw_name, raw_cls in raw_classes.items(): + for d in devices: + test_cls = dict(raw_cls.__dict__) + test_cls.update({key: d}) + new_name = raw_name + '.' + d.__class__.__name__ + module[new_name] = type(new_name, (raw_cls, ), test_cls) + del module[raw_name] + return cls + + return decorate + + +def parameterize(fields, values=None): + """Decorator for a unittest class which make the class running on different + test cases. + + Args: + fields (Sequence): The feild name sequence of test cases. + values (Sequence, optional): The test cases sequence. Defaults to None. + + """ + fields = [fields] if isinstance(fields, str) else fields + params = [dict(zip(fields, vals)) for vals in values] + + def decorate(cls): + test_cls_module = sys.modules[cls.__module__].__dict__ + for i, values in enumerate(params): + test_cls = dict(cls.__dict__) + values = { + k: staticmethod(v) if callable(v) else v + for k, v in values.items() + } + test_cls.update(values) + name = cls.__name__ + str(i) + name = name + '.' + \ + values.get('suffix') if values.get('suffix') else name + + test_cls_module[name] = type(name, (cls, ), test_cls) + + for m in list(cls.__dict__): + if m.startswith("test"): + delattr(cls, m) + return cls + + return decorate + + +########################################################## +# Utils for transpose different Jacobian/Hessian matrix format. +########################################################## + +# B is batch size, N is row size, M is column size. 
+MatrixFormat = enum.Enum('MatrixFormat', ('NBM', 'BNM', 'NMB', 'NM')) + + +def _np_transpose_matrix_format(src, src_format, des_format): + """Transpose Jacobian/Hessian matrix format.""" + supported_format = (MatrixFormat.NBM, MatrixFormat.BNM, MatrixFormat.NMB) + if src_format not in supported_format or des_format not in supported_format: + raise ValueError( + f"Supported Jacobian format is {supported_format}, but got src: {src_format}, des: {des_format}" + ) + + src_axis = {c: i for i, c in enumerate(src_format.name)} + dst_axis = tuple(src_axis[c] for c in des_format.name) + + return np.transpose(src, dst_axis) + + +def _np_concat_matrix_sequence(src, src_format=MatrixFormat.NM): + """Convert a sequence of sequence of Jacobian/Hessian matrix into one huge + matrix.""" + + def concat_col(xs): + if src_format in (MatrixFormat.NBM, MatrixFormat.BNM, MatrixFormat.NM): + return np.concatenate(xs, axis=-1) + else: + return np.concatenate(xs, axis=1) + + def concat_row(xs): + if src_format in (MatrixFormat.NBM, MatrixFormat.NM, MatrixFormat.NMB): + return np.concatenate(xs, axis=0) + else: + return np.concatenate(xs, axis=1) + + supported_format = (MatrixFormat.NBM, MatrixFormat.BNM, MatrixFormat.NMB, + MatrixFormat.NM) + if src_format not in supported_format: + raise ValueError( + f"Supported Jacobian format is {supported_format}, but got {src_format}" + ) + if not isinstance(src, typing.Sequence): + return src + if not isinstance(src[0], typing.Sequence): + src = [src] + return concat_row(tuple(concat_col(xs) for xs in src)) diff --git a/python/paddle/incubate/__init__.py b/python/paddle/incubate/__init__.py index 83dad710ba..182aae40f2 100644 --- a/python/paddle/incubate/__init__.py +++ b/python/paddle/incubate/__init__.py @@ -26,6 +26,7 @@ from .tensor import segment_mean from .tensor import segment_max from .tensor import segment_min from .passes import fuse_resnet_unit_pass +import paddle.incubate.autograd from . import nn #noqa: F401 diff --git a/python/paddle/incubate/autograd/__init__.py b/python/paddle/incubate/autograd/__init__.py new file mode 100644 index 0000000000..5528bb4d06 --- /dev/null +++ b/python/paddle/incubate/autograd/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from paddle.autograd.functional import Hessian, Jacobian, jvp, vjp + +__all__ = [ # noqa + 'vjp', 'jvp', 'Jacobian', 'Hessian' +] diff --git a/python/setup.py.in b/python/setup.py.in index 3e59e22fcb..7f311feb4e 100755 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -273,6 +273,7 @@ packages=['paddle', 'paddle.distributed.ps', 'paddle.distributed.ps.utils', 'paddle.incubate', + 'paddle.incubate.autograd', 'paddle.incubate.optimizer', 'paddle.incubate.checkpoint', 'paddle.incubate.operators', diff --git a/tools/windows/run_unittests.sh b/tools/windows/run_unittests.sh index dd6a4ad288..44dc4eac26 100644 --- a/tools/windows/run_unittests.sh +++ b/tools/windows/run_unittests.sh @@ -12,55 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -set -e -set +x -NIGHTLY_MODE=$1 -PRECISION_TEST=$2 -WITH_GPU=$3 - -export PADDLE_ROOT="$(cd "$PWD/../" && pwd )" -if [ ${NIGHTLY_MODE:-OFF} == "ON" ]; then - nightly_label="" -else - nightly_label="(RUN_TYPE=NIGHTLY|RUN_TYPE=DIST:NIGHTLY|RUN_TYPE=EXCLUSIVE:NIGHTLY)" - echo "=========================================" - echo "Unittests with nightly labels are only run at night" - echo "=========================================" -fi - -if disable_ut_quickly=$(python ${PADDLE_ROOT}/tools/get_quick_disable_lt.py); then - echo "=========================================" - echo "The following unittests have been disabled:" - echo ${disable_ut_quickly} - echo "=========================================" -else - disable_ut_quickly='' -fi - -# check added ut -set +e -cp $PADDLE_ROOT/tools/check_added_ut.sh $PADDLE_ROOT/tools/check_added_ut_win.sh -bash $PADDLE_ROOT/tools/check_added_ut_win.sh -rm -rf $PADDLE_ROOT/tools/check_added_ut_win.sh -if [ -f "$PADDLE_ROOT/added_ut" ];then - added_uts=^$(awk BEGIN{RS=EOF}'{gsub(/\n/,"$|^");print}' $PADDLE_ROOT/added_ut)$ - ctest -R "(${added_uts})" --output-on-failure -C Release --repeat-until-fail 3;added_ut_error=$? - rm -f $PADDLE_ROOT/added_ut - if [ "$added_ut_error" != 0 ];then - echo "========================================" - echo "Added UT should pass three additional executions" - echo "========================================" - exit 8; - fi - if nvcc --version | grep 11.2; then - echo "Only test added_ut temporarily when running in CI-Windows-inference of CUDA 11.2." 
- exit 0; - fi -fi -set -e - -# /*==================Fixed Disabled Windows GPU MKL unittests==============================*/ +# /*================Fixed Disabled Windows CUDA10.x MKL(PR-CI-Windows) unittests===========================*/ # TODO: fix these unittest that is bound to fail disable_wingpu_test="^test_model$|\ ^test_dataloader_early_reset$|\ @@ -97,7 +50,7 @@ disable_wingpu_test="^test_model$|\ ^test_bilinear_interp_op$|\ ^disable_wingpu_test$" -# /*==================Fixed Disabled Windows GPU MKL unittests==============================*/ +# /*=================Fixed Disabled Windows TRT MKL unittests=======================*/ # TODO: fix these unittest that is bound to fail disable_win_trt_test="^test_trt_convert_conv2d$|\ ^test_trt_convert_conv2d_fusion$|\ @@ -119,7 +72,13 @@ disable_win_trt_test="^test_trt_convert_conv2d$|\ ^test_trt_convert_matmul$|\ ^test_trt_convert_scale$" -# /*==================Fixed Disabled Windows GPU inference_api_test unittests==============================*/ +# /*=============Fixed Disabled Windows CUDA11.x MKL(PR-CI-Windows-Inference) unittests=================*/ +# TODO: fix these unittest that is bound to fail +disable_wingpu11_test="^test_autograd_functional_dynamic$|\ +^disable_wingpu_test$" + + +# /*==========Fixed Disabled Windows CUDA11.x inference_api_test(PR-CI-Windows-Inference) unittests=============*/ disable_win_inference_api_test="^trt_quant_int8_yolov3_r50_test$|\ ^test_trt_dynamic_shape_ernie$|\ ^test_trt_dynamic_shape_ernie_fp16_ser_deser$|\ @@ -128,9 +87,8 @@ disable_win_inference_api_test="^trt_quant_int8_yolov3_r50_test$|\ ^lite_mul_model_test$|\ ^paddle_infer_api_copy_tensor_tester$" -# /*============================================================================*/ -# /*==================Fixed Disabled Windows CPU OPENBLAS unittests==============================*/ +# /*==========Fixed Disabled Windows CPU OPENBLAS((PR-CI-Windows-OPENBLAS)) unittests==============================*/ # TODO: fix these unittest that is bound to fail disable_wincpu_test="^jit_kernel_test$|\ ^test_analyzer_transformer$|\ @@ -189,6 +147,58 @@ long_time_test="^test_gru_op$|\ ^test_trt_matmul_quant_dequant$|\ ^test_strided_slice_op$" + +# /*============================================================================*/ + +set -e +set +x +NIGHTLY_MODE=$1 +PRECISION_TEST=$2 +WITH_GPU=$3 + +export PADDLE_ROOT="$(cd "$PWD/../" && pwd )" +if [ ${NIGHTLY_MODE:-OFF} == "ON" ]; then + nightly_label="" +else + nightly_label="(RUN_TYPE=NIGHTLY|RUN_TYPE=DIST:NIGHTLY|RUN_TYPE=EXCLUSIVE:NIGHTLY)" + echo "=========================================" + echo "Unittests with nightly labels are only run at night" + echo "=========================================" +fi + +if disable_ut_quickly=$(python ${PADDLE_ROOT}/tools/get_quick_disable_lt.py); then + echo "=========================================" + echo "The following unittests have been disabled:" + echo ${disable_ut_quickly} + echo "=========================================" +else + disable_ut_quickly='' +fi + +# check added ut + +set +e +cp $PADDLE_ROOT/tools/check_added_ut.sh $PADDLE_ROOT/tools/check_added_ut_win.sh +bash $PADDLE_ROOT/tools/check_added_ut_win.sh +rm -rf $PADDLE_ROOT/tools/check_added_ut_win.sh +if [ -f "$PADDLE_ROOT/added_ut" ];then + added_uts=^$(awk BEGIN{RS=EOF}'{gsub(/\n/,"$|^");print}' $PADDLE_ROOT/added_ut)$ + ctest -R "(${added_uts})" -E "$disable_wingpu11_test" --output-on-failure -C Release --repeat-until-fail 3;added_ut_error=$? 
+    rm -f $PADDLE_ROOT/added_ut
+    if [ "$added_ut_error" != 0 ];then
+        echo "========================================"
+        echo "Added UT should pass three additional executions"
+        echo "========================================"
+        exit 8;
+    fi
+    if nvcc --version | grep 11.2; then
+        echo "Only test added_ut temporarily when running in CI-Windows-inference of CUDA 11.2."
+        exit 0;
+    fi
+fi
+set -e
+
+
 if [ ${WITH_GPU:-OFF} == "ON" ];then
     export CUDA_VISIBLE_DEVICES=0
-- 
GitLab
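
As a quick way to exercise the new incubate surface end to end, the sketch below calls the exports added in python/paddle/incubate/autograd/__init__.py (vjp, jvp, Jacobian, Hessian) in dynamic graph mode. It is a minimal sketch only: the behaviour of an omitted v follows the ones_like default added to _compute_numerical_vjp in the test utils, and the Jacobian slicing mirrors the class usage visible in the removed static-graph tests (JJ[i], JJ[i, j], HH[:]), so the exact incubate signatures and indexing semantics should be treated as assumptions rather than the authoritative API.

import paddle
from paddle.incubate.autograd import Jacobian, jvp, vjp


def func(x):
    # Toy function f(x) = x @ x, used only for illustration.
    return paddle.matmul(x, x)


x = paddle.rand([2, 2], dtype='float32')
x.stop_gradient = False

# vjp/jvp return the function output together with the vector-Jacobian
# (resp. Jacobian-vector) product; v is omitted here and assumed to default
# to a ones-like vector, matching the numerical helpers in utils.py.
out, vjp_result = vjp(func, x)
out, jvp_result = jvp(func, x)

# Jacobian is constructed lazily and evaluated through indexing; the exact
# slicing syntax below is assumed from the in-patch class usage.
J = Jacobian(func, x)
full_jac = J[:]
print(full_jac.shape)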
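
On the testing side, the place and parameterize decorators added to python/paddle/fluid/tests/unittests/autograd/utils.py generate per-device and per-test-case unittest classes at module level. The snippet below is an illustrative sketch of how a test module in that directory might stack them; only place, parameterize and TEST_CASE_NAME come from the patch, while the test class, field names and data are hypothetical.

import unittest

import numpy as np
import paddle

# `place`, `parameterize` and TEST_CASE_NAME are the helpers added to the
# autograd test utils in this patch; the rest of this module is hypothetical.
from utils import TEST_CASE_NAME, parameterize, place


@place([paddle.CPUPlace()])
@parameterize((TEST_CASE_NAME, 'xs'), (
    ('float32_2x2', np.ones((2, 2), dtype='float32')),
    ('float64_3x3', np.ones((3, 3), dtype='float64')),
))
class TestSquare(unittest.TestCase):
    def test_square(self):
        # parameterize creates one subclass per case (carrying `suffix` and
        # `xs`), and place crosses them with each device (as `place`), so
        # this body runs once per generated class.
        x = paddle.to_tensor(self.xs, place=self.place)
        np.testing.assert_allclose((x * x).numpy(), self.xs * self.xs)


if __name__ == '__main__':
    unittest.main()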