Unverified commit 9e764d82 authored by Xiaoxu Chen, committed by GitHub


Enhance vjp/jvp/Jacobian/Hessian API for supporting dynamic, static graph and batched, unbatched mode (#40692)

* modify vjp/jvp for both dynamic and static graph

* enhance Jacobian class for supporting first/last batch

* add unittest for jvp, jacobian with last batch, jacobian with first batch

* fix the incorrect shape when multi-index Jacobian

* enhance Hessian class for supporting dynamic graph

* add Hessian class unittest

* bugfix: jvp double_backward_trick's zeros_like returned stop_gradient=True in static graph

* add API beta warnings

* add white_list for cuda11.x Windows CI.

* optimize some code snippets and documentation

* set unittest timeout to 100 seconds

* move vjp,jvp,Jacobian,Hessian to incubate

* fix vjp, jvp import path of sample code

* fix code style error of autograd/__init__ file
Parent ab8c33b1
@@ -13,12 +13,18 @@
 # limitations under the License.
 from ..fluid.dygraph.base import grad # noqa: F401
+from ..fluid.dygraph.base import no_grad_ as no_grad # noqa: F401
+from ..framework import is_grad_enabled, set_grad_enabled # noqa: F401
 from . import backward_mode # noqa: F401
 from .backward_mode import backward # noqa: F401
 from .py_layer import PyLayer, PyLayerContext, EagerPyLayer, EagerPyLayerContext # noqa: F401
 from ..framework import set_grad_enabled, is_grad_enabled # noqa: F401
 from ..fluid.dygraph.base import no_grad_ as no_grad # noqa: F401
-from .functional import jacobian, hessian, batch_jacobian, batch_hessian # noqa: F401
-from .functional import vjp, jvp, vhp # noqa: F401
+from .functional import vjp, jvp, Jacobian, Hessian # noqa: F401
+from .functional import jacobian, hessian, batch_jacobian, batch_hessian, vhp # noqa: F401

-__all__ = ['backward', 'PyLayer', 'PyLayerContext']
+__all__ = [  # noqa
+    'backward',
+    'PyLayer',
+    'PyLayerContext',
+]
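A minimal usage sketch (illustrative only; the docstrings below use the ``paddle.incubate.autograd`` path that this PR moves the functional APIs to):

    import paddle

    def func(x):
        return paddle.matmul(x, x)

    x = paddle.ones(shape=[2, 2], dtype='float32')
    _, vjp_result = paddle.incubate.autograd.vjp(func, x)   # reverse-mode product
    J = paddle.incubate.autograd.Jacobian(func, x)           # lazily evaluated Jacobian object
    print(J[:, :])                                           # full 4x4 Jacobian matrix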
@@ -12,236 +12,686 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib import functools
import paddle import typing
from paddle.static import gradients
from ..fluid import framework
from ..fluid.dygraph import grad
from ..tensor.creation import assign
from ..tensor import reshape, zeros_like, to_tensor
from .utils import _tensors, _stack_tensor_or_return_none, _replace_none_with_zero_tensor
@contextlib.contextmanager import paddle
def gradient_scope(*var_lists, create_graph=False, allow_unused=False): from paddle.fluid import framework
def grad_fn(ys, xs, v=None, create_graph=create_graph):
if v is not None:
assert len(ys) == len(v), (
f'The argument {v} is expected to be of the same size as the output. '
f'Here the output is {ys}, and `v` is {v}.')
if allow_unused:
ys = [
to_tensor(
[0.0], stop_gradient=False) if y is None else y for y in ys
]
return grad(
ys, xs, v, create_graph=create_graph, allow_unused=allow_unused)
def return_fn(out):
if isinstance(out, paddle.Tensor):
if not create_graph:
out = out.detach()
return out
if isinstance(out, list):
return list(return_fn(x) for x in out)
elif isinstance(out, tuple):
return tuple(return_fn(x) for x in out)
else:
assert out is None
return out
def process(vl):
if vl is None:
return None
out = []
# If v is treated as constant in the outer scope, its gradient is guaranteed
# not to be taken beyond this scope. Within this scope, however, v's gradient
# may be computed. We only need to detach v in this case.
# Otherwise, v's gradient is valid, and is subject to update beyond this scope.
# In this case we must not confuse the gradient in the outer scope with the
# inner one's. Moreover, we need to make sure that the result from the inner
# scope can flow back to the outer scope. This can be satisfied by extending
# the original variable with a duplication operation v1 = v so that v still
# maintains the complete lineage.
for v in vl:
if v is None:
out.append(v)
continue
if create_graph and not v.stop_gradient:
v = assign(v)
else:
v = v.detach()
v.stop_gradient = False
out.append(v)
return out
try:
var_lists = [process(vl) for vl in var_lists]
bundle = var_lists + [grad_fn, return_fn]
yield bundle
finally:
pass
@framework.dygraph_only def vjp(func, xs, v=None):
def vjp(func, inputs, v=None, create_graph=False, allow_unused=False):
r"""Computes the Vector-Jacobian product, a functional form of r"""Computes the Vector-Jacobian product, a functional form of
reverse mode automatic differentiation. reverse mode automatic differentiation.
Warning:
This API is in beta, the signatures could be changed in future versions.
Args: Args:
func(Callable): `func` takes as input a tensor or a list/tuple func(Callable): A function that takes ``xs`` as inputs parameter and
of tensors and returns a tensor or a list/tuple of tensors. returns a sequence of Tensors or a Tensor.
inputs(list[Tensor]|tuple[Tensor]|Tensor): used as positional xs(Tensor|Sequence[Tensor]): Used as positional arguments to evaluate
arguments to evaluate `func`. `inputs` is accepted as one ``func``. ``xs`` is accepted as one Tensor or a sequence of Tensors.
tensor or a list of tensors. v(Tensor|Sequence[Tensor]|None, optional): The cotangent vector involved
v(list[Tensor]|tuple[Tensor]|Tensor|None, optional): the in the VJP computation. ``v`` matches the size and shape of
cotangent vector invovled in the VJP computation. `v` matches ``func`` 's output. Defaults to None, which is equivalent to all
the size and shape of `func`'s output. Default value is None ones the same size of ``func`` 's output.
and in this case is equivalent to all ones the same size
of `func`'s output.
create_graph(bool, optional): if `True`, gradients can be
evaluated on the results. If `False`, taking gradients on
the results is invalid. Default value is False.
allow_unused(bool, optional): In case that some Tensors of
`inputs` do not contribute to the computation of the output.
If `allow_unused` is False, an error will be raised,
Otherwise, the gradients of the said inputs are returned
None. Default value is False.
Returns: Returns:
output(tuple): output(tuple):
func_out(list[Tensor]|tuple[Tensor]|Tensor): the output of
`func(inputs)` - func_out(Tensor|tuple[Tensor]): The output of ``func(xs)`` .
vjp(list[Tensor]): the pullback results of `v` on `func` - vjp(Tensor|tuple[Tensor]): The vjp result.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle
def func(x): def func(x):
return paddle.matmul(x, x) return paddle.matmul(x, x)
x = paddle.ones(shape=[2, 2], dtype='float32') x = paddle.ones(shape=[2, 2], dtype='float32')
output, inputs_grad = vjp(func, x) _, vjp_result = paddle.incubate.autograd.vjp(func, x)
print(inputs_grad) print(vjp_result)
# [Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=True, # Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
# [[4., 4.], # [[4., 4.],
# [4., 4.]])] # [4., 4.]])
v = paddle.to_tensor([[1.0, 0.0], [0.0, 0.0]]) v = paddle.to_tensor([[1.0, 0.0], [0.0, 0.0]])
output, inputs_grad = vjp(func, x, v) _, vjp_result = paddle.incubate.autograd.vjp(func, x, v)
print(inputs_grad) print(vjp_result)
# [Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=True, # Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
# [[2., 1.],
# [1., 0.]])]
output, inputs_grad = vjp(func, x, v, create_graph=True)
print(inputs_grad)
# [Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=False,
# [[2., 1.], # [[2., 1.],
# [1., 0.]])] # [1., 0.]])
y = paddle.ones(shape=[2, 2], dtype='float32')
def func_unused(x, y):
return paddle.matmul(x, x)
output, inputs_grad = vjp(func, [x, y], v)
# ValueError: (InvalidArgument) The 1-th input does not appear in the backward graph.
# Please check the input variable or set allow_unused=True to get None result.
# [Hint: Expected allow_unused_ == true, but received allow_unused_:0 != true:1.]
output, inputs_grad = vjp(func, [x, y], v, allow_unused=True)
print(inputs_grad)
# [Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
# [[2., 1.],
# [1., 0.]]), None]
""" """
xs = _tensors(inputs, "inputs") _check_inputs(func, xs, v)
if v is not None:
v = _tensors(v, "v")
with gradient_scope( # ``_separate`` breaks the dependencies between ``xs`` and other
xs, v, create_graph=create_graph, # variables. See ``_separate`` for more details.
allow_unused=allow_unused) as [xs, v, grad_fn, return_fn]: xs, v = _separate(xs), _separate(v)
outputs = func(*xs) ys = func(*xs) if isinstance(xs, typing.Sequence) else func(xs)
ys = _tensors(outputs, "outputs") _check_v_shape(v, ys)
grads = grad_fn(ys, xs, v)
outputs, grads = return_fn(outputs), return_fn(grads)
return outputs, grads return ys, _grad(ys, xs, v)
@framework.dygraph_only def jvp(func, xs, v=None):
def jvp(func, inputs, v=None, create_graph=False, allow_unused=False):
r""" r"""
Computes the Jacobian-Vector product for a function at the given Computes the Jacobian-Vector product for a function at the given
inputs and a vector in the tangent space induced by the inputs. inputs and a vector in the tangent space induced by the inputs.
.. note:: Warning:
**This API is ONLY available in imperative mode.** This API is in beta, the signatures could be changed in future versions.
Args: Args:
func(Callable): `func` takes as input a tensor or a list/tuple func(Callable): The ``func`` takes as input a Tensor or a Sequence
of tensors and returns a tensor or a list/tuple of tensors. of Tensors and returns a Tensor or a Sequence of Tensors.
inputs(list[Tensor]|tuple[Tensor]|Tensor): used as positional xs(Tensor|Sequence[Tensor]): Used as positional arguments to
arguments to evaluate `func`. `inputs` is accepted as one evaluate ``func``. The ``xs`` is accepted as one Tensor or a
tensor or a list/tuple of tensors. Sequence of Tensors.
v(list[Tensor]|tuple[Tensor]|Tensor|None, optional): the v(Tensor|Sequence[Tensor]|None, optional): The tangent vector involved
tangent vector invovled in the JVP computation. `v` matches in the JVP computation. The ``v`` matches the size and shape of
the size and shape of `inputs`. `v` is Optional if `func` ``xs`` . Default value is None and in this case is equivalent to
returns a single tensor. Default value is None and in this all ones the same size of ``xs`` .
case is equivalent to all ones the same size of `inputs`.
create_graph(bool, optional): if `True`, gradients can
be evaluated on the results. If `False`, taking gradients
on the results is invalid. Default value is False.
allow_unused(bool, optional): In case that some Tensors of
`inputs` do not contribute to the computation of the output.
If `allow_unused` is False, an error will be raised,
Otherwise, the gradients of the said inputs are returned
None. Default value is False.
Returns: Returns:
output(tuple): output(tuple):
func_out(list[Tensor]|tuple[Tensor]|Tensor): the output of
`func(inputs)` - func_out(Tensor|tuple[Tensor]): The output of ``func(xs)`` .
jvp(list[Tensor]): the pullback results of `v` on `func` - jvp(Tensor|tuple[Tensor]): The jvp result.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle
def func(x): def func(x):
return paddle.matmul(x, x) return paddle.matmul(x, x)
x = paddle.ones(shape=[2, 2], dtype='float32') x = paddle.ones(shape=[2, 2], dtype='float32')
_, jvp_result = paddle.incubate.autograd.jvp(func, x)
print(jvp_result)
# Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
# [[4., 4.],
# [4., 4.]])
v = paddle.to_tensor([[1.0, 0.0], [0.0, 0.0]])
_, jvp_result = paddle.incubate.autograd.jvp(func, x, v)
print(jvp_result)
# Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
# [[2., 1.],
# [1., 0.]])
output, inputs_grad = jvp(func, x) """
print(inputs_grad) _check_inputs(func, xs, v)
# [Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=True, # ``_separate`` breaks the dependencies between ``xs`` and other
# [[2., 2.], # variables. See ``_separate`` for more details.
# [2., 2.]])] xs, v = _separate(xs), _separate(v)
ys = func(*xs) if isinstance(xs, typing.Sequence) else func(xs)
_check_v_shape(v, xs)
return ys, _double_backward_trick(ys, xs, v)
def _double_backward_trick(ys, xs, v):
"""Double backward trick for computing ``jvp`` by ``vjp``
see details: https://j-towns.github.io/2017/06/12/A-new-trick.html
"""
# The value of ys_grad is not important, it can be any random value in
# theory, but it's required to set stop_gradient=False.
ys_grad = _zeros_like_with_grad(ys)
xs_grad = _grad(ys, xs, ys_grad)
return _grad(xs_grad, ys_grad, v)
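# Illustrative sketch (not part of this module): the same double backward trick
# written with raw ``paddle.grad`` calls in dynamic graph mode. The dummy
# cotangent ``u`` can hold any value, but must have stop_gradient=False:
#
#     x = paddle.rand([2, 2]); x.stop_gradient = False
#     v = paddle.ones_like(x)                                   # tangent on the inputs
#     y = paddle.matmul(x, x)
#     u = paddle.zeros_like(y); u.stop_gradient = False
#     vjp_u = paddle.grad(y, x, u, create_graph=True)[0]        # J(x)^T u, linear in u
#     jvp_v = paddle.grad(vjp_u, u, v, create_graph=True)[0]    # d(J^T u)/du contracted with v = J(x) v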
v = paddle.to_tensor([[1.0, 0.0], [0.0, 0.0]])
output, inputs_grad = vjp(func, x, v)
print(inputs_grad)
# [Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
# [[1., 1.],
# [0., 0.]])]
def _zeros_like_with_grad(xs):
"""Create a zero or zeros sequence Tensor like ``xs`` with a flag
``stop_graident=False`` .
""" """
xs = _tensors(inputs, "inputs") if not isinstance(xs, typing.Sequence):
if v is not None: ys = paddle.zeros_like(xs)
v = _tensors(v, "v") ys.stop_gradient = False
else:
ys = []
for x in xs:
y = paddle.zeros_like(x)
y.stop_gradient = False
ys.append(y)
return ys
with gradient_scope(
xs, v, create_graph=create_graph,
allow_unused=allow_unused) as [xs, v, grad_fn, return_fn]:
outputs = func(*xs)
ys = _tensors(outputs, "outputs")
ys_grad = [zeros_like(y) for y in ys]
xs_grad = grad_fn(ys, xs, ys_grad, create_graph=True)
ys_grad = grad_fn(xs_grad, ys_grad, v)
outputs, ys_grad = return_fn(outputs), return_fn(ys_grad)
return outputs, ys_grad class Jacobian(object):
r"""
Computes the Jacobian matrix of a given function.
If the function has multiple inputs and multiple outputs, during internal
implementation, all input tensors are concatenated after being flattened,
the batch dimension is retained, and the output is subject to the same
processing rules.
Once the Jacobian ``J`` is constructed, you can use a multidimensional index
to retrieve the submatrix of ``J``, the same as slicing a Tensor. The
submatrix is lazily evaluated along row axis, and will be cached once
evaluated.
For examples, supposing ``is_batched=True``, you can retrieve the submatrix
by following methods:
* J[:], retrieving the full matrix.
* J[:, :, j], retrieving the partial derivatives w.r.t. the j'th input
variable.
* J[:, i, :], retrieving the partial derivatives w.r.t. the i'th output
variable.
* J[:, i, j], retrieving the partial derivatives w.r.t. the i'th output
variable and the j'th input variable.
Notes:
Ellipsis index is not supported currently.
Warning:
This API is in beta, the signatures could be changed in future versions.
Args:
func (Callable): A python function that takes a Tensor or a sequence of
Tensors as inputs (the first dimension is the batch size) and
returns a Tensor or a sequence of Tensors.
xs (Tensor|Sequence[Tensor]): The input to the function ``func`` .
is_batched (bool): If true, the first axis is batch axis. Defaults to
False.
Returns:
Jacobian (Object): A python object that retains the Jacobian matrix.
Examples:
.. code-block:: python
import paddle
def func(x, y):
return paddle.matmul(x, y)
x = paddle.to_tensor([[1., 2.], [3., 4.]])
J = paddle.incubate.autograd.Jacobian(func, [x, x])
print(J[:, :])
# Tensor(shape=[4, 8], dtype=float32, place=Place(gpu:0), stop_gradient=False,
# [[1., 3., 0., 0., 1., 0., 2., 0.],
# [2., 4., 0., 0., 0., 1., 0., 2.],
# [0., 0., 1., 3., 3., 0., 4., 0.],
# [0., 0., 2., 4., 0., 3., 0., 4.]])
print(J[0, :])
# Tensor(shape=[8], dtype=float32, place=Place(gpu:0), stop_gradient=False,
# [1., 3., 0., 0., 1., 0., 2., 0.])
print(J[:, 0])
# Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=False,
# [1., 2., 0., 0.])
"""
def __init__(self, func, xs, is_batched=False):
if not is_batched:
self._jacobian = _JacobianNoBatch(func, xs)
else:
self._jacobian = _JacobianBatchFirst(func, xs)
def __getitem__(self, indexes):
return self._jacobian[indexes]
@property
def shape(self):
"""The shape of flattened Jacobian matrix.
"""
return self._jacobian.shape
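# Illustrative example (not part of this module): a batched Jacobian, where the
# first axis of the inputs and outputs is the batch axis (``is_batched=True``):
#
#     def func(x):                # x: [batch_size, 2]
#         return x * x            # elementwise, output: [batch_size, 2]
#
#     x = paddle.rand([3, 2])
#     x.stop_gradient = False
#     J = paddle.incubate.autograd.Jacobian(func, x, is_batched=True)
#     print(J.shape)              # (3, 2, 2): (batch, flattened output, flattened input)
#     print(J[:, 0, :])           # derivatives of the first output element, evaluated lazily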
class Hessian(object):
"""
Computes the Hessian matrix with a given ``func`` with respect to ``xs`` .
If the function has multiple inputs, during internal implementation,
all input tensors are concatenated after being flattened, the batch dimension
is retained.
The Hessian submatrix is lazily evaluated, and can be retrieved with
multidimensional indexes. See ``Jacobian`` for details.
Warning:
This API is in beta, the signatures could be changed in future versions.
Args:
func (Callable): A python function that takes a Tensor or a Tensor
sequence as inputs and returns a Tensor with shape
``[batch_size, 1]`` with batch or ``[1]`` without batch.
xs (Tensor|Sequence(Tensor)): The input Tensor or Tensor sequence of
the function ``func``.
is_batched (bool): If true, the first axis is batch axis. Defaults to
False.
Returns:
Hessian (Object): A python object that retains the Hessian matrix.
Examples:
.. code-block:: python
import paddle
def reducer(x):
return paddle.sum(x * x)
x = paddle.rand([2, 2])
h = paddle.incubate.autograd.Hessian(reducer, x)
print(h[:])
# Tensor(shape=[4, 4], dtype=float32, place=Place(gpu:0), stop_gradient=False,
# [[2., 0., 0., 0.],
# [0., 2., 0., 0.],
# [0., 0., 2., 0.],
# [0., 0., 0., 2.]])
"""
def __init__(self, func, xs, is_batched=False):
def _jac_func(*xs):
jac = Jacobian(func, xs, is_batched=is_batched)
if (is_batched and jac.shape[1] != 1) or (not is_batched and
jac.shape[0] != 1):
raise RuntimeError(
"The function given to Hessian shoud return as single element Tensor or batched single element Tensor."
)
return jac[:, 0, :] if is_batched else jac[0, :]
self.symbolic = Jacobian(_jac_func, xs, is_batched=is_batched)
def __getitem__(self, indexes):
return self.symbolic[indexes]
@property
def shape(self):
"""The shape of flattened Hessian matrix.
"""
return self.symbolic.shape
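# Illustrative example (not part of this module): a batched Hessian. With
# ``is_batched=True`` the function must return a Tensor of shape [batch_size, 1]:
#
#     def batched_reducer(x):                             # x: [batch_size, 2]
#         return paddle.sum(x * x, axis=1, keepdim=True)  # -> [batch_size, 1]
#
#     x = paddle.rand([3, 2])
#     x.stop_gradient = False
#     H = paddle.incubate.autograd.Hessian(batched_reducer, x, is_batched=True)
#     print(H.shape)              # (3, 2, 2): (batch, flattened input, flattened input)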
class _Jacobian(object):
"""The base class for computing Jacobian matrix.
``_Jacobian`` implements the core logic of multidimensional indexing and lazy
evaluation for the Jacobian matrix; subclasses only need to override the
following methods:
* ``_lazy_axis()``, returns the axis along which evaluation is lazy.
* ``_flatten(xs)``, flattens the inputs ``xs``.
* ``_evaluate(index)``, evaluates one slice along ``_lazy_axis`` .
Notes:
Because PaddlePaddle currently only supports reverse-mode differentiation
via ``paddle.grad``, lazy evaluation is only supported along the row of
the Jacobian matrix, which means that slicing along the row will get better
performance.
"""
def __init__(self, func, xs):
self._xs = _separate(xs)
self._ys = func(*_as_tensors(self._xs))
self._flatten_xs = self._flatten(_as_tensors(self._xs))
self._flatten_ys = self._flatten(_as_tensors(self._ys))
self._cache = {}
@property
def shape(self):
raise NotImplementedError
@property
def _lazy_axis(self):
""""The axis of lazily evaluated."""
raise NotImplementedError
def _lazy_indexes(self, indexes):
idx = indexes[self._lazy_axis]
return (idx, ) if isinstance(
idx, int) else tuple(range(idx.start, idx.stop, idx.step))
def _flatten(self, xs):
raise NotImplementedError
def _shifted_indexes(self, indexes, lazy_axis_size=0):
idx = indexes[self._lazy_axis]
shifted_lazy_axis_idx = 0 if isinstance(
idx, int) else slice(0, lazy_axis_size, 1)
return indexes[:self._lazy_axis] + (shifted_lazy_axis_idx,
) + indexes[self._lazy_axis + 1:]
def __getitem__(self, indexes):
indexes = _multi_index(indexes, self.shape)
if isinstance(indexes[self._lazy_axis], int):
other_indexes = indexes[:self._lazy_axis] + \
indexes[self._lazy_axis+1:]
return self._cached_evaluate(indexes[self._lazy_axis])[
other_indexes]
lazy_indexes = self._lazy_indexes(indexes)
part_jac = paddle.stack(
[self._cached_evaluate(i) for i in lazy_indexes],
axis=self._lazy_axis)
return part_jac[self._shifted_indexes(indexes, len(lazy_indexes))]
def _cached_evaluate(self, k):
v = self._cache.get(k)
if v is None:
v = self._evaluate(k)
self._cache[k] = v
return v
def _evaluate(self, index):
"""Evaluate one slice at along lazy axis."""
raise NotImplementedError
class _JacobianNoBatch(_Jacobian):
"""Compute Jacobian matrix without batch dimension.
Suppose the mapping is :math:`f: R^M \to R^N`, the output shape is
``(N, M)`` .
"""
def __init__(self, func, xs):
super(_JacobianNoBatch, self).__init__(func, xs)
@property
def shape(self):
return (self._flatten_ys.shape[0], self._flatten_xs.shape[0])
@property
def _lazy_axis(self):
return 0
def _flatten(self, xs):
return paddle.concat(tuple(x.reshape((-1, )) for x in xs))
def _evaluate(self, row_index):
return self._flatten(_grad(
self._flatten_ys[row_index],
self._xs, ))
class _JacobianBatchLast(_Jacobian):
"""Compute Jacobian matrix with batch at last axis.
Suppose the mapping is :math:`f: R^{M,B} \to R^{N,B}`, the output shape is
``(N, M, B)`` .
"""
def __init__(self, func, xs):
super(_JacobianBatchLast, self).__init__(func, xs)
@property
def shape(self):
return (self._flatten_ys.shape[0], self._flatten_xs.shape[0],
self._flatten_xs.shape[1])
@property
def _lazy_axis(self):
return 0
def _flatten(self, xs):
return paddle.concat(
tuple(x.reshape((-1, x.shape[-1])) for x in _as_tensors(xs)), 0)
def _evaluate(self, row):
return self._flatten(_grad(self._flatten_ys[row, :], self._xs))
class _JacobianBatchFirst(_Jacobian):
"""Compute Jacobian matrix with batch at first axis.
Suppose the mapping is :math:`f: R^{B,M} \to R^{B,N}`, the output shape is
``(B, N, M)`` .
"""
def __init__(self, func, xs):
super(_JacobianBatchFirst, self).__init__(func, xs)
@property
def shape(self):
return (self._flatten_xs.shape[0], self._flatten_ys.shape[1],
self._flatten_xs.shape[1])
@property
def _lazy_axis(self):
return 1
def _flatten(self, xs):
return paddle.concat(
tuple(x.reshape((x.shape[0], -1)) for x in _as_tensors(xs)), 1)
def _evaluate(self, row_index):
return self._flatten(_grad(self._flatten_ys[:, row_index], self._xs))
def _multi_index(indexes, shape):
"""A tool for parsing N-dimensional index into a standard format.
Currently supporting following input format:
* ([positive|negative|slice], ...), the right-most elements can be
omitted.
The standard format after conversion is a tuple which contains N elements:
* ([positive|slice], ..., [positive|slice])
Notes:
Ellipsis indexes such as ``(..., i), (i, ...)`` is not supported.
Args:
indexes (tuple): The input indexes.
shape (tuple): The input shape.
Returns:
tuple: The standard format index as the above description.
"""
indexes = indexes if isinstance(indexes, typing.Sequence) else (indexes, )
if any(isinstance(i, type(Ellipsis)) for i in indexes):
raise IndexError('Ellipsis index currently is not supported.')
# Fill the right-most elements.
indexes = indexes + (slice(0, None, None), ) * (len(shape) - len(indexes))
# Convert to positive index.
positive_indexes = []
for i, index in enumerate(indexes):
if isinstance(index, slice):
index = slice(index.start or 0, index.stop or shape[i],
index.step or 1)
positive_indexes.append(
slice(
index.start + shape[i] if index.start < 0 else index.start,
index.stop + shape[i] if index.stop < 0 else index.stop,
# Negative step means index backward, no need to convert to
# positive integer.
index.step))
elif isinstance(index, int):
positive_indexes.append(index + shape[i] if index < 0 else index)
else:
raise TypeError(f'Not supported index type {index}.')
return tuple(positive_indexes)
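# Illustrative examples (not part of this module) of the normalization performed
# by ``_multi_index`` for a Jacobian of shape (4, 8):
#
#     _multi_index(1, (4, 8))                    # -> (1, slice(0, 8, 1))
#     _multi_index((-1, slice(None)), (4, 8))    # -> (3, slice(0, 8, 1))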
def _as_tensors(xs):
return (xs, ) if isinstance(xs, framework.Variable) else xs
def _stack_tensor_or_return_none(origin_list):
assert len(origin_list) > 0, "Cannot stack an empty list"
return paddle.stack(
origin_list, axis=0) if isinstance(
origin_list[0], paddle.fluid.framework.Variable) else None
def _replace_none_with_zero_tensor(xs, refs):
if xs is None:
xs = paddle.zeros_like(refs)
xs.stop_gradient = refs.stop_gradient
return xs
elif isinstance(xs, typing.Sequence):
return tuple(
_replace_none_with_zero_tensor(x, refs[i])
for i, x in enumerate(xs))
else:
return xs
def _grad(ys, xs, v=None):
"""A gradient function that can be used in dynamic graph and static graph.
The ``_grad`` function combines ``paddle.grad`` used in dynamic graph and
``paddle.static.gradients`` used in static graph, and makes the following changes:
* The ``allow_unused`` flag is removed and defaults to true internally;
None in the outputs will be replaced by a zero tensor.
* The ``create_graph`` flag is removed and defaults to true internally,
which only makes sense in dynamic graph mode.
* When xs is a single Tensor, ``paddle.grad`` returns a list which only
contains one Tensor. This may confuse users, so in this case ``_grad``
returns a single Tensor instead.
Args:
ys (Tensor|Sequence[Tensor]): The output tensor or tensor sequence of
the graph to compute gradients.
xs (Tensor|Sequence[Tensor]): The input tensor or tensor sequence of the graph to
compute gradients. The returned values of this API are the
gradients of inputs .
v (Tensor|Sequence[Tensor]|None,optional): The initial gradient values
of outputs . If grad_outputs is None, the initial gradient values of
outputs would be Tensors filled with 1; if grad_outputs is not None,
it must have the same length as outputs , and in this case, the
initial gradient value of the i-th outputs would be: (1) a Tensor
filled with 1 when the i-th element of grad_outputs is None;
(2) the i-th element of grad_outputs when the i-th element of
grad_outputs is a Tensor. Default None.
Returns:
Tensor|tuple[Tensor]: Tensor or a tuple of Tensors, whose length is the
same as the Tensor number inside inputs, and the i-th returned
Tensor is the sum of gradients of outputs with respect to the i-th
inputs.
"""
if paddle.fluid._non_static_mode():
xs_grad = paddle.grad(ys, xs, v, create_graph=True, allow_unused=True)
else:
xs_grad = paddle.static.gradients(ys, xs, v)
if isinstance(xs, paddle.fluid.framework.Variable):
xs_grad = xs_grad[0]
return _replace_none_with_zero_tensor(xs_grad, xs)
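# Illustrative example (not part of this module): in dynamic graph mode ``_grad``
# behaves like ``paddle.grad`` with ``create_graph=True`` and ``allow_unused=True``,
# but unwraps the single-input case:
#
#     x = paddle.rand([2]); x.stop_gradient = False
#     y = paddle.sum(x * x)
#     g = _grad(y, x)             # a Tensor equal to 2*x, not a one-element list
#     h = _grad(g, x)             # second-order gradient, a Tensor of 2.0s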
def _separate(xs):
"""
``_separate`` separates ``xs`` from the computation graph through ``clone``
or ``detach`` .
Internally, ``paddle.grad(ys, xs)`` is a stateful API implemented on top of the
computational graph, which reduces gradients along all paths from ys to xs.
However, functional autograd APIs such as ``vjp`` and ``jvp`` are stateless, and
only compute gradients with a given ``func`` .
For example, given a ``func`` :math:`y0=f(x0)`, suppose the forward paths are
``x0 -> y0`` and ``x0 -> x1 -> y0`` .
``paddle.grad(y0, x0)`` will reduce gradients along both ``y0->x0`` and
``y0->x1->x0``, while ``vjp`` only needs to reduce along ``y0->x0``.
So, we need to clone or detach xs to break the dependencies with
other variables.
Examples:
.. code-block:: python
import paddle
from paddle.autograd.functional import _separate
def func(x, y):
return x * y
x = paddle.ones((1,))
x.stop_gradient = False
y = func(x, x)
print(paddle.grad(y, x))
# [Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [2.])]
x1, x2 = _separate((x, x))
y = func(x1, x2)
print(paddle.grad(y, x1))
# [Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [1.])]
"""
if isinstance(xs, typing.Sequence):
return tuple(_single_separate(x) for x in xs)
else:
return _single_separate(xs)
def _single_separate(x):
if x is None:  # x may be None because the grad input ``v`` defaults to None.
return x
if not x.stop_gradient:
return paddle.clone(x)
else: # use detach to share memory when no need gradients.
x = x.detach()
x.stop_gradient = False
return x
return x
def _check_inputs(func, xs, v=None):
if not callable(func):
raise TypeError(f"Expected 'fun' is Callable, but got {type(func)}.")
if not isinstance(xs, (framework.Variable, typing.Sequence)):
raise TypeError(f"Expected 'xs' is a Tensor|Sequence[Tensor],"
f"but got {type(xs)}.")
if isinstance(xs, typing.Sequence) and not all(
isinstance(x, framework.Variable) for x in xs):
raise TypeError("All elements of 'xs' shoule be Tensor.")
if not isinstance(v, (framework.Variable, typing.Sequence, type(None))):
raise TypeError(
f"Expected 'v' is Tensor|Sequence[Tensor]|None, but got {type(v)}.")
if isinstance(v, typing.Sequence) and not all(
isinstance(e, framework.Variable) for e in v):
raise TypeError("All elements of 'xs' shoule be Tensor.")
def _check_v_shape(v, refs):
if v is None:
return
v, refs = _as_tensors(v), _as_tensors(refs)
if len(refs) != len(v):
raise RuntimeError(f"The argument v is a tuple of invalid length:"
f"should be {len(refs)} but got {len(v)}.")
for index, (element_v, element_ref) in enumerate(zip(v, refs)):
if element_v.shape != element_ref.shape:
raise RuntimeError(
f"The v[{index}] has invalid shape: should "
f"be {element_ref.shape} but got {element_v.shape}.")
@framework.dygraph_only
@@ -354,16 +804,18 @@ def jacobian(func, inputs, create_graph=False, allow_unused=False):
# [0., 0., 0., 2.]]), None)) # [0., 0., 0., 2.]]), None))
''' '''
inputs = _tensors(inputs, "inputs") inputs = _as_tensors(inputs)
outputs = _tensors(func(*inputs), "outputs") outputs = _as_tensors(func(*inputs))
fin_size = len(inputs) fin_size = len(inputs)
fout_size = len(outputs) fout_size = len(outputs)
flat_outputs = tuple(reshape(output, shape=[-1]) for output in outputs) flat_outputs = tuple(
paddle.reshape(
output, shape=[-1]) for output in outputs)
jacobian = tuple() jacobian = tuple()
for i, flat_output in enumerate(flat_outputs): for i, flat_output in enumerate(flat_outputs):
jac_i = list([] for _ in range(fin_size)) jac_i = list([] for _ in range(fin_size))
for k in range(len(flat_output)): for k in range(len(flat_output)):
row_k = grad( row_k = paddle.grad(
flat_output[k], flat_output[k],
inputs, inputs,
create_graph=create_graph, create_graph=create_graph,
@@ -371,7 +823,7 @@ def jacobian(func, inputs, create_graph=False, allow_unused=False):
allow_unused=allow_unused) allow_unused=allow_unused)
for j in range(fin_size): for j in range(fin_size):
jac_i[j].append( jac_i[j].append(
reshape( paddle.reshape(
row_k[j], shape=[-1]) row_k[j], shape=[-1])
if isinstance(row_k[j], paddle.Tensor) else None) if isinstance(row_k[j], paddle.Tensor) else None)
jacobian += (tuple( jacobian += (tuple(
@@ -491,8 +943,8 @@ def batch_jacobian(func, inputs, create_graph=False, allow_unused=False):
# [0., 1., 0., 1., 0., 1., 0., 1.]])) # [0., 1., 0., 1., 0., 1., 0., 1.]]))
''' '''
inputs = _tensors(inputs, "inputs") inputs = _as_tensors(inputs)
outputs = _tensors(func(*inputs), "outputs") outputs = _as_tensors(func(*inputs))
batch_size = inputs[0].shape[0] batch_size = inputs[0].shape[0]
for input in inputs: for input in inputs:
assert input.shape[ assert input.shape[
@@ -503,13 +955,13 @@ def batch_jacobian(func, inputs, create_graph=False, allow_unused=False):
fin_size = len(inputs) fin_size = len(inputs)
fout_size = len(outputs) fout_size = len(outputs)
flat_outputs = tuple( flat_outputs = tuple(
reshape( paddle.reshape(
output, shape=[batch_size, -1]) for output in outputs) output, shape=[batch_size, -1]) for output in outputs)
jacobian = tuple() jacobian = tuple()
for i, flat_output in enumerate(flat_outputs): for i, flat_output in enumerate(flat_outputs):
jac_i = list([] for _ in range(fin_size)) jac_i = list([] for _ in range(fin_size))
for k in range(flat_output.shape[1]): for k in range(flat_output.shape[1]):
row_k = grad( row_k = paddle.grad(
flat_output[:, k], flat_output[:, k],
inputs, inputs,
create_graph=create_graph, create_graph=create_graph,
@@ -517,7 +969,7 @@ def batch_jacobian(func, inputs, create_graph=False, allow_unused=False):
allow_unused=allow_unused) allow_unused=allow_unused)
for j in range(fin_size): for j in range(fin_size):
jac_i[j].append( jac_i[j].append(
reshape( paddle.reshape(
row_k[j], shape=[-1]) row_k[j], shape=[-1])
if isinstance(row_k[j], paddle.Tensor) else None) if isinstance(row_k[j], paddle.Tensor) else None)
jacobian += (tuple( jacobian += (tuple(
@@ -652,7 +1104,7 @@ def batch_hessian(func, inputs, create_graph=False, allow_unused=False):
# [0., 2., 0., 2., 0., 2., 0., 2.]]), None), (None, None)) # [0., 2., 0., 2., 0., 2., 0., 2.]]), None), (None, None))
''' '''
inputs = _tensors(inputs, "inputs") inputs = _as_tensors(inputs)
outputs = func(*inputs) outputs = func(*inputs)
batch_size = inputs[0].shape[0] batch_size = inputs[0].shape[0]
for input in inputs: for input in inputs:
@@ -663,7 +1115,7 @@ def batch_hessian(func, inputs, create_graph=False, allow_unused=False):
], "The function to compute batched Hessian matrix should return a Tensor of shape [batch_size, 1]" ], "The function to compute batched Hessian matrix should return a Tensor of shape [batch_size, 1]"
def jac_func(*ins): def jac_func(*ins):
grad_inputs = grad( grad_inputs = paddle.grad(
outputs, outputs,
ins, ins,
create_graph=True, create_graph=True,
@@ -782,14 +1234,14 @@ def hessian(func, inputs, create_graph=False, allow_unused=False):
# [0., 1., 1., 2.]]), None), (None, None)) # [0., 1., 1., 2.]]), None), (None, None))
''' '''
inputs = _tensors(inputs, "inputs") inputs = _as_tensors(inputs)
outputs = func(*inputs) outputs = func(*inputs)
assert isinstance(outputs, paddle.Tensor) and outputs.shape == [ assert isinstance(outputs, paddle.Tensor) and outputs.shape == [
1 1
], "The function to compute Hessian matrix should return a Tensor with a single element" ], "The function to compute Hessian matrix should return a Tensor with a single element"
def jac_func(*ins): def jac_func(*ins):
grad_inputs = grad( grad_inputs = paddle.grad(
outputs, outputs,
ins, ins,
create_graph=True, create_graph=True,
@@ -803,7 +1255,6 @@ def hessian(func, inputs, create_graph=False, allow_unused=False):
jac_func, inputs, create_graph=create_graph, allow_unused=allow_unused) jac_func, inputs, create_graph=create_graph, allow_unused=allow_unused)
@framework.dygraph_only
def vhp(func, inputs, v=None, create_graph=False, allow_unused=False): def vhp(func, inputs, v=None, create_graph=False, allow_unused=False):
''' '''
.. note:: .. note::
@@ -887,177 +1338,17 @@ def vhp(func, inputs, v=None, create_graph=False, allow_unused=False):
# [[8., 8.], # [[8., 8.],
# [8., 8.]]), None]) # [8., 8.]]), None])
''' '''
xs = _tensors(inputs, "inputs") xs = _as_tensors(inputs)
if v is not None: if v is not None:
v = _tensors(v, "v") v = _as_tensors(v)
xs, v = _separate(xs), _separate(v)
with gradient_scope(
xs, v, create_graph=create_graph,
allow_unused=allow_unused) as [xs, v, grad_fn, return_fn]:
outputs = func(*xs) outputs = func(*xs)
ys = _tensors(outputs, "outputs") ys = _as_tensors(outputs)
assert len(ys) == 1 and isinstance( assert len(ys) == 1 and isinstance(
ys[0], paddle.Tensor ys[0], framework.Variable
) and ys[0].shape == [ ) and ys[0].shape == [
1 1
], "The function to compute vhp should return a Tensor with a single element" ], "The function to compute vhp should return a Tensor with a single element"
jac = grad_fn(ys, xs, create_graph=True) jac = _grad(ys, xs)
vhp = grad_fn(jac, xs, v) vhp = _grad(jac, xs, v)
outputs, vhp = return_fn(outputs), return_fn(vhp)
return outputs, vhp return outputs, vhp
class Jacobian(object):
r"""
Computes the Jacobian matrix of function `func`, which may take as input
single or multiple tensor typed arguments and output a single tensor or
multiple tensors.
In case `func` is multi-input and multi-output, i.e.,
func: Callable[[Tensor, ...], [Tensor, ...]]
`func` is treated as a vector valued function with all its inputs flattened
into a single one dimensional tensor, or a two dimensional tensor with the
first dimension retained as the batching dimension. The same rule applies to
the function outputs.
Once the Jacobian J is constructed, there are four ways to retrieve the
partial derivatives.
- J[:], retrieving the full matrix.
- J[:, j], retrieving the partial derivatives w.r.t. the j'th input
variable.
- J[i, :], retrieving the partial derivatives w.r.t. the i'th output
variable.
- J[i, j], retrieving the partial derivatives w.r.t. the i'th output
variable and the j'th input variable.
Examples:
.. code-block:: python
import paddle
import numpy as np
def func(xs):
x, y = xs
return paddle.matmul(x, y)
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
x = paddle.static.data(name='x', shape=[2, 2], dtype='float32')
JJ = paddle.autograd.functional.Jacobian(func, [x, x])
nrow, ncol = JJ.shape()
full_jacobian = JJ[:]
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(startup)
feeds = {'x': np.array([[2., 2.], [2., 1.]]).astype('float32')}
jacobian = exe.run(main, feed=feeds, fetch_list=[full_jacobian])[0]
print(jacobian)
# [[4. 2. 2. 0. 4. 2. 2. 0.]
# [2. 3. 0. 2. 2. 3. 0. 2.]
# [2. 0. 3. 2. 2. 0. 3. 2.]
# [0. 2. 2. 2. 0. 2. 2. 2.]]
"""
def __init__(self, func, inputs, batch=False):
r"""Constructing a Jacobian matrix.
Parameters:
func (Callable): a Python function that takes as input a Tensor
or a Tensor list and outputs a Tensor or a Tensor list.
inputs (Tensor|list[Tensor]): a Tensor or a list of Tensors as
`func`'s input.
batch (bool): if True the 0'th axis is considered the batch
dimension, both on input and output.
"""
def enable_grads(inputs):
if isinstance(inputs, (list, tuple)):
for x in inputs:
x.stop_gradient = False
else:
assert isinstance(inputs, paddle.fluid.framework.Variable), (
f"Expecting {inputs} to be paddle.fluid.framework.Variable,"
f" however it's found to be a(n) {type(inputs)}.")
inputs.stop_gradient = False
return inputs
self.batch = batch
self.xs = enable_grads(inputs)
ys = func(inputs)
if not isinstance(ys, list):
ys = [ys]
self.y = self.flatten_all(ys)
self.ydim = self.y.shape[-1]
self.xdim = self.flatten_all(inputs).shape[-1]
self.bdim = self.y.shape[0]
self.jacobian = {}
def flatten(self, x):
to = [x.shape[0], -1] if self.batch else [-1]
return x.reshape(to)
def flatten_all(self, xs):
if isinstance(xs, (list, tuple)):
return paddle.concat([self.flatten(x) for x in xs], axis=-1)
else:
return self.flatten(xs)
def shape(self):
return (self.ydim, self.xdim)
def __getitem__(self, tup):
if hasattr(tup, '__iter__'):
i, j = tup
else:
i, j = tup, None
full = isinstance(i, slice)
if full:
if 'full' not in self.jacobian:
rows = [
self.flatten_all(gradients(self.y[..., i], self.xs))
for i in range(self.ydim)
]
self.jacobian['full'] = full_jacobian = paddle.stack(rows)
else:
full_jacobian = self.jacobian['full']
return full_jacobian[i] if j is None else full_jacobian[i][..., j]
assert 0 <= i < self.ydim, f"Jacobian index i={i} is not valid."
assert j is None or isinstance(j, slice) or (0 <= j < self.xdim), (
f"Jacobian index j={j} is not valid.")
if 'full' in self.jacobian:
JJ = self.jacobian['full']
else:
JJ = self.jacobian
if i not in self.jacobian:
self.jacobian[i] = self.flatten_all(
gradients(self.y[..., i], self.xs))
if j is None:
return JJ[i]
else:
return JJ[i][..., j]
class Hessian(object):
def __init__(self, func, inputs, batch=False):
f_x = lambda xs: Jacobian(func, xs, batch=batch)[0]
self.symbolic = Jacobian(f_x, inputs, batch=batch)
self.xs = inputs
self.batch = batch
def __getitem__(self, tup):
return self.symbolic[tup]
def shape(self):
return self.symbolic.shape()
@@ -6,6 +6,5 @@
 foreach(TEST_OP ${TEST_OPS})
   py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${GC_ENVS})
 endforeach(TEST_OP)
-set_tests_properties(test_jacobian PROPERTIES TIMEOUT 50)
-set_tests_properties(test_hessian PROPERTIES TIMEOUT 50)
-set_tests_properties(test_vhp PROPERTIES TIMEOUT 50)
+set_tests_properties(test_autograd_functional_dynamic PROPERTIES TIMEOUT 100)
+set_tests_properties(test_autograd_functional_static PROPERTIES TIMEOUT 100)
@@ -11,35 +11,39 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
DEVICES = [paddle.CPUPlace()]
if paddle.is_compiled_with_cuda():
DEVICES.append(paddle.CUDAPlace(0))
def _tensors(ts, name): DEFAULT_DTYPE = 'float64'
if isinstance(ts, (list, tuple)):
assert len(ts) > 0, "{} connot be empty".format(name)
for each_t in ts:
assert isinstance(
each_t, paddle.Tensor
) or each_t is None, "Elements of {} must be paddle.Tensor or None".format(
name)
return list(ts)
else:
assert isinstance(ts, paddle.Tensor), "{} must be Tensor".format(name)
return [ts]
def _stack_tensor_or_return_none(origin_list):
assert len(origin_list) > 0, "Can't not stack an empty list"
return paddle.stack(
origin_list, axis=0) if isinstance(origin_list[0],
paddle.Tensor) else None
def _replace_none_with_zero_tensor(t, spec_t): # The numerical tolerances for different dtypes and derivative orders.
if t is None: # These are empirical values provided by the Paddle Science team.
zero_t = paddle.zeros(shape=spec_t.shape, dtype=spec_t.dtype) TOLERANCE = {
zero_t.stop_gradient = spec_t.stop_gradient "float32": {
return zero_t "first_order_grad": {
else: "rtol": 1e-3,
return t "atol": 1e-3,
"eps": 1e-4
},
"second_order_grad": {
"rtol": 1e-2,
"atol": 1e-2,
"eps": 1e-2
}
},
"float64": {
"first_order_grad": {
"rtol": 1e-7,
"atol": 1e-7,
"eps": 1e-7
},
"second_order_grad": {
"rtol": 1e-5,
"atol": 1e-5,
"eps": 1e-5
}
}
}
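# Illustrative usage (assumption; mirrors how the unit tests read this table):
#
#     import config
#     rtol = config.TOLERANCE["float32"]["first_order_grad"]["rtol"]             # 1e-3
#     eps = config.TOLERANCE.get("float64").get("second_order_grad").get("eps")  # 1e-5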
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import typing
import unittest
import numpy as np
import paddle
import paddle.compat as cpt
import paddle.nn.functional as F
from paddle.autograd.functional import _as_tensors
import config
import utils
from utils import (_compute_numerical_batch_hessian, _compute_numerical_hessian,
_compute_numerical_vhp, _compute_numerical_jacobian,
_compute_numerical_batch_jacobian)
from utils import matmul, mul, nested, o2, pow, reduce, reduce_dim, unuse
def make_v(f, inputs):
outputs = _as_tensors(f(*inputs))
return [paddle.ones_like(x) for x in outputs]
class TestAutogradFunctional(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.RAW_INPUTS = {
'a': [1.0],
'b': [1.0, 2.0],
'c': [3.0, 4.0],
'd': [[2.0], [3.0]],
'A': [[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]],
'B': [[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]],
}
def setUp(self):
pass
def gen_input(self, inp, stop_gradient=False):
if isinstance(inp, paddle.Tensor):
return inp
return paddle.to_tensor(
self.RAW_INPUTS[inp], stop_gradient=stop_gradient)
def gen_inputs(self, inputs):
if isinstance(inputs, list):
inputs = [self.gen_input(x) for x in inputs]
else:
inputs = [self.gen_input(inputs)]
return inputs
def gen_test_pairs(self,
func,
inputs,
v=None,
create_graph=False,
allow_unused=False):
def vjp_test():
nonlocal v
xs = self.gen_inputs(inputs)
if v is not None:
v = self.gen_inputs(v)
outputs, inputs_grad = paddle.autograd.vjp(func, xs, v)
else:
outputs, inputs_grad = paddle.autograd.vjp(func, xs)
return outputs, inputs_grad
def grad_test():
nonlocal v
xs = self.gen_inputs(inputs)
if v is not None:
v = self.gen_inputs(v)
outputs = func(*xs)
if v is not None:
inputs_grad = paddle.grad(
outputs,
xs,
v,
create_graph=create_graph,
allow_unused=allow_unused)
else:
inputs_grad = paddle.grad(
outputs,
xs,
create_graph=create_graph,
allow_unused=allow_unused)
return outputs, inputs_grad
return vjp_test, grad_test
def gen_jvp_tests(self,
func,
inputs,
v=None,
create_graph=False,
allow_unused=False):
def jvp_test():
nonlocal v
xs = self.gen_inputs(inputs)
if v is not None:
v = self.gen_inputs(v)
outputs, outputs_grad = paddle.autograd.jvp(
func,
xs,
v,
create_graph=create_graph,
allow_unused=allow_unused)
else:
outputs, outputs_grad = paddle.autograd.jvp(
func,
xs,
create_graph=create_graph,
allow_unused=allow_unused)
return outputs, outputs_grad
return jvp_test
def check_results(self, ref, res):
type_error = 'Result is different than expected in shape or type'
value_error = 'Result is different than expected values'
if ref is None:
self.assertTrue(res is None, type_error)
elif isinstance(ref, paddle.Tensor):
self.assertTrue(isinstance(res, paddle.Tensor), type_error)
np.testing.assert_allclose(res, ref)
else:
self.assertTrue(len(res) == len(ref), type_error)
for i in range(len(ref)):
self.check_results(ref[i], res[i])
return True
class TestVJP(TestAutogradFunctional):
def test_vjp_i1o1(self):
test_cases = [
[reduce, 'A'], # noqa
[reduce_dim, 'A'], # noqa
] # noqa
for f, inputs in test_cases:
vjp, grad = self.gen_test_pairs(f, inputs)
vjp_result, grad_result = vjp(), grad()
self.check_results(grad_result, vjp_result)
def test_vjp_i2o1(self):
test_cases = [
[matmul, ['A', 'B']], # noqa
[mul, ['b', 'c']], # noqa
] # noqa
for f, inputs in test_cases:
vjp, grad = self.gen_test_pairs(f, inputs)
vjp_result, grad_result = vjp(), grad()
self.check_results(grad_result, vjp_result)
def test_vjp_i2o2(self):
test_cases = [
[o2, ['A', 'A']], # noqa
] # noqa
for f, inputs in test_cases:
inputs = self.gen_inputs(inputs)
v = make_v(f, inputs)
vjp, grad = self.gen_test_pairs(f, inputs, v=v)
vjp_result, grad_result = vjp(), grad()
self.check_results(grad_result, vjp_result)
def test_vjp_i2o2_omitting_v(self):
test_cases = [
[o2, ['A', 'A']], # noqa
] # noqa
for f, inputs in test_cases:
inputs = self.gen_inputs(inputs)
vjp, grad = self.gen_test_pairs(f, inputs)
vjp_result, grad_result = vjp(), grad()
self.check_results(grad_result, vjp_result)
def test_vjp_nested(self):
x = self.gen_input('a')
test_cases = [
[nested(x), 'a'], # noqa
]
for f, inputs in test_cases:
vjp, grad = self.gen_test_pairs(f, inputs)
vjp_result, grad_result = vjp(), grad()
self.check_results(grad_result, vjp_result)
def test_vjp_aliased_input(self):
x = self.gen_input('a')
ref = self.gen_test_pairs(nested(x), 'a')[0]
aliased = self.gen_test_pairs(nested(x), x)[0]
ref_result, aliased_result = ref(), aliased()
self.check_results(ref_result, aliased_result)
@utils.place(config.DEVICES)
@utils.parameterize(
(utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'expected_exception'), (
('v_shape_not_equal_ys', utils.square, np.random.rand(3),
np.random.rand(1), RuntimeError), ))
class TestVJPException(unittest.TestCase):
def test_vjp(self):
with self.assertRaises(self.expected_exception):
paddle.autograd.vjp(self.fun,
paddle.to_tensor(self.xs),
paddle.to_tensor(self.v))
def jac(grad_fn, f, inputs):
assert grad_fn in [paddle.autograd.vjp, paddle.autograd.jvp]
if grad_fn is paddle.autograd.jvp:
vs = [paddle.zeros_like(x) for x in inputs]
else:
outputs = f(*inputs)
if isinstance(outputs, paddle.Tensor):
outputs = [outputs]
vs = [paddle.zeros_like(y) for y in outputs]
JJ_cols = []
for i, v in enumerate(vs):
v = v.flatten()
for j in range(len(v)):
_v = paddle.zeros_like(v).detach()
_v[j] = 1.0
_v = _v.reshape(vs[i].shape)
_vs = vs.copy()
_vs[i] = _v
_, grads = grad_fn(f, inputs, _vs)
d_outs = paddle.concat([d_out.flatten() for d_out in grads])
JJ_cols.append(d_outs)
# JJ is the fully unrolled jacobian
JJ = paddle.stack(JJ_cols)
if grad_fn is paddle.autograd.vjp:
JJ = JJ.t()
return JJ
class TestJVP(TestAutogradFunctional):
def test_jvp_i1o1(self):
test_cases = [
[reduce, 'A'], # noqa
[reduce_dim, 'A'], # noqa
] # noqa
for f, inputs in test_cases:
inputs = self.gen_inputs(inputs)
forward_jac = jac(paddle.autograd.jvp, f, inputs)
reverse_jac = jac(paddle.autograd.vjp, f, inputs)
self.check_results(forward_jac, reverse_jac)
def test_jvp_i2o1(self):
test_cases = [ # noqa
[matmul, ['A', 'B']], # noqa
] # noqa
for f, inputs in test_cases:
inputs = self.gen_inputs(inputs)
forward_jac = jac(paddle.autograd.jvp, f, inputs)
reverse_jac = jac(paddle.autograd.vjp, f, inputs)
self.check_results(forward_jac, reverse_jac)
def test_jvp_i2o2(self):
test_cases = [ # noqa
[o2, ['A', 'A']], # noqa
] # noqa
for f, inputs in test_cases:
inputs = self.gen_inputs(inputs)
forward_jac = jac(paddle.autograd.jvp, f, inputs)
reverse_jac = jac(paddle.autograd.vjp, f, inputs)
self.check_results(forward_jac, reverse_jac)
def test_jvp_i2o2_omitting_v(self):
test_cases = [ # noqa
[o2, ['A', 'A']], # noqa
] # noqa
for f, inputs in test_cases:
inputs = self.gen_inputs(inputs)
results_omitting_v = paddle.autograd.jvp(f, inputs)
v = [paddle.ones_like(x) for x in inputs]
results_with_v = paddle.autograd.jvp(f, inputs, v)
self.check_results(results_omitting_v, results_with_v)
@utils.place(config.DEVICES)
@utils.parameterize((utils.TEST_CASE_NAME, 'func', 'xs'), (
('1d_in_1d_out', utils.square, np.array([2., 3.])),
('3d_in_3d_out', utils.square, np.random.rand(2, 3, 4)),
('single_in_single_out', utils.square, np.random.rand(2, 3)),
('multi_in_single_out', paddle.matmul,
(np.random.rand(2, 2), np.random.rand(2, 2))), ))
class TestJacobianClassNoBatch(unittest.TestCase):
def setUp(self):
self._dtype = self.xs[0].dtype if isinstance(
self.xs, typing.Sequence) else self.xs.dtype
self._eps = config.TOLERANCE.get(str(self._dtype)).get(
"first_order_grad").get("eps")
self._rtol = config.TOLERANCE.get(str(self._dtype)).get(
"first_order_grad").get("rtol")
self._atol = config.TOLERANCE.get(str(self._dtype)).get(
"first_order_grad").get("atol")
self.xs = [paddle.to_tensor(x) for x in self.xs] if isinstance(
self.xs, typing.Sequence) else paddle.to_tensor(self.xs)
self._actual = paddle.autograd.Jacobian(self.func, self.xs, False)
self._expected = self._expected()
def test_jacobian(self):
Index = collections.namedtuple('Index', ('type', 'value'))
indexes = (Index('all', (slice(0, None, None), slice(0, None, None))),
Index('row', (0, slice(0, None, None))),
Index('col', (slice(0, None, None), 0)),
Index('multi-row', (slice(0, 2, 1), slice(0, None, None))))
self.assertEqual(self._actual[:].numpy().dtype, self._expected.dtype)
for index in indexes:
np.testing.assert_allclose(
self._actual.__getitem__(index.value),
self._expected.__getitem__(index.value),
rtol=self._rtol,
atol=self._atol,
err_msg=f'Testcase {index.type} index not passed, value is {index.value}'
)
def _expected(self):
jac = utils._compute_numerical_jacobian(self.func, self.xs, self._eps,
self._dtype)
return utils._np_concat_matrix_sequence(jac, utils.MatrixFormat.NM)
@utils.place(config.DEVICES)
@utils.parameterize((utils.TEST_CASE_NAME, 'func', 'xs'), (
('1d_in_1d_out', utils.square, np.array([[1., 2., 3.], [3., 4., 3.]])),
('3d_in_3d_out', utils.square, np.random.rand(2, 3, 4)),
('multi_in_single_out', utils.square, np.random.rand(2, 3)), ))
class TestJacobianClassBatchFirst(unittest.TestCase):
def setUp(self):
self._dtype = self.xs[0].dtype if isinstance(
self.xs, typing.Sequence) else self.xs.dtype
self._eps = config.TOLERANCE.get(str(self._dtype)).get(
"first_order_grad").get("eps")
self._rtol = config.TOLERANCE.get(str(self._dtype)).get(
"first_order_grad").get("rtol")
self._atol = config.TOLERANCE.get(str(self._dtype)).get(
"first_order_grad").get("atol")
self.xs = [paddle.to_tensor(x) for x in self.xs] if isinstance(
self.xs, typing.Sequence) else paddle.to_tensor(self.xs)
self._actual = paddle.autograd.Jacobian(self.func, self.xs, True)
self._expected = self._expected()
def test_jacobian(self):
Index = collections.namedtuple('Index', ('type', 'value'))
indexes = (
Index('all', (slice(0, None, None), slice(0, None, None),
slice(0, None, None))),
Index('row', (slice(0, None, None), 0, slice(0, None, None))),
Index('col',
(slice(0, None, None), slice(0, None, None), 0)), Index(
'batch', (slice(0, 2, None), slice(0, None, None),
slice(0, None, None))),
Index('multi_row',
(slice(0, 1, None), slice(0, 2, 1), slice(0, None, None))))
self.assertEqual(self._actual[:].numpy().dtype, self._expected.dtype)
for index in indexes:
np.testing.assert_allclose(
self._actual.__getitem__(index.value),
self._expected.__getitem__(index.value),
rtol=self._rtol,
atol=self._atol,
err_msg=f'Testcase {index.type} index not passed, value is {index.value}'
)
def _expected(self):
jac = utils._compute_numerical_batch_jacobian(
self.func, self.xs, self._eps, self._dtype, False)
jac = utils._np_concat_matrix_sequence(jac, utils.MatrixFormat.NBM)
return utils._np_transpose_matrix_format(jac, utils.MatrixFormat.NBM,
utils.MatrixFormat.BNM)
class TestHessianClassNoBatch(unittest.TestCase):
@classmethod
def setUpClass(self):
self.shape = (2, 2)
self.dtype = 'float32'
self.np_dtype = np.float32
self.numerical_delta = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("eps")
self.rtol = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("rtol")
self.atol = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("atol")
self.x = paddle.rand(shape=self.shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.shape, dtype=self.dtype)
def test_single_input(self):
def func(x):
return paddle.sum(paddle.matmul(x, x))
numerical_hessian = utils._compute_numerical_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
numerical_hessian = utils._np_concat_matrix_sequence(numerical_hessian)
self.x.stop_gradient = False
hessian = paddle.autograd.Hessian(func, self.x)
np.testing.assert_allclose(hessian[:].numpy(), numerical_hessian,
self.rtol, self.atol)
def test_multi_input(self):
def func(x, y):
return paddle.sum(paddle.matmul(x, y))
numerical_hessian = utils._compute_numerical_hessian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
numerical_hessian = utils._np_concat_matrix_sequence(numerical_hessian)
self.x.stop_gradient = False
self.y.stop_gradient = False
hessian = paddle.autograd.Hessian(func, [self.x, self.y])
np.testing.assert_allclose(
hessian[:].numpy(),
numerical_hessian,
rtol=self.rtol,
atol=self.atol)
def test_allow_unused_true(self):
def func(x, y):
return paddle.sum(paddle.matmul(x, x))
numerical_hessian = utils._compute_numerical_hessian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
numerical_hessian = utils._np_concat_matrix_sequence(numerical_hessian)
self.x.stop_gradient = False
self.y.stop_gradient = False
hessian = paddle.autograd.Hessian(func, [self.x, self.y])
np.testing.assert_allclose(hessian[:].numpy(), numerical_hessian,
self.rtol, self.atol)
def test_create_graph_true(self):
def func(x):
return paddle.sum(F.sigmoid(x))
numerical_hessian = utils._compute_numerical_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
numerical_hessian = utils._np_concat_matrix_sequence(numerical_hessian)
self.x.stop_gradient = False
hessian = paddle.autograd.Hessian(func, self.x)
assert hessian[:].stop_gradient == False
np.testing.assert_allclose(hessian[:].numpy(), numerical_hessian,
self.rtol, self.atol)
def test_out_not_single(self):
def func(x):
return x * x
with self.assertRaises(RuntimeError):
paddle.autograd.Hessian(func, paddle.ones([3]))
class TestHessianClassBatchFirst(unittest.TestCase):
@classmethod
def setUpClass(self):
self.x_shape = (5, 2)
self.weight_shape = (2, 4)
self.y_shape = (5, 2)
self.nbatch, self.nrow = 5, 2
self.dtype = 'float32'
self.np_dtype = np.float32
self.numerical_delta = config.TOLERANCE.get(self.dtype).get(
'second_order_grad').get('eps')
self.rtol = config.TOLERANCE.get(self.dtype).get(
'second_order_grad').get('rtol')
self.atol = config.TOLERANCE.get(self.dtype).get(
'second_order_grad').get('atol')
self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype)
self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype)
def test_single_input(self):
def func(x):
return paddle.matmul(x * x, self.weight)[:, 0:1]
expected = utils._compute_numerical_batch_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
H = paddle.autograd.Hessian(func, self.x, is_batched=True)
actual = utils._np_transpose_matrix_format(
H[:].numpy(), utils.MatrixFormat.BNM, utils.MatrixFormat.NBM)
actual = actual.reshape((H.shape[1], -1))
np.testing.assert_allclose(actual, expected, self.rtol, self.atol)
def test_multi_input(self):
def func(x, y):
return paddle.matmul(x * x * y * y, self.weight)[:, 0:1]
xs_len = 2
expected = utils._compute_numerical_batch_hessian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
expected = np.reshape(
np.array(expected),
(xs_len, xs_len, self.nrow, self.nbatch, self.nrow))
expected = [[n for n in row] for row in expected]
expected = utils._np_concat_matrix_sequence(expected)
self.x.stop_gradient = False
self.y.stop_gradient = False
H = paddle.autograd.Hessian(func, [self.x, self.y], is_batched=True)
actual = utils._np_transpose_matrix_format(
H[:].numpy(), utils.MatrixFormat.BNM, utils.MatrixFormat.NBM)
np.testing.assert_allclose(actual, expected, self.rtol, self.atol)
def test_allow_unused(self):
def func(x, y):
return paddle.matmul(x * x, self.weight)[:, 0:1]
xs_len = 2
expected = utils._compute_numerical_batch_hessian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
expected = np.reshape(
np.array(expected),
(xs_len, xs_len, self.nrow, self.nbatch, self.nrow))
expected = [[n for n in row] for row in expected]
expected = utils._np_concat_matrix_sequence(expected)
expected = utils._np_transpose_matrix_format(
expected, utils.MatrixFormat.NBM, utils.MatrixFormat.BNM)
actual = paddle.autograd.Hessian(
func, [self.x, self.y], is_batched=True)[:]
np.testing.assert_allclose(
actual, expected, rtol=self.rtol, atol=self.atol)
def test_stop_gradient(self):
def func(x):
return paddle.matmul(x * x, self.weight)[:, 0:1]
expected = utils._compute_numerical_batch_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
x = self.x.clone()
x.stop_gradient = True
H = paddle.autograd.Hessian(func, self.x, is_batched=True)[:]
actual = utils._np_transpose_matrix_format(
H[:].numpy(), utils.MatrixFormat.BNM, utils.MatrixFormat.NBM)
actual = actual.reshape((H.shape[1], -1))
np.testing.assert_allclose(actual, expected, self.rtol, self.atol)
def test_out_not_single(self):
def func(x):
return (x * x)
with self.assertRaises(RuntimeError):
paddle.autograd.Hessian(func, paddle.ones((3, 3)), is_batched=True)
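# The remaining dynamic-graph tests cover the earlier functional-style APIs:
# hessian, batch_hessian, vhp, jacobian and batch_jacobian.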
class TestHessian(unittest.TestCase):
@classmethod
def setUpClass(self):
self.shape = (2, 2)
self.dtype = 'float32'
self.np_dtype = np.float32
self.numerical_delta = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("eps")
self.rtol = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("rtol")
self.atol = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("atol")
self.x = paddle.rand(shape=self.shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.shape, dtype=self.dtype)
def test_single_input(self):
def func(x):
return paddle.sum(paddle.matmul(x, x))
numerical_hessian = _compute_numerical_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
hessian = paddle.autograd.hessian(func, self.x)
np.testing.assert_allclose(hessian.numpy(), numerical_hessian[0][0],
self.rtol, self.atol)
def test_multi_input(self):
def func(x, y):
return paddle.sum(paddle.matmul(x, y))
numerical_hessian = _compute_numerical_hessian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
hessian = paddle.autograd.hessian(func, [self.x, self.y])
for i in range(len(hessian)):
for j in range(len(hessian[0])):
np.testing.assert_allclose(hessian[i][j].numpy(),
numerical_hessian[i][j], self.rtol,
self.atol)
def test_allow_unused_false(self):
def func(x, y):
return paddle.sum(paddle.matmul(x, x))
try:
self.x.stop_gradient = False
self.y.stop_gradient = False
hessian = paddle.autograd.hessian(func, [self.x, self.y])
except ValueError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("allow_unused") > 0
def test_allow_unused_true(self):
def func(x, y):
return paddle.sum(paddle.matmul(x, x))
numerical_hessian = _compute_numerical_hessian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
hessian = paddle.autograd.hessian(
func, [self.x, self.y], allow_unused=True)
for i in range(len(hessian)):
for j in range(len(hessian[0])):
if i == j == 0:
np.testing.assert_allclose(hessian[i][j].numpy(),
numerical_hessian[i][j],
self.rtol, self.atol)
else:
assert hessian[i][j] is None
def test_create_graph_false(self):
def func(x):
return paddle.sum(paddle.matmul(x, x))
numerical_hessian = _compute_numerical_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
hessian = paddle.autograd.hessian(func, self.x)
assert hessian.stop_gradient == True
np.testing.assert_allclose(hessian.numpy(), numerical_hessian[0][0],
self.rtol, self.atol)
try:
paddle.grad(hessian, self.x)
except RuntimeError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("has no gradient") > 0
def test_create_graph_true(self):
def func(x):
return paddle.sum(F.sigmoid(x))
numerical_hessian = _compute_numerical_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
hessian = paddle.autograd.hessian(func, self.x, create_graph=True)
assert hessian.stop_gradient == False
np.testing.assert_allclose(hessian.numpy(), numerical_hessian[0][0],
self.rtol, self.atol)
triple_grad = paddle.grad(hessian, self.x)
assert triple_grad is not None
class TestHessianFloat64(TestHessian):
@classmethod
def setUpClass(self):
self.shape = (2, 2)
self.dtype = 'float64'
self.np_dtype = np.float64
self.numerical_delta = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("eps")
self.rtol = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("rtol")
self.atol = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("atol")
self.x = paddle.rand(shape=self.shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.shape, dtype=self.dtype)
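# batch_hessian: inputs and outputs keep a leading batch dimension, so the
# reference values come from _compute_numerical_batch_hessian.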
class TestBatchHessian(unittest.TestCase):
@classmethod
def setUpClass(self):
self.x_shape = (5, 2)
self.weight_shape = (2, 4)
self.y_shape = (5, 2)
self.dtype = 'float32'
self.np_dtype = np.float32
self.numerical_delta = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("eps")
self.rtol = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("rtol")
self.atol = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("atol")
self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype)
self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype)
def test_single_input(self):
def func(x):
return paddle.matmul(x * x, self.weight)[:, 0:1]
numerical_hessian = _compute_numerical_batch_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
hessian = paddle.autograd.batch_hessian(func, self.x, create_graph=True)
np.testing.assert_allclose(hessian, numerical_hessian, self.rtol,
self.atol)
def test_multi_input(self):
def func(x, y):
return paddle.matmul(x * x * y * y, self.weight)[:, 0:1]
numerical_hessian = _compute_numerical_batch_hessian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
hessian = paddle.autograd.batch_hessian(func, [self.x, self.y])
shape_tensor = paddle.to_tensor(numerical_hessian).astype("float64")
hessian_reshape = np.reshape(hessian, (shape_tensor.shape))
np.testing.assert_allclose(hessian_reshape, numerical_hessian,
self.rtol, self.atol)
def test_allow_unused_false(self):
def func(x, y):
return paddle.matmul(x * x, self.weight)[:, 0:1]
try:
self.x.stop_gradient = False
self.y.stop_gradient = False
hessian = paddle.autograd.batch_hessian(func, [self.x, self.y])
except ValueError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("allow_unused") > 0
def test_allow_unused_true(self):
def func(x, y):
return paddle.matmul(x * x, self.weight)[:, 0:1]
numerical_hessian = _compute_numerical_batch_hessian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
hessian = paddle.autograd.batch_hessian(
func, [self.x, self.y], allow_unused=True)
for i in range(len(hessian)):
for j in range(len(hessian[0])):
if i == j == 0:
numerical_hessian = np.stack(
(numerical_hessian[i][j], numerical_hessian[i][j + 1]),
axis=0)
np.testing.assert_allclose(hessian[i][j], numerical_hessian,
self.rtol, self.atol)
else:
assert hessian[i][j] is None
def test_create_graph_false(self):
def func(x):
return paddle.matmul(x * x, self.weight)[:, 0:1]
numerical_hessian = _compute_numerical_batch_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
hessian = paddle.autograd.batch_hessian(func, self.x)
assert hessian.stop_gradient == True
np.testing.assert_allclose(hessian.numpy(), numerical_hessian,
self.rtol, self.atol)
try:
paddle.grad(hessian, self.x)
except RuntimeError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("has no gradient") > 0
def test_create_graph_true(self):
def func(x):
return paddle.matmul(x * x, self.weight)[:, 0:1]
numerical_hessian = _compute_numerical_batch_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
hessian = paddle.autograd.batch_hessian(func, self.x, create_graph=True)
assert hessian.stop_gradient == False
np.testing.assert_allclose(hessian.numpy(), numerical_hessian,
self.rtol, self.atol)
triple_grad = paddle.grad(hessian, self.x)
assert triple_grad is not None
class TestBatchHessianFloat64(TestBatchHessian):
@classmethod
def setUpClass(self):
self.x_shape = (5, 2)
self.weight_shape = (2, 4)
self.y_shape = (5, 2)
self.dtype = 'float64'
self.np_dtype = np.float64
self.numerical_delta = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("eps")
self.rtol = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("rtol")
self.atol = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("atol")
self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype)
self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype)
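# vhp returns (func_output, vector-Hessian-product); test_v_default checks
# that v falls back to all-ones tensors when it is not passed.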
class TestVHP(unittest.TestCase):
@classmethod
def setUpClass(self):
self.shape = (2, 2)
self.dtype = 'float32'
self.np_dtype = np.float32
self.numerical_delta = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("eps")
self.rtol = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("rtol")
self.atol = config.TOLERANCE.get(self.dtype).get(
"second_order_grad").get("atol")
self.x = paddle.rand(shape=self.shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.shape, dtype=self.dtype)
self.vx = paddle.rand(shape=self.shape, dtype=self.dtype)
self.vy = paddle.rand(shape=self.shape, dtype=self.dtype)
def test_single_input(self):
def func(x):
return paddle.sum(paddle.matmul(x, x))
numerical_func_output = func(self.x).numpy()
numerical_vhp = _compute_numerical_vhp(
func, self.x, self.vx, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
func_output, vhp = paddle.autograd.vhp(func, self.x, self.vx)
np.testing.assert_allclose(func_output.numpy(), numerical_func_output,
self.rtol, self.atol)
np.testing.assert_allclose(vhp[0].numpy(), numerical_vhp[0], self.rtol,
self.atol)
def test_multi_input(self):
def func(x, y):
return paddle.sum(paddle.matmul(x, y))
numerical_func_output = func(self.x, self.y).numpy()
numerical_vhp = _compute_numerical_vhp(
func, [self.x, self.y], [self.vx, self.vy], self.numerical_delta,
self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
func_output, vhp = paddle.autograd.vhp(func, [self.x, self.y],
[self.vx, self.vy])
np.testing.assert_allclose(func_output.numpy(), numerical_func_output,
self.rtol, self.atol)
for i in range(len(vhp)):
np.testing.assert_allclose(vhp[i].numpy(), numerical_vhp[i],
self.rtol, self.atol)
def test_v_default(self):
def func(x, y):
return paddle.sum(paddle.matmul(x, y))
numerical_func_output = func(self.x, self.y).numpy()
vx = paddle.ones(self.vx.shape, dtype=self.vx.dtype)
vy = paddle.ones(self.vy.shape, dtype=self.vy.dtype)
numerical_vhp = _compute_numerical_vhp(func, [self.x, self.y],
[vx, vy], self.numerical_delta,
self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
func_output, vhp = paddle.autograd.vhp(func, [self.x, self.y])
np.testing.assert_allclose(func_output.numpy(), numerical_func_output,
self.rtol, self.atol)
for i in range(len(vhp)):
np.testing.assert_allclose(vhp[i].numpy(), numerical_vhp[i],
self.rtol, self.atol)
def test_allow_unused_true(self):
def func(x, y):
return paddle.sum(paddle.matmul(x, x))
numerical_func_output = func(self.x, self.y).numpy()
numerical_vhp = _compute_numerical_vhp(
func, [self.x, self.y], [self.vx, self.vy], self.numerical_delta,
self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
func_output, vhp = paddle.autograd.vhp(func, [self.x, self.y],
[self.vx, self.vy])
np.testing.assert_allclose(func_output.numpy(), numerical_func_output,
self.rtol, self.atol)
np.testing.assert_allclose(vhp[0].numpy(), numerical_vhp[0], self.rtol,
self.atol)
def test_create_graph_true(self):
def func(x):
return paddle.sum(F.sigmoid(x))
numerical_func_output = func(self.x).numpy()
numerical_vhp = _compute_numerical_vhp(
func, self.x, self.vx, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
func_output, vhp = paddle.autograd.vhp(func, self.x, self.vx)
np.testing.assert_allclose(func_output.numpy(), numerical_func_output,
self.rtol, self.atol)
assert vhp[0].stop_gradient == False
np.testing.assert_allclose(vhp[0].numpy(), numerical_vhp[0], self.rtol,
self.atol)
triple_grad = paddle.grad(vhp, self.x)
assert triple_grad is not None
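# jacobian: covers single/multi input and output combinations plus the
# allow_unused and create_graph switches.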
class TestJacobian(unittest.TestCase):
@classmethod
def setUpClass(self):
self.shape = (4, 4)
self.dtype = 'float32'
self.np_dtype = np.float32
self.numerical_delta = 1e-4
self.rtol = 1e-3
self.atol = 1e-3
self.x = paddle.rand(shape=self.shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.shape, dtype=self.dtype)
def test_single_input_and_single_output(self):
def func(x):
return paddle.matmul(x, x)
numerical_jacobian = _compute_numerical_jacobian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
jacobian = paddle.autograd.jacobian(func, self.x)
np.testing.assert_allclose(jacobian.numpy(), numerical_jacobian[0][0],
self.rtol, self.atol)
def test_single_input_and_multi_output(self):
def func(x):
return paddle.matmul(x, x), x * x
numerical_jacobian = _compute_numerical_jacobian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
jacobian = paddle.autograd.jacobian(func, self.x)
for i in range(len(jacobian)):
np.testing.assert_allclose(jacobian[i].numpy(),
numerical_jacobian[i][0], self.rtol,
self.atol)
def test_multi_input_and_single_output(self):
def func(x, y):
return paddle.matmul(x, y)
numerical_jacobian = _compute_numerical_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.jacobian(func, [self.x, self.y])
for j in range(len(jacobian)):
np.testing.assert_allclose(jacobian[j].numpy(),
numerical_jacobian[0][j], self.rtol,
self.atol)
def test_multi_input_and_multi_output(self):
def func(x, y):
return paddle.matmul(x, y), x * y
numerical_jacobian = _compute_numerical_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.jacobian(func, [self.x, self.y])
for i in range(len(jacobian)):
for j in range(len(jacobian[0])):
np.testing.assert_allclose(jacobian[i][j].numpy(),
numerical_jacobian[i][j], self.rtol,
self.atol)
def test_allow_unused_false(self):
def func(x, y):
return paddle.matmul(x, x)
try:
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.jacobian(func, [self.x, self.y])
except ValueError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("allow_unused") > 0
def test_allow_unused_true(self):
def func(x, y):
return paddle.matmul(x, x)
numerical_jacobian = _compute_numerical_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.jacobian(
func, [self.x, self.y], allow_unused=True)
np.testing.assert_allclose(
jacobian[0].numpy(), numerical_jacobian[0][0], self.rtol, self.atol)
assert jacobian[1] is None
def test_create_graph_false(self):
def func(x, y):
return paddle.matmul(x, y)
numerical_jacobian = _compute_numerical_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.jacobian(func, [self.x, self.y])
for j in range(len(jacobian)):
assert jacobian[j].stop_gradient == True
np.testing.assert_allclose(jacobian[j].numpy(),
numerical_jacobian[0][j], self.rtol,
self.atol)
try:
paddle.grad(jacobian[0], [self.x, self.y])
except RuntimeError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("has no gradient") > 0
def test_create_graph_true(self):
def func(x, y):
return paddle.matmul(x, y)
numerical_jacobian = _compute_numerical_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.jacobian(
func, [self.x, self.y], create_graph=True)
for j in range(len(jacobian)):
assert jacobian[j].stop_gradient == False
np.testing.assert_allclose(jacobian[j].numpy(),
numerical_jacobian[0][j], self.rtol,
self.atol)
double_grad = paddle.grad(jacobian[0], [self.x, self.y])
assert double_grad is not None
class TestJacobianFloat64(TestJacobian):
@classmethod
def setUpClass(self):
self.shape = (4, 4)
self.dtype = 'float64'
self.np_dtype = np.float64
self.numerical_delta = 1e-7
self.rtol = 1e-7
self.atol = 1e-7
self.x = paddle.rand(shape=self.shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.shape, dtype=self.dtype)
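# batch_jacobian variants of the same cases, using batched inputs whose first
# axis is the batch dimension.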
class TestJacobianBatch(unittest.TestCase):
@classmethod
def setUpClass(self):
self.x_shape = (4, 2)
self.weight_shape = (2, 4)
self.y_shape = (4, 2)
self.dtype = 'float32'
self.np_dtype = np.float32
self.numerical_delta = 1e-4
self.rtol = 1e-3
self.atol = 1e-3
self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype)
self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype)
def test_batch_single_input_and_batch_single_output(self):
def func(x):
return paddle.matmul(paddle.matmul(x, self.weight), self.y)
numerical_jacobian = _compute_numerical_batch_jacobian(
func, [self.x], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
        batch_jacobian = paddle.autograd.batch_jacobian(func, self.x)
        np.testing.assert_allclose(batch_jacobian.numpy(),
                                   numerical_jacobian[0][0], self.rtol,
                                   self.atol)
def test_batch_single_input_and_batch_multi_output(self):
def func(x):
return paddle.matmul(paddle.matmul(x, self.weight), self.y), x * x
numerical_jacobian = _compute_numerical_batch_jacobian(
func, [self.x], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
        batch_jacobian = paddle.autograd.batch_jacobian(func, self.x)
for i in range(len(batch_jacobian)):
np.testing.assert_allclose(batch_jacobian[i].numpy(),
numerical_jacobian[i][0], self.rtol,
self.atol)
def test_batch_multi_input_and_batch_single_output(self):
def func(x, y):
return x * y
numerical_jacobian = _compute_numerical_batch_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
batch_jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y])
for j in range(len(batch_jacobian)):
np.testing.assert_allclose(batch_jacobian[j].numpy(),
numerical_jacobian[0][j], self.rtol,
self.atol)
def test_batch_multi_input_and_batch_multi_output(self):
def func(x, y):
return x * y, x * y
numerical_jacobian = _compute_numerical_batch_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
batch_jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y])
for i in range(len(batch_jacobian)):
np.testing.assert_allclose(batch_jacobian[i], numerical_jacobian[i],
self.rtol, self.atol)
def test_allow_unused_false(self):
def func(x, y):
return x * x
try:
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y])
except ValueError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("allow_unused") > 0
def test_allow_unused_true(self):
def func(x, y):
return x * x
numerical_jacobian = _compute_numerical_batch_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.batch_jacobian(
func, [self.x, self.y], allow_unused=True)
np.testing.assert_allclose(
jacobian[0].numpy(), numerical_jacobian[0][0], self.rtol, self.atol)
assert jacobian[1] is None
def test_create_graph_false(self):
def func(x, y):
return x * y
numerical_jacobian = _compute_numerical_batch_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y])
for j in range(len(jacobian)):
assert jacobian[j].stop_gradient == True
np.testing.assert_allclose(jacobian[j].numpy(),
numerical_jacobian[0][j], self.rtol,
self.atol)
try:
paddle.grad(jacobian[0], [self.x, self.y])
except RuntimeError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("has no gradient") > 0
def test_create_graph_true(self):
def func(x, y):
return x * y
numerical_jacobian = _compute_numerical_batch_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.batch_jacobian(
func, [self.x, self.y], create_graph=True)
for j in range(len(jacobian)):
assert jacobian[j].stop_gradient == False
np.testing.assert_allclose(jacobian[j].numpy(),
numerical_jacobian[0][j], self.rtol,
self.atol)
double_grad = paddle.grad(jacobian[0], [self.x, self.y])
assert double_grad is not None
class TestJacobianBatchFloat64(TestJacobianBatch):
@classmethod
def setUpClass(self):
self.x_shape = (12, 2)
self.weight_shape = (2, 12)
self.y_shape = (12, 2)
self.dtype = 'float64'
self.np_dtype = np.float64
self.numerical_delta = config.TOLERANCE.get(self.dtype).get(
'second_order_grad').get('eps')
self.rtol = config.TOLERANCE.get(self.dtype).get(
'second_order_grad').get('rtol')
self.atol = config.TOLERANCE.get(self.dtype).get(
'second_order_grad').get('atol')
self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype)
self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype)
if __name__ == "__main__":
unittest.main()
...@@ -12,11 +12,131 @@ ...@@ -12,11 +12,131 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import typing
import unittest import unittest
import numpy as np import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from utils import _compute_numerical_jacobian, _compute_numerical_batch_jacobian
import config
import utils
from utils import (_compute_numerical_batch_jacobian,
_compute_numerical_jacobian)
from paddle.autograd.functional import _as_tensors
paddle.enable_static()
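# Static-graph checks: paddle.autograd.vjp is compared against
# paddle.static.gradients run on an equivalent program (_expected_vjp).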
@utils.place(config.DEVICES)
@utils.parameterize((utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'stop_gradient'), (
('tensor_input', utils.reduce, np.random.rand(2, 3), None, False),
('tensor_sequence_input', utils.reduce, np.random.rand(2, 3), None, False),
('v_not_none', utils.reduce, np.random.rand(2, 3), np.random.rand(1),
False),
('xs_stop_gradient', utils.reduce, np.random.rand(2, 3), np.random.rand(1),
True),
    ('func_matmul', utils.matmul, (np.random.rand(3, 2), np.random.rand(2, 3)),
None, False),
('func_mul', utils.mul, (np.random.rand(3, 3), np.random.rand(3, 3)), None,
False),
('func_out_two', utils.o2, (np.random.rand(10), np.random.rand(10)), None,
False), ))
class TestVJP(unittest.TestCase):
def setUp(self):
self.dtype = str(self.xs[0].dtype) if isinstance(
self.xs, typing.Sequence) else str(self.xs.dtype)
self._rtol = config.TOLERANCE.get(str(self.dtype)).get(
"first_order_grad").get("rtol")
self._atol = config.TOLERANCE.get(str(self.dtype)).get(
"first_order_grad").get("atol")
def _vjp(self):
exe = paddle.static.Executor()
sp = paddle.static.Program()
mp = paddle.static.Program()
with paddle.static.program_guard(mp, sp):
feed, static_xs, static_v = gen_static_data_and_feed(
self.xs, self.v, stop_gradient=self.stop_gradient)
ys, xs_grads = paddle.autograd.vjp(self.fun, static_xs, static_v)
exe.run(sp)
return exe.run(mp, feed=feed, fetch_list=[ys, xs_grads])
def _expected_vjp(self):
exe = paddle.static.Executor()
sp = paddle.static.Program()
mp = paddle.static.Program()
with paddle.static.program_guard(mp, sp):
feed, static_xs, static_v = gen_static_data_and_feed(self.xs,
self.v, False)
ys = self.fun(*static_xs) if isinstance(
static_xs, typing.Sequence) else self.fun(static_xs)
xs_grads = paddle.static.gradients(ys, static_xs, static_v)
exe.run(sp)
return exe.run(mp, feed=feed, fetch_list=[ys, xs_grads])
def test_vjp(self):
actual = self._vjp()
expected = self._expected_vjp()
self.assertEqual(len(actual), len(expected))
for i in range(len(actual)):
np.testing.assert_allclose(
actual[i], expected[i], rtol=self._rtol, atol=self._atol)
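# A v whose shape does not match the outputs should raise when the program
# is executed.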
@utils.place(config.DEVICES)
@utils.parameterize(
(utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'expected_exception'), (
('v_shape_not_equal_ys', utils.square, np.random.rand(3),
np.random.rand(1), RuntimeError), ))
class TestVJPException(unittest.TestCase):
def setUp(self):
self.exe = paddle.static.Executor()
def _vjp(self):
sp = paddle.static.Program()
mp = paddle.static.Program()
with paddle.static.program_guard(mp, sp):
feed, static_xs, static_v = gen_static_data_and_feed(self.xs,
self.v)
ys, xs_grads = paddle.autograd.vjp(self.fun, static_xs, static_v)
self.exe.run(sp)
return self.exe.run(mp, feed, fetch_list=[ys, xs_grads])
def test_vjp(self):
with self.assertRaises(self.expected_exception):
self._vjp()
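# Builds paddle.static.data placeholders and the matching feed dict for the
# given numpy inputs xs and optional cotangent v.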
def gen_static_data_and_feed(xs, v, stop_gradient=True):
feed = {}
if isinstance(xs, typing.Sequence):
static_xs = []
for i, x in enumerate(xs):
x = paddle.static.data(f"x{i}", x.shape, x.dtype)
x.stop_gradient = stop_gradient
static_xs.append(x)
feed.update({f'x{idx}': value for idx, value in enumerate(xs)})
else:
static_xs = paddle.static.data('x', xs.shape, xs.dtype)
static_xs.stop_gradient = stop_gradient
feed.update({'x': xs})
if isinstance(v, typing.Sequence):
static_v = []
for i, e in enumerate(v):
            e = paddle.static.data(f'v{i}', e.shape, e.dtype)
e.stop_gradient = stop_gradient
static_v.append(e)
        feed.update({f'v{idx}': value for idx, value in enumerate(v)})
elif v is not None:
static_v = paddle.static.data('v', v.shape, v.dtype)
static_v.stop_gradient = stop_gradient
feed.update({'v': v})
else:
static_v = v
return feed, static_xs, static_v
def approx_jacobian(f, xs, dtype, eps=1e-5, batch=False): def approx_jacobian(f, xs, dtype, eps=1e-5, batch=False):
...@@ -106,8 +226,13 @@ class TestJacobianFloat32(unittest.TestCase): ...@@ -106,8 +226,13 @@ class TestJacobianFloat32(unittest.TestCase):
else: else:
self.place = fluid.CPUPlace() self.place = fluid.CPUPlace()
self.dtype = 'float32' self.dtype = 'float32'
self.np_dtype = np.float32
prepare_data(self, all_data_shapes, self.dtype) prepare_data(self, all_data_shapes, self.dtype)
self.eps = 1e-4 self.eps = config.TOLERANCE.get(self.dtype).get('first_order_grad').get(
'eps')
# self.rtol = config.TOLERANCE.get(self.dtype).get('first_order_grad').get('rtol')
# self.atol = config.TOLERANCE.get(self.dtype).get('first_order_grad').get('atol')
        # Don't use the tolerances from config here; they would cause this test case to fail.
self.rtol = 1e-2 self.rtol = 1e-2
self.atol = 1e-2 self.atol = 1e-2
...@@ -116,8 +241,11 @@ class TestJacobianFloat32(unittest.TestCase): ...@@ -116,8 +241,11 @@ class TestJacobianFloat32(unittest.TestCase):
startup = fluid.Program() startup = fluid.Program()
with fluid.program_guard(main, startup): with fluid.program_guard(main, startup):
xs = make_tensors(inps) xs = make_tensors(inps)
JJ = paddle.autograd.functional.Jacobian(pd_f, xs, batch=batch) JJ = paddle.autograd.functional.Jacobian(pd_f, xs, is_batched=batch)
nrow, ncol = JJ.shape() if batch:
_, nrow, ncol = JJ.shape
else:
nrow, ncol = JJ.shape
full_jacobian = JJ[:] full_jacobian = JJ[:]
exe = fluid.Executor(self.place) exe = fluid.Executor(self.place)
exe.run(startup) exe.run(startup)
...@@ -128,17 +256,26 @@ class TestJacobianFloat32(unittest.TestCase): ...@@ -128,17 +256,26 @@ class TestJacobianFloat32(unittest.TestCase):
pd_jacobians = exe.run(main, feed=feeds, fetch_list=[full_jacobian])[0] pd_jacobians = exe.run(main, feed=feeds, fetch_list=[full_jacobian])[0]
np_jacobians = approx_jacobian( np_jacobians = approx_jacobian(
np_f, inps, self.dtype, self.eps, batch=batch) np_f, inps, self.dtype, self.eps, batch=batch)
self.assertTrue( if batch:
np.allclose(pd_jacobians, np_jacobians, self.rtol, self.atol)) np_jacobians = utils._np_transpose_matrix_format(
np_jacobians, utils.MatrixFormat.NBM, utils.MatrixFormat.BNM)
np.testing.assert_allclose(pd_jacobians, np_jacobians, self.rtol,
self.atol)
def run_test_by_rows(self, pd_f, np_f, inps, batch=False): def run_test_by_rows(self, pd_f, np_f, inps, batch=False):
main = fluid.Program() main = fluid.Program()
startup = fluid.Program() startup = fluid.Program()
with fluid.program_guard(main, startup): with fluid.program_guard(main, startup):
xs = make_tensors(inps) xs = make_tensors(inps)
JJ = paddle.autograd.functional.Jacobian(pd_f, xs, batch=batch) JJ = paddle.autograd.functional.Jacobian(pd_f, xs, is_batched=batch)
nrow, ncol = JJ.shape() if batch:
rows = [JJ[i] for i in range(nrow)] nbatch, nrow, ncol = JJ.shape
rows = [JJ[:, i, :] for i in range(nrow)]
else:
nrow, ncol = JJ.shape
rows = [JJ[i, :] for i in range(nrow)]
exe = fluid.Executor(self.place) exe = fluid.Executor(self.place)
exe.run(startup) exe.run(startup)
if isinstance(inps, list): if isinstance(inps, list):
...@@ -148,16 +285,22 @@ class TestJacobianFloat32(unittest.TestCase): ...@@ -148,16 +285,22 @@ class TestJacobianFloat32(unittest.TestCase):
pd_jac = exe.run(main, feed=feeds, fetch_list=[rows]) pd_jac = exe.run(main, feed=feeds, fetch_list=[rows])
np_jac = approx_jacobian(np_f, inps, self.dtype, self.eps, batch=batch) np_jac = approx_jacobian(np_f, inps, self.dtype, self.eps, batch=batch)
for i in range(nrow): for i in range(nrow):
self.assertTrue( np.testing.assert_allclose(pd_jac[i], np_jac[i], self.rtol,
np.allclose(pd_jac[i], np_jac[i], self.rtol, self.atol)) self.atol)
def run_test_by_entries(self, pd_f, np_f, inps, batch=False): def run_test_by_entries(self, pd_f, np_f, inps, batch=False):
main = fluid.Program() main = fluid.Program()
startup = fluid.Program() startup = fluid.Program()
with fluid.program_guard(main, startup): with fluid.program_guard(main, startup):
xs = make_tensors(inps) xs = make_tensors(inps)
JJ = paddle.autograd.functional.Jacobian(pd_f, xs, batch=batch) JJ = paddle.autograd.functional.Jacobian(pd_f, xs, is_batched=batch)
nrow, ncol = JJ.shape() if batch:
nbatch, nrow, ncol = JJ.shape
entries = [
JJ[:, i, j] for i in range(nrow) for j in range(ncol)
]
else:
nrow, ncol = JJ.shape
entries = [JJ[i, j] for i in range(nrow) for j in range(ncol)] entries = [JJ[i, j] for i in range(nrow) for j in range(ncol)]
exe = fluid.Executor(self.place) exe = fluid.Executor(self.place)
exe.run(startup) exe.run(startup)
...@@ -171,8 +314,7 @@ class TestJacobianFloat32(unittest.TestCase): ...@@ -171,8 +314,7 @@ class TestJacobianFloat32(unittest.TestCase):
np_jac[i, ..., j] for i in range(nrow) for j in range(ncol) np_jac[i, ..., j] for i in range(nrow) for j in range(ncol)
] ]
for pd_entry, np_entry in zip(pd_entries, np_entries): for pd_entry, np_entry in zip(pd_entries, np_entries):
self.assertTrue( np.testing.assert_allclose(pd_entry, np_entry, self.rtol, self.atol)
np.allclose(pd_entry, np_entry, self.rtol, self.atol))
def test_square(self): def test_square(self):
def pd_f(x): def pd_f(x):
...@@ -186,8 +328,7 @@ class TestJacobianFloat32(unittest.TestCase): ...@@ -186,8 +328,7 @@ class TestJacobianFloat32(unittest.TestCase):
self.run_test_by_entries(pd_f, np_f, self.A) self.run_test_by_entries(pd_f, np_f, self.A)
def test_mul(self): def test_mul(self):
def pd_f(xs): def pd_f(x, y):
x, y = xs
return paddle.multiply(x, y) return paddle.multiply(x, y)
def np_f(xs): def np_f(xs):
...@@ -202,8 +343,7 @@ class TestJacobianFloat32(unittest.TestCase): ...@@ -202,8 +343,7 @@ class TestJacobianFloat32(unittest.TestCase):
self.run_test_by_entries(pd_f, np_f, [self.B, self.C]) self.run_test_by_entries(pd_f, np_f, [self.B, self.C])
def test_matmul(self): def test_matmul(self):
def pd_f(xs): def pd_f(x, y):
x, y = xs
return paddle.matmul(x, y) return paddle.matmul(x, y)
def np_f(xs): def np_f(xs):
...@@ -215,8 +355,7 @@ class TestJacobianFloat32(unittest.TestCase): ...@@ -215,8 +355,7 @@ class TestJacobianFloat32(unittest.TestCase):
self.run_test_by_entries(pd_f, np_f, [self.B, self.C]) self.run_test_by_entries(pd_f, np_f, [self.B, self.C])
def test_batch_matmul(self): def test_batch_matmul(self):
def pd_f(xs): def pd_f(x, y):
x, y = xs
return paddle.matmul(x, y) return paddle.matmul(x, y)
def np_f(xs): def np_f(xs):
...@@ -238,12 +377,15 @@ class TestJacobianFloat64(TestJacobianFloat32): ...@@ -238,12 +377,15 @@ class TestJacobianFloat64(TestJacobianFloat32):
self.place = fluid.CPUPlace() self.place = fluid.CPUPlace()
self.dtype = 'float64' self.dtype = 'float64'
prepare_data(self, all_data_shapes, self.dtype) prepare_data(self, all_data_shapes, self.dtype)
self.eps = 1e-7 self.eps = config.TOLERANCE.get(self.dtype).get('first_order_grad').get(
self.rtol = 1e-6 'eps')
self.atol = 1e-6 self.rtol = config.TOLERANCE.get(self.dtype).get(
'first_order_grad').get('rtol')
self.atol = config.TOLERANCE.get(self.dtype).get(
'first_order_grad').get('atol')
class TestHessianFloat64(unittest.TestCase): class TestHessianFloat32(unittest.TestCase):
@classmethod @classmethod
def setUpClass(self): def setUpClass(self):
paddle.enable_static() paddle.enable_static()
...@@ -251,19 +393,22 @@ class TestHessianFloat64(unittest.TestCase): ...@@ -251,19 +393,22 @@ class TestHessianFloat64(unittest.TestCase):
self.place = fluid.CUDAPlace(0) self.place = fluid.CUDAPlace(0)
else: else:
self.place = fluid.CPUPlace() self.place = fluid.CPUPlace()
self.dtype = 'float64' self.dtype = 'float32'
prepare_data(self, all_data_shapes, self.dtype) prepare_data(self, all_data_shapes, self.dtype)
self.eps = 1e-7 self.eps = config.TOLERANCE.get(self.dtype).get(
self.rtol = 1e-6 'second_order_grad').get('eps')
self.atol = 1e-6 self.rtol = config.TOLERANCE.get(self.dtype).get(
'second_order_grad').get('rtol')
self.atol = config.TOLERANCE.get(self.dtype).get(
'second_order_grad').get('atol')
def run_test_by_fullmatrix(self, pd_f, inps, np_hess, batch=False): def run_test_by_fullmatrix(self, pd_f, inps, np_hess, batch=False):
main = fluid.Program() main = fluid.Program()
startup = fluid.Program() startup = fluid.Program()
with fluid.program_guard(main, startup): with fluid.program_guard(main, startup):
xs = make_tensors(inps) xs = make_tensors(inps)
HH = paddle.autograd.functional.Hessian(pd_f, xs, batch=batch) HH = paddle.autograd.functional.Hessian(pd_f, xs, is_batched=batch)
nrow, ncol = HH.shape() nrow, ncol = HH.shape
full_hessian = HH[:] full_hessian = HH[:]
exe = fluid.Executor(self.place) exe = fluid.Executor(self.place)
exe.run(startup) exe.run(startup)
...@@ -272,36 +417,38 @@ class TestHessianFloat64(unittest.TestCase): ...@@ -272,36 +417,38 @@ class TestHessianFloat64(unittest.TestCase):
else: else:
feeds = {'x': inps} feeds = {'x': inps}
pd_hess = exe.run(main, feed=feeds, fetch_list=[full_hessian])[0] pd_hess = exe.run(main, feed=feeds, fetch_list=[full_hessian])[0]
self.assertTrue(np.allclose(pd_hess, np_hess, self.rtol, self.atol)) np.testing.assert_allclose(pd_hess, np_hess, self.rtol, self.atol)
def test_square(self): def test_square(self):
def pd_f(x): def pd_f(x):
"""Input is a square matrix.""" """Input is a square matrix."""
return paddle.matmul(x, x.T) return paddle.matmul(x, x.T).flatten().sum()
def np_hess(x): def np_hess(x):
dim = x.shape[0] dim = x.shape[0]
f_xx_upperleft = 2 * np.eye(dim, dtype=self.dtype) upperleft = 2 * np.eye(dim, dtype=self.dtype)
f_xx = np.zeros([dim * dim, dim * dim], dtype=self.dtype) upper = np.concatenate((upperleft, upperleft))
f_xx[:dim, :dim] = f_xx_upperleft return np.concatenate((upper, upper), axis=1)
return f_xx
self.run_test_by_fullmatrix(pd_f, self.B, np_hess(self.B)) self.run_test_by_fullmatrix(pd_f, self.B, np_hess(self.B))
def test_batch_square(self):
def pd_f(x):
"""Input is a square matrix."""
return paddle.matmul(x, paddle.transpose(x, [0, 2, 1]))
def np_hess(x):
bat, dim, _ = x.shape
f_xx_upperleft = 2 * np.eye(dim, dtype=self.dtype)
f_xx = np.zeros([bat, dim * dim, dim * dim], dtype=self.dtype)
f_xx[..., :dim, :dim] = f_xx_upperleft
return f_xx
self.run_test_by_fullmatrix( class TestHessianFloat64(TestHessianFloat32):
pd_f, self.E, np_hess(self.E), batch=True) @classmethod
def setUpClass(self):
paddle.enable_static()
if fluid.core.is_compiled_with_cuda():
self.place = fluid.CUDAPlace(0)
else:
self.place = fluid.CPUPlace()
self.dtype = 'float64'
prepare_data(self, all_data_shapes, self.dtype)
self.eps = config.TOLERANCE.get(self.dtype).get(
'second_order_grad').get('eps')
self.rtol = config.TOLERANCE.get(self.dtype).get(
'second_order_grad').get('rtol')
self.atol = config.TOLERANCE.get(self.dtype).get(
'second_order_grad').get('atol')
if __name__ == "__main__": if __name__ == "__main__":
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle
import paddle.compat as cpt
import paddle.nn.functional as F
from utils import _compute_numerical_hessian, _compute_numerical_batch_hessian
class TestHessian(unittest.TestCase):
@classmethod
def setUpClass(self):
self.shape = (2, 2)
self.dtype = 'float32'
self.np_dtype = np.float32
self.numerical_delta = 1e-2
self.rtol = 1e-2
self.atol = 1e-2
self.x = paddle.rand(shape=self.shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.shape, dtype=self.dtype)
def test_single_input(self):
def func(x):
return paddle.sum(paddle.matmul(x, x))
numerical_hessian = _compute_numerical_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
hessian = paddle.autograd.hessian(func, self.x)
assert np.allclose(hessian.numpy(), numerical_hessian[0][0], self.rtol,
self.atol)
def test_multi_input(self):
def func(x, y):
return paddle.sum(paddle.matmul(x, y))
numerical_hessian = _compute_numerical_hessian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
hessian = paddle.autograd.hessian(func, [self.x, self.y])
for i in range(len(hessian)):
for j in range(len(hessian[0])):
assert np.allclose(hessian[i][j].numpy(),
numerical_hessian[i][j], self.rtol,
self.atol)
def test_allow_unused_false(self):
def func(x, y):
return paddle.sum(paddle.matmul(x, x))
try:
self.x.stop_gradient = False
self.y.stop_gradient = False
hessian = paddle.autograd.hessian(func, [self.x, self.y])
except ValueError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("allow_unused") > 0
def test_allow_unused_true(self):
def func(x, y):
return paddle.sum(paddle.matmul(x, x))
numerical_hessian = _compute_numerical_hessian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
hessian = paddle.autograd.hessian(
func, [self.x, self.y], allow_unused=True)
for i in range(len(hessian)):
for j in range(len(hessian[0])):
if i == j == 0:
assert np.allclose(hessian[i][j].numpy(),
numerical_hessian[i][j], self.rtol,
self.atol)
else:
assert hessian[i][j] is None
def test_create_graph_false(self):
def func(x):
return paddle.sum(paddle.matmul(x, x))
numerical_hessian = _compute_numerical_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
hessian = paddle.autograd.hessian(func, self.x)
assert hessian.stop_gradient == True
assert np.allclose(hessian.numpy(), numerical_hessian[0][0], self.rtol,
self.atol)
try:
paddle.grad(hessian, self.x)
except RuntimeError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("has no gradient") > 0
def test_create_graph_true(self):
def func(x):
return paddle.sum(F.sigmoid(x))
numerical_hessian = _compute_numerical_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
hessian = paddle.autograd.hessian(func, self.x, create_graph=True)
assert hessian.stop_gradient == False
assert np.allclose(hessian.numpy(), numerical_hessian[0][0], self.rtol,
self.atol)
triple_grad = paddle.grad(hessian, self.x)
assert triple_grad is not None
class TestHessianFloat64(TestHessian):
@classmethod
def setUpClass(self):
self.shape = (2, 2)
self.dtype = 'float64'
self.np_dtype = np.float64
self.numerical_delta = 1e-5
self.rtol = 1e-5
self.atol = 1e-5
self.x = paddle.rand(shape=self.shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.shape, dtype=self.dtype)
class TestBatchHessian(unittest.TestCase):
@classmethod
def setUpClass(self):
self.x_shape = (5, 2)
self.weight_shape = (2, 4)
self.y_shape = (5, 2)
self.dtype = 'float32'
self.np_dtype = np.float32
self.numerical_delta = 1e-2
self.rtol = 1e-3
self.atol = 1e-3
self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype)
self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype)
def test_single_input(self):
def func(x):
return paddle.matmul(x * x, self.weight)[:, 0:1]
numerical_hessian = _compute_numerical_batch_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
hessian = paddle.autograd.batch_hessian(func, self.x, create_graph=True)
assert np.allclose(hessian, numerical_hessian, self.rtol, self.atol)
def test_multi_input(self):
def func(x, y):
return paddle.matmul(x * x * y * y, self.weight)[:, 0:1]
numerical_hessian = _compute_numerical_batch_hessian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
hessian = paddle.autograd.batch_hessian(func, [self.x, self.y])
shape_tensor = paddle.to_tensor(numerical_hessian).astype("float64")
hessian_reshape = np.reshape(hessian, (shape_tensor.shape))
assert np.allclose(hessian_reshape, numerical_hessian, self.rtol,
self.atol)
def test_allow_unused_false(self):
def func(x, y):
return paddle.matmul(x * x, self.weight)[:, 0:1]
try:
self.x.stop_gradient = False
self.y.stop_gradient = False
hessian = paddle.autograd.batch_hessian(func, [self.x, self.y])
except ValueError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("allow_unused") > 0
def test_allow_unused_true(self):
def func(x, y):
return paddle.matmul(x * x, self.weight)[:, 0:1]
numerical_hessian = _compute_numerical_batch_hessian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
hessian = paddle.autograd.batch_hessian(
func, [self.x, self.y], allow_unused=True)
for i in range(len(hessian)):
for j in range(len(hessian[0])):
if i == j == 0:
numerical_hessian = np.stack(
(numerical_hessian[i][j], numerical_hessian[i][j + 1]),
axis=0)
assert np.allclose(hessian[i][j], numerical_hessian,
self.rtol, self.atol)
else:
assert hessian[i][j] is None
def test_create_graph_false(self):
def func(x):
return paddle.matmul(x * x, self.weight)[:, 0:1]
numerical_hessian = _compute_numerical_batch_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
hessian = paddle.autograd.batch_hessian(func, self.x)
assert hessian.stop_gradient == True
assert np.allclose(hessian.numpy(), numerical_hessian, self.rtol,
self.atol)
try:
paddle.grad(hessian, self.x)
except RuntimeError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("has no gradient") > 0
def test_create_graph_true(self):
def func(x):
return paddle.matmul(x * x, self.weight)[:, 0:1]
numerical_hessian = _compute_numerical_batch_hessian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
hessian = paddle.autograd.batch_hessian(func, self.x, create_graph=True)
assert hessian.stop_gradient == False
assert np.allclose(hessian.numpy(), numerical_hessian, self.rtol,
self.atol)
triple_grad = paddle.grad(hessian, self.x)
assert triple_grad is not None
class TestBatchHessianFloat64(TestBatchHessian):
@classmethod
def setUpClass(self):
self.x_shape = (5, 2)
self.weight_shape = (2, 4)
self.y_shape = (5, 2)
self.dtype = 'float64'
self.np_dtype = np.float64
self.numerical_delta = 1e-4
self.rtol = 1e-5
self.atol = 1e-5
self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype)
self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype)
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle
import paddle.compat as cpt
from utils import _compute_numerical_jacobian, _compute_numerical_batch_jacobian
class TestJacobian(unittest.TestCase):
@classmethod
def setUpClass(self):
self.shape = (4, 4)
self.dtype = 'float32'
self.np_dtype = np.float32
self.numerical_delta = 1e-4
self.rtol = 1e-3
self.atol = 1e-3
self.x = paddle.rand(shape=self.shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.shape, dtype=self.dtype)
def test_single_input_and_single_output(self):
def func(x):
return paddle.matmul(x, x)
numerical_jacobian = _compute_numerical_jacobian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
jacobian = paddle.autograd.jacobian(func, self.x)
assert np.allclose(jacobian.numpy(), numerical_jacobian[0][0],
self.rtol, self.atol)
def test_single_input_and_multi_output(self):
def func(x):
return paddle.matmul(x, x), x * x
numerical_jacobian = _compute_numerical_jacobian(
func, self.x, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
jacobian = paddle.autograd.jacobian(func, self.x)
for i in range(len(jacobian)):
assert np.allclose(jacobian[i].numpy(), numerical_jacobian[i][0],
self.rtol, self.atol)
def test_multi_input_and_single_output(self):
def func(x, y):
return paddle.matmul(x, y)
numerical_jacobian = _compute_numerical_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.jacobian(func, [self.x, self.y])
for j in range(len(jacobian)):
assert np.allclose(jacobian[j].numpy(), numerical_jacobian[0][j],
self.rtol, self.atol)
def test_multi_input_and_multi_output(self):
def func(x, y):
return paddle.matmul(x, y), x * y
numerical_jacobian = _compute_numerical_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.jacobian(func, [self.x, self.y])
for i in range(len(jacobian)):
for j in range(len(jacobian[0])):
assert np.allclose(jacobian[i][j].numpy(),
numerical_jacobian[i][j], self.rtol,
self.atol)
def test_allow_unused_false(self):
def func(x, y):
return paddle.matmul(x, x)
try:
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.jacobian(func, [self.x, self.y])
except ValueError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("allow_unused") > 0
def test_allow_unused_true(self):
def func(x, y):
return paddle.matmul(x, x)
numerical_jacobian = _compute_numerical_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.jacobian(
func, [self.x, self.y], allow_unused=True)
assert np.allclose(jacobian[0].numpy(), numerical_jacobian[0][0],
self.rtol, self.atol)
assert jacobian[1] is None
def test_create_graph_false(self):
def func(x, y):
return paddle.matmul(x, y)
numerical_jacobian = _compute_numerical_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.jacobian(func, [self.x, self.y])
for j in range(len(jacobian)):
assert jacobian[j].stop_gradient == True
assert np.allclose(jacobian[j].numpy(), numerical_jacobian[0][j],
self.rtol, self.atol)
try:
paddle.grad(jacobian[0], [self.x, self.y])
except RuntimeError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("has no gradient") > 0
def test_create_graph_true(self):
def func(x, y):
return paddle.matmul(x, y)
numerical_jacobian = _compute_numerical_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.jacobian(
func, [self.x, self.y], create_graph=True)
for j in range(len(jacobian)):
assert jacobian[j].stop_gradient == False
assert np.allclose(jacobian[j].numpy(), numerical_jacobian[0][j],
self.rtol, self.atol)
double_grad = paddle.grad(jacobian[0], [self.x, self.y])
assert double_grad is not None
class TestJacobianFloat64(TestJacobian):
@classmethod
def setUpClass(self):
self.shape = (4, 4)
self.dtype = 'float64'
self.np_dtype = np.float64
self.numerical_delta = 1e-7
self.rtol = 1e-7
self.atol = 1e-7
self.x = paddle.rand(shape=self.shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.shape, dtype=self.dtype)
class TestJacobianBatch(unittest.TestCase):
@classmethod
def setUpClass(self):
self.x_shape = (4, 2)
self.weight_shape = (2, 4)
self.y_shape = (4, 2)
self.dtype = 'float32'
self.np_dtype = np.float32
self.numerical_delta = 1e-4
self.rtol = 1e-3
self.atol = 1e-3
self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype)
self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype)
def test_batch_single_input_and_batch_single_output(self):
def func(x):
return paddle.matmul(paddle.matmul(x, self.weight), self.y)
numerical_jacobian = _compute_numerical_batch_jacobian(
func, [self.x], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
batch_jacobian = paddle.autograd.batch_jacobian(
func,
self.x, )
self.assertTrue(
np.allclose(batch_jacobian.numpy().all(), numerical_jacobian[0][0]
.all()))
def test_batch_single_input_and_batch_multi_output(self):
def func(x):
return paddle.matmul(paddle.matmul(x, self.weight), self.y), x * x
numerical_jacobian = _compute_numerical_batch_jacobian(
func, [self.x], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
batch_jacobian = paddle.autograd.batch_jacobian(
func,
self.x, )
for i in range(len(batch_jacobian)):
assert np.allclose(batch_jacobian[i].numpy(),
numerical_jacobian[i][0], self.rtol, self.atol)
def test_batch_multi_input_and_batch_single_output(self):
def func(x, y):
return x * y
numerical_jacobian = _compute_numerical_batch_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
batch_jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y])
for j in range(len(batch_jacobian)):
assert np.allclose(batch_jacobian[j].numpy(),
numerical_jacobian[0][j], self.rtol, self.atol)
def test_batch_multi_input_and_batch_multi_output(self):
def func(x, y):
return x * y, x * y
numerical_jacobian = _compute_numerical_batch_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
batch_jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y])
for i in range(len(batch_jacobian)):
assert np.allclose(batch_jacobian[i], numerical_jacobian[i],
self.rtol, self.atol)
def test_allow_unused_false(self):
def func(x, y):
return x * x
try:
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y])
except ValueError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("allow_unused") > 0
def test_allow_unused_true(self):
def func(x, y):
return x * x
numerical_jacobian = _compute_numerical_batch_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.batch_jacobian(
func, [self.x, self.y], allow_unused=True)
assert np.allclose(jacobian[0].numpy(), numerical_jacobian[0][0],
self.rtol, self.atol)
assert jacobian[1] is None
def test_create_graph_false(self):
def func(x, y):
return x * y
numerical_jacobian = _compute_numerical_batch_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.batch_jacobian(func, [self.x, self.y])
for j in range(len(jacobian)):
assert jacobian[j].stop_gradient == True
assert np.allclose(jacobian[j].numpy(), numerical_jacobian[0][j],
self.rtol, self.atol)
try:
paddle.grad(jacobian[0], [self.x, self.y])
except RuntimeError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("has no gradient") > 0
def test_create_graph_true(self):
def func(x, y):
return x * y
numerical_jacobian = _compute_numerical_batch_jacobian(
func, [self.x, self.y], self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
jacobian = paddle.autograd.batch_jacobian(
func, [self.x, self.y], create_graph=True)
for j in range(len(jacobian)):
assert jacobian[j].stop_gradient == False
assert np.allclose(jacobian[j].numpy(), numerical_jacobian[0][j],
self.rtol, self.atol)
double_grad = paddle.grad(jacobian[0], [self.x, self.y])
assert double_grad is not None
class TestJacobianBatchFloat64(TestJacobianBatch):
@classmethod
def setUpClass(self):
self.x_shape = (12, 2)
self.weight_shape = (2, 12)
self.y_shape = (12, 2)
self.dtype = 'float64'
self.np_dtype = np.float64
self.numerical_delta = 1e-7
self.rtol = 1e-7
self.atol = 1e-7
self.x = paddle.rand(shape=self.x_shape, dtype=self.dtype)
self.weight = paddle.rand(shape=self.weight_shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.y_shape, dtype=self.dtype)
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle
import paddle.compat as cpt
import paddle.nn.functional as F
from utils import _compute_numerical_vhp
class TestVHP(unittest.TestCase):
@classmethod
def setUpClass(self):
self.shape = (2, 2)
self.dtype = 'float32'
self.np_dtype = np.float32
self.numerical_delta = 1e-2
self.rtol = 1e-2
self.atol = 1e-2
self.x = paddle.rand(shape=self.shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.shape, dtype=self.dtype)
self.vx = paddle.rand(shape=self.shape, dtype=self.dtype)
self.vy = paddle.rand(shape=self.shape, dtype=self.dtype)
def test_single_input(self):
def func(x):
return paddle.sum(paddle.matmul(x, x))
numerical_func_output = func(self.x).numpy()
numerical_vhp = _compute_numerical_vhp(
func, self.x, self.vx, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
func_output, vhp = paddle.autograd.vhp(func, self.x, self.vx)
assert np.allclose(func_output.numpy(), numerical_func_output,
self.rtol, self.atol)
assert np.allclose(vhp[0].numpy(), numerical_vhp[0], self.rtol,
self.atol)
def test_multi_input(self):
def func(x, y):
return paddle.sum(paddle.matmul(x, y))
numerical_func_output = func(self.x, self.y).numpy()
numerical_vhp = _compute_numerical_vhp(
func, [self.x, self.y], [self.vx, self.vy], self.numerical_delta,
self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
func_output, vhp = paddle.autograd.vhp(func, [self.x, self.y],
[self.vx, self.vy])
assert np.allclose(func_output.numpy(), numerical_func_output,
self.rtol, self.atol)
for i in range(len(vhp)):
assert np.allclose(vhp[i].numpy(), numerical_vhp[i], self.rtol,
self.atol)
def test_v_default(self):
def func(x, y):
return paddle.sum(paddle.matmul(x, y))
numerical_func_output = func(self.x, self.y).numpy()
vx = paddle.ones(self.vx.shape, dtype=self.vx.dtype)
vy = paddle.ones(self.vy.shape, dtype=self.vy.dtype)
numerical_vhp = _compute_numerical_vhp(func, [self.x, self.y],
[vx, vy], self.numerical_delta,
self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
func_output, vhp = paddle.autograd.vhp(func, [self.x, self.y])
assert np.allclose(func_output.numpy(), numerical_func_output,
self.rtol, self.atol)
for i in range(len(vhp)):
assert np.allclose(vhp[i].numpy(), numerical_vhp[i], self.rtol,
self.atol)
def test_allow_unused_false(self):
def func(x, y):
return paddle.sum(paddle.matmul(x, x))
try:
self.x.stop_gradient = False
self.y.stop_gradient = False
_ = paddle.autograd.vhp(func, [self.x, self.y])
except ValueError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("allow_unused") > 0
def test_allow_unused_true(self):
def func(x, y):
return paddle.sum(paddle.matmul(x, x))
numerical_func_output = func(self.x, self.y).numpy()
numerical_vhp = _compute_numerical_vhp(
func, [self.x, self.y], [self.vx, self.vy], self.numerical_delta,
self.np_dtype)
self.x.stop_gradient = False
self.y.stop_gradient = False
func_output, vhp = paddle.autograd.vhp(func, [self.x, self.y],
[self.vx, self.vy],
allow_unused=True)
assert np.allclose(func_output.numpy(), numerical_func_output,
self.rtol, self.atol)
assert np.allclose(vhp[0].numpy(), numerical_vhp[0], self.rtol,
self.atol)
assert vhp[1] is None
def test_create_graph_false(self):
def func(x):
return paddle.sum(F.sigmoid(x))
numerical_func_output = func(self.x).numpy()
numerical_vhp = _compute_numerical_vhp(
func, self.x, self.vx, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
func_output, vhp = paddle.autograd.vhp(func, self.x, self.vx)
assert np.allclose(func_output.numpy(), numerical_func_output,
self.rtol, self.atol)
assert vhp[0].stop_gradient == True
assert np.allclose(vhp[0].numpy(), numerical_vhp[0], self.rtol,
self.atol)
try:
paddle.grad(vhp, self.x)
except RuntimeError as e:
error_msg = cpt.get_exception_message(e)
assert error_msg.find("has no gradient") > 0
def test_create_graph_true(self):
def func(x):
return paddle.sum(F.sigmoid(x))
numerical_func_output = func(self.x).numpy()
numerical_vhp = _compute_numerical_vhp(
func, self.x, self.vx, self.numerical_delta, self.np_dtype)
self.x.stop_gradient = False
func_output, vhp = paddle.autograd.vhp(func,
self.x,
self.vx,
create_graph=True)
assert np.allclose(func_output.numpy(), numerical_func_output,
self.rtol, self.atol)
assert vhp[0].stop_gradient == False
assert np.allclose(vhp[0].numpy(), numerical_vhp[0], self.rtol,
self.atol)
triple_grad = paddle.grad(vhp, self.x)
assert triple_grad is not None
class TestVHPFloat64(TestVHP):
@classmethod
def setUpClass(self):
self.shape = (2, 2)
self.dtype = 'float64'
self.np_dtype = np.float64
self.numerical_delta = 1e-5
self.rtol = 1e-5
self.atol = 1e-5
self.x = paddle.rand(shape=self.shape, dtype=self.dtype)
self.y = paddle.rand(shape=self.shape, dtype=self.dtype)
self.vx = paddle.rand(shape=self.shape, dtype=self.dtype)
self.vy = paddle.rand(shape=self.shape, dtype=self.dtype)
if __name__ == "__main__":
unittest.main()
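# Editor's note: a minimal usage sketch of the vector-Hessian-product API
# checked above; values are illustrative. vhp returns the function value
# together with one product per input.
import paddle

def f(x, y):
    return paddle.sum(paddle.matmul(x, y))

x = paddle.rand(shape=(2, 2))
y = paddle.rand(shape=(2, 2))
x.stop_gradient = False
y.stop_gradient = False
vx = paddle.ones_like(x)
vy = paddle.ones_like(y)
func_out, vhp_result = paddle.autograd.vhp(f, [x, y], [vx, vy])
vhp_x, vhp_y = vhp_result[0], vhp_result[1]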
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle
from paddle.autograd.functional import vjp, jvp, _tensors
from paddle import grad, ones_like, zeros_like
def reduce(x):
return paddle.sum(x)
def reduce_dim(x):
return paddle.sum(x, axis=0)
def matmul(x, y):
return paddle.matmul(x, y)
def mul(x, y):
return x * y
def pow(x, y):
return paddle.pow(x, y)
def o2(x, y):
return paddle.multiply(x, y), paddle.matmul(x, y.t())
def unuse(x, y):
return paddle.sum(x)
def nested(x):
def inner(y):
return x * y
return inner
def make_v(f, inputs):
outputs = _tensors(f(*inputs), "outputs")
return [ones_like(x) for x in outputs]
class TestAutogradFunctional(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.RAW_INPUTS = {
'a': [1.0],
'b': [1.0, 2.0],
'c': [3.0, 4.0],
'd': [[2.0], [3.0]],
'A': [[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]],
'B': [[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]],
}
def setUp(self):
pass
def gen_input(self, inp, stop_gradient=False):
if isinstance(inp, paddle.Tensor):
return inp
return paddle.to_tensor(
self.RAW_INPUTS[inp], stop_gradient=stop_gradient)
def gen_inputs(self, inputs):
if isinstance(inputs, list):
inputs = [self.gen_input(x) for x in inputs]
else:
inputs = [self.gen_input(inputs)]
return inputs
def gen_test_pairs(self,
func,
inputs,
v=None,
create_graph=False,
allow_unused=False):
def vjp_test():
nonlocal v
xs = self.gen_inputs(inputs)
if v is not None:
v = self.gen_inputs(v)
outputs, inputs_grad = vjp(func,
xs,
v,
create_graph=create_graph,
allow_unused=allow_unused)
else:
outputs, inputs_grad = vjp(func,
xs,
create_graph=create_graph,
allow_unused=allow_unused)
return outputs, inputs_grad
def grad_test():
nonlocal v
xs = self.gen_inputs(inputs)
if v is not None:
v = self.gen_inputs(v)
outputs = func(*xs)
if v is not None:
inputs_grad = grad(
outputs,
xs,
v,
create_graph=create_graph,
allow_unused=allow_unused)
else:
inputs_grad = grad(
outputs,
xs,
create_graph=create_graph,
allow_unused=allow_unused)
return outputs, inputs_grad
return vjp_test, grad_test
def gen_jvp_tests(self,
func,
inputs,
v=None,
create_graph=False,
allow_unused=False):
def jvp_test():
nonlocal v
xs = self.gen_inputs(inputs)
if v is not None:
v = self.gen_inputs(v)
outputs, outputs_grad = jvp(func,
xs,
v,
create_graph=create_graph,
allow_unused=allow_unused)
else:
outputs, outputs_grad = jvp(func,
xs,
create_graph=create_graph,
allow_unused=allow_unused)
return outputs, outputs_grad
return jvp_test
def check_results(self, ref, res):
type_error = 'Result is different than expected in shape or type'
value_error = 'Result is different than expected values'
if ref is None:
self.assertTrue(res is None, type_error)
elif isinstance(ref, paddle.Tensor):
self.assertTrue(isinstance(res, paddle.Tensor), type_error)
self.assertTrue(paddle.allclose(res, ref), value_error)
else:
self.assertTrue(len(res) == len(ref), type_error)
for i in range(len(ref)):
self.check_results(ref[i], res[i])
return True
class TestVJP(TestAutogradFunctional):
def test_vjp_i1o1_no_create_graph(self):
test_cases = [
[reduce, 'A'], #noqa
[reduce_dim, 'A'], #noqa
] #noqa
for f, inputs in test_cases:
vjp, grad = self.gen_test_pairs(f, inputs)
vjp_result, grad_result = vjp(), grad()
self.check_results(grad_result, vjp_result)
def test_vjp_i2o1_no_create_graph(self):
test_cases = [
[matmul, ['A', 'B']], #noqa
[mul, ['b', 'c']], #noqa
] #noqa
for f, inputs in test_cases:
vjp, grad = self.gen_test_pairs(f, inputs)
vjp_result, grad_result = vjp(), grad()
self.check_results(grad_result, vjp_result)
def test_vjp_i2o2_no_create_graph(self):
test_cases = [
[o2, ['A', 'A']], #noqa
] #noqa
for f, inputs in test_cases:
inputs = self.gen_inputs(inputs)
v = make_v(f, inputs)
vjp, grad = self.gen_test_pairs(f, inputs, v=v)
vjp_result, grad_result = vjp(), grad()
self.check_results(grad_result, vjp_result)
def test_vjp_i2o2_omitting_v_no_create_graph(self):
test_cases = [
[o2, ['A', 'A']], #noqa
] #noqa
for f, inputs in test_cases:
inputs = self.gen_inputs(inputs)
vjp, grad = self.gen_test_pairs(f, inputs)
vjp_result, grad_result = vjp(), grad()
self.check_results(grad_result, vjp_result)
def test_vjp_nested_no_create_graph(self):
x = self.gen_input('a')
test_cases = [
[nested(x), 'a'], #noqa
]
for f, inputs in test_cases:
vjp, grad = self.gen_test_pairs(f, inputs)
vjp_result, grad_result = vjp(), grad()
self.check_results(grad_result, vjp_result)
def test_vjp_aliased_input_no_create_graph(self):
x = self.gen_input('a')
ref = self.gen_test_pairs(nested(x), 'a')[0]
aliased = self.gen_test_pairs(nested(x), x)[0]
ref_result, aliased_result = ref(), aliased()
self.check_results(ref_result, aliased_result)
def test_vjp_allowunused_no_create_graph(self):
x, y = self.gen_input('A'), self.gen_input('a')
vjp, grad = self.gen_test_pairs(unuse, [x, y], allow_unused=True)
vjp_result, grad_result = vjp(), grad()
self.check_results(grad_result, vjp_result)
def jac(grad_fn, f, inputs):
assert grad_fn in [vjp, jvp]
if grad_fn is jvp:
vs = [zeros_like(x) for x in inputs]
else:
outputs = f(*inputs)
if isinstance(outputs, paddle.Tensor):
outputs = [outputs]
vs = [zeros_like(y) for y in outputs]
JJ_cols = []
for i, v in enumerate(vs):
v = v.flatten()
for j in range(len(v)):
_v = zeros_like(v).detach()
_v[j] = 1.0
_v = _v.reshape(vs[i].shape)
_vs = vs.copy()
_vs[i] = _v
_, grads = grad_fn(f, inputs, _vs)
d_outs = paddle.concat([d_out.flatten() for d_out in grads])
JJ_cols.append(d_outs)
# JJ is the fully unrolled jacobian
JJ = paddle.stack(JJ_cols)
if grad_fn is vjp:
JJ = JJ.t()
return JJ
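# Editor's note: `jac` above unrolls the full Jacobian by probing with one-hot
# vectors. Forward mode (jvp) yields one Jacobian column per input element and
# reverse mode (vjp) yields one row per output element, so after transposing
# the stacked vjp results both paths should produce the same matrix, which is
# what the TestJVP cases below rely on.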
class TestJVP(TestAutogradFunctional):
def test_jvp_i1o1_no_create_graph(self):
test_cases = [
[reduce, 'A'], #noqa
[reduce_dim, 'A'], #noqa
] #noqa
for f, inputs in test_cases:
inputs = self.gen_inputs(inputs)
forward_jac = jac(jvp, f, inputs)
reverse_jac = jac(vjp, f, inputs)
self.check_results(forward_jac, reverse_jac)
def test_jvp_i2o1_no_create_graph(self):
test_cases = [ #noqa
[matmul, ['A', 'B']], #noqa
] #noqa
for f, inputs in test_cases:
inputs = self.gen_inputs(inputs)
forward_jac = jac(jvp, f, inputs)
reverse_jac = jac(vjp, f, inputs)
self.check_results(forward_jac, reverse_jac)
def test_jvp_i2o2_no_create_graph(self):
test_cases = [ #noqa
[o2, ['A', 'A']], #noqa
] #noqa
for f, inputs in test_cases:
inputs = self.gen_inputs(inputs)
forward_jac = jac(jvp, f, inputs)
reverse_jac = jac(vjp, f, inputs)
self.check_results(forward_jac, reverse_jac)
def test_jvp_i2o2_omitting_v_no_create_graph(self):
test_cases = [ #noqa
[o2, ['A', 'A']], #noqa
] #noqa
for f, inputs in test_cases:
inputs = self.gen_inputs(inputs)
results_omitting_v = jvp(f, inputs)
v = [ones_like(x) for x in inputs]
results_with_v = jvp(f, inputs, v)
self.check_results(results_omitting_v, results_with_v)
if __name__ == "__main__":
unittest.main()
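# Editor's note: a minimal sketch of the call convention exercised above,
# assuming the same dynamic-graph setup as these tests. Both helpers return the
# function outputs together with the corresponding gradients, and omitting `v`
# behaves like passing all-ones tensors (see the *_omitting_v cases).
import paddle
from paddle.autograd.functional import vjp, jvp

x = paddle.to_tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]], stop_gradient=False)
outputs, inputs_grad = vjp(paddle.sum, [x])   # reverse mode
outputs, outputs_grad = jvp(paddle.sum, [x])  # forward mode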
@@ -12,11 +12,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.

+import typing
+import enum
+import sys
+import re
+import inspect
+import functools
+import contextlib
+import collections
import numpy as np
import paddle
-from paddle.autograd.functional import _tensors
+from paddle.autograd.functional import _as_tensors
+
+
+##########################################################
+# Finite Difference Utils
+##########################################################
def _product(t):
    if isinstance(t, int):
        return t
@@ -25,7 +36,9 @@ def _product(t):


def _get_item(t, idx):
-    assert isinstance(t, paddle.Tensor), "The first argument t must be Tensor."
+    assert isinstance(
+        t,
+        paddle.fluid.framework.Variable), "The first argument t must be Tensor."
    assert isinstance(idx,
                      int), "The second argument idx must be an int number."
    flat_t = paddle.reshape(t, [-1])
@@ -33,7 +46,9 @@ def _get_item(t, idx):


def _set_item(t, idx, value):
-    assert isinstance(t, paddle.Tensor), "The first argument t must be Tensor."
+    assert isinstance(
+        t,
+        paddle.fluid.framework.Variable), "The first argument t must be Tensor."
    assert isinstance(idx,
                      int), "The second argument idx must be an int number."
    flat_t = paddle.reshape(t, [-1])
@@ -42,8 +57,8 @@ def _set_item(t, idx, value):


def _compute_numerical_jacobian(func, xs, delta, np_dtype):
-    xs = _tensors(xs, "xs")
-    ys = _tensors(func(*xs), "ys")
+    xs = list(_as_tensors(xs))
+    ys = list(_as_tensors(func(*xs)))
    fin_size = len(xs)
    fout_size = len(ys)
    jacobian = list([] for _ in range(fout_size))
@@ -59,11 +74,11 @@ def _compute_numerical_jacobian(func, xs, delta, np_dtype):
            orig = _get_item(xs[j], q)
            x_pos = orig + delta
            xs[j] = _set_item(xs[j], q, x_pos)
-            ys_pos = _tensors(func(*xs), "ys_pos")
+            ys_pos = _as_tensors(func(*xs))

            x_neg = orig - delta
            xs[j] = _set_item(xs[j], q, x_neg)
-            ys_neg = _tensors(func(*xs), "ys_neg")
+            ys_neg = _as_tensors(func(*xs))

            xs[j] = _set_item(xs[j], q, orig)
@@ -76,8 +91,8 @@ def _compute_numerical_jacobian(func, xs, delta, np_dtype):


def _compute_numerical_hessian(func, xs, delta, np_dtype):
-    xs = _tensors(xs, "xs")
-    ys = _tensors(func(*xs), "ys")
+    xs = list(_as_tensors(xs))
+    ys = list(_as_tensors(func(*xs)))
    fin_size = len(xs)
    hessian = list([] for _ in range(fin_size))
    for i in range(fin_size):
@@ -107,10 +122,22 @@ def _compute_numerical_hessian(func, xs, delta, np_dtype):
    return hessian


-def _compute_numerical_batch_jacobian(func, xs, delta, np_dtype):
+def concat_to_matrix(xs, is_batched=False):
+    """Concats a tuple of tuple of Jacobian/Hessian matrix into one matrix"""
+    rows = []
+    for i in range(len(xs)):
+        rows.append(np.concatenate([x for x in xs[i]], -1))
+    return np.concatenate(rows, 1) if is_batched else np.concatenate(rows, 0)
+
+
+def _compute_numerical_batch_jacobian(func,
+                                      xs,
+                                      delta,
+                                      np_dtype,
+                                      merge_batch=True):
    no_batch_jacobian = _compute_numerical_jacobian(func, xs, delta, np_dtype)
-    xs = _tensors(xs, "xs")
-    ys = _tensors(func(*xs), "ys")
+    xs = list(_as_tensors(xs))
+    ys = list(_as_tensors(func(*xs)))
    fin_size = len(xs)
    fout_size = len(ys)
    bs = xs[0].shape[0]
@@ -128,6 +155,7 @@ def _compute_numerical_batch_jacobian(func, xs, delta, np_dtype):
                for b in range(bs):
                    for q in range(in_size):
                        batch_jac_i_j[p][b][q] = jac[b][p][b][q]
-            batch_jac_i_j = np.reshape(batch_jac_i_j, (out_size, -1))
+            if merge_batch:
+                batch_jac_i_j = np.reshape(batch_jac_i_j, (out_size, -1))
            batch_jac_i.append(batch_jac_i_j)
        bat_jac.append(batch_jac_i)
@@ -136,7 +164,7 @@ def _compute_numerical_batch_jacobian(func, xs, delta, np_dtype):


def _compute_numerical_batch_hessian(func, xs, delta, np_dtype):
-    xs = _tensors(xs, "xs")
+    xs = list(_as_tensors(xs))
    batch_size = xs[0].shape[0]
    fin_size = len(xs)
    hessian = []
@@ -175,8 +203,10 @@ def _compute_numerical_batch_hessian(func, xs, delta, np_dtype):


def _compute_numerical_vjp(func, xs, v, delta, np_dtype):
-    xs = _tensors(xs, "xs")
+    xs = _as_tensors(xs)
    jacobian = np.array(_compute_numerical_jacobian(func, xs, delta, np_dtype))
+    if v is None:
+        v = [paddle.ones_like(x) for x in xs]
    flat_v = np.array([v_el.numpy().reshape(-1) for v_el in v])
    vjp = [np.zeros((_product(x.shape)), dtype=np_dtype) for x in xs]
    for j in range(len(xs)):
@@ -188,7 +218,7 @@ def _compute_numerical_vjp(func, xs, v, delta, np_dtype):


def _compute_numerical_vhp(func, xs, v, delta, np_dtype):
-    xs = _tensors(xs, "xs")
+    xs = list(_as_tensors(xs))
    hessian = np.array(_compute_numerical_hessian(func, xs, delta, np_dtype))
    flat_v = np.array([v_el.numpy().reshape(-1) for v_el in v])
    vhp = [np.zeros((_product(x.shape)), dtype=np_dtype) for x in xs]
@@ -198,3 +228,166 @@ def _compute_numerical_vhp(func, xs, v, delta, np_dtype):
                                     flat_v)
    vhp = [vhp[j].reshape(xs[j].shape) for j in range(len(xs))]
    return vhp
##########################################################
# Test cases of different functions.
##########################################################
def reduce(x):
return paddle.sum(x)
def reduce_dim(x):
return paddle.sum(x, axis=0)
def matmul(x, y):
return paddle.matmul(x, y)
def mul(x, y):
return x * y
def pow(x, y):
return paddle.pow(x, y)
def o2(x, y):
return paddle.multiply(x, y), paddle.matmul(x, y.t())
def unuse(x, y):
return paddle.sum(x)
def nested(x):
def inner(y):
return x * y
return inner
def square(x):
return x * x
##########################################################
# Parameterized Test Utils.
##########################################################
TEST_CASE_NAME = 'suffix'
def place(devices, key='place'):
"""A Decorator for a class which will make the class running on different
devices .
Args:
devices (Sequence[Paddle.CUDAPlace|Paddle.CPUPlace]): Device list.
key (str, optional): Defaults to 'place'.
"""
def decorate(cls):
module = sys.modules[cls.__module__].__dict__
raw_classes = {
k: v
for k, v in module.items() if k.startswith(cls.__name__)
}
for raw_name, raw_cls in raw_classes.items():
for d in devices:
test_cls = dict(raw_cls.__dict__)
test_cls.update({key: d})
new_name = raw_name + '.' + d.__class__.__name__
module[new_name] = type(new_name, (raw_cls, ), test_cls)
del module[raw_name]
return cls
return decorate
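# Editor's note: typical (hypothetical) usage of the decorator above:
#
#     @place([paddle.CPUPlace(), paddle.CUDAPlace(0)])
#     class TestSomething(unittest.TestCase):
#         ...
#
# which re-registers TestSomething once per device and stores the device on
# each generated class under the `place` attribute.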
def parameterize(fields, values=None):
"""Decorator for a unittest class which make the class running on different
test cases.
Args:
fields (Sequence): The feild name sequence of test cases.
values (Sequence, optional): The test cases sequence. Defaults to None.
"""
fields = [fields] if isinstance(fields, str) else fields
params = [dict(zip(fields, vals)) for vals in values]
def decorate(cls):
test_cls_module = sys.modules[cls.__module__].__dict__
for i, values in enumerate(params):
test_cls = dict(cls.__dict__)
values = {
k: staticmethod(v) if callable(v) else v
for k, v in values.items()
}
test_cls.update(values)
name = cls.__name__ + str(i)
name = name + '.' + \
values.get('suffix') if values.get('suffix') else name
test_cls_module[name] = type(name, (cls, ), test_cls)
for m in list(cls.__dict__):
if m.startswith("test"):
delattr(cls, m)
return cls
return decorate
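# Editor's note: typical (hypothetical) usage of the decorator above, together
# with TEST_CASE_NAME:
#
#     @parameterize((TEST_CASE_NAME, 'func', 'xs'), (
#         ('scalar_output', reduce, np.random.rand(2, 3)),
#         ('binary_input', mul, (np.random.rand(2, 3), np.random.rand(2, 3))),
#     ))
#     class TestSomething(unittest.TestCase):
#         ...
#
# Each tuple becomes one generated class whose name ends with the suffix, and
# callable values are wrapped in staticmethod so they stay plain functions.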
##########################################################
# Utils for transposing between different Jacobian/Hessian matrix formats.
##########################################################
# B is batch size, N is row size, M is column size.
MatrixFormat = enum.Enum('MatrixFormat', ('NBM', 'BNM', 'NMB', 'NM'))
def _np_transpose_matrix_format(src, src_format, des_format):
"""Transpose Jacobian/Hessian matrix format."""
supported_format = (MatrixFormat.NBM, MatrixFormat.BNM, MatrixFormat.NMB)
if src_format not in supported_format or des_format not in supported_format:
raise ValueError(
f"Supported Jacobian format is {supported_format}, but got src: {src_format}, des: {des_format}"
)
src_axis = {c: i for i, c in enumerate(src_format.name)}
dst_axis = tuple(src_axis[c] for c in des_format.name)
return np.transpose(src, dst_axis)
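# Editor's note: for example, a Jacobian stored with the batch in the middle,
# shape (N, B, M), can be rearranged to batch-first (B, N, M) with:
#
#     jac_bnm = _np_transpose_matrix_format(jac_nbm, MatrixFormat.NBM,
#                                           MatrixFormat.BNM)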
def _np_concat_matrix_sequence(src, src_format=MatrixFormat.NM):
"""Convert a sequence of sequence of Jacobian/Hessian matrix into one huge
matrix."""
def concat_col(xs):
if src_format in (MatrixFormat.NBM, MatrixFormat.BNM, MatrixFormat.NM):
return np.concatenate(xs, axis=-1)
else:
return np.concatenate(xs, axis=1)
def concat_row(xs):
if src_format in (MatrixFormat.NBM, MatrixFormat.NM, MatrixFormat.NMB):
return np.concatenate(xs, axis=0)
else:
return np.concatenate(xs, axis=1)
supported_format = (MatrixFormat.NBM, MatrixFormat.BNM, MatrixFormat.NMB,
MatrixFormat.NM)
if src_format not in supported_format:
raise ValueError(
f"Supported Jacobian format is {supported_format}, but got {src_format}"
)
if not isinstance(src, typing.Sequence):
return src
if not isinstance(src[0], typing.Sequence):
src = [src]
return concat_row(tuple(concat_col(xs) for xs in src))
@@ -26,6 +26,7 @@ from .tensor import segment_mean
from .tensor import segment_max
from .tensor import segment_min
from .passes import fuse_resnet_unit_pass
+import paddle.incubate.autograd
from . import nn #noqa: F401
...
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.autograd.functional import Hessian, Jacobian, jvp, vjp
__all__ = [ # noqa
'vjp', 'jvp', 'Jacobian', 'Hessian'
]
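# Editor's note: a minimal sketch of how the re-exported API is reached after
# this change; the function and input below are illustrative, and the exact
# accepted input structures are documented on the APIs themselves.
#
#     import paddle
#
#     def func(x):
#         return paddle.matmul(x, x)
#
#     x = paddle.rand((2, 2))
#     x.stop_gradient = False
#     out, x_grad = paddle.incubate.autograd.vjp(func, x)
#
# Jacobian and Hessian are the class-style counterparts exported alongside
# vjp and jvp.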
@@ -273,6 +273,7 @@ packages=['paddle',
'paddle.distributed.ps',
'paddle.distributed.ps.utils',
'paddle.incubate',
+'paddle.incubate.autograd',
'paddle.incubate.optimizer',
'paddle.incubate.checkpoint',
'paddle.incubate.operators',
...
@@ -12,55 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-set -e
-set +x
-NIGHTLY_MODE=$1
-PRECISION_TEST=$2
-WITH_GPU=$3
-
-export PADDLE_ROOT="$(cd "$PWD/../" && pwd )"
-if [ ${NIGHTLY_MODE:-OFF} == "ON" ]; then
-    nightly_label=""
-else
-    nightly_label="(RUN_TYPE=NIGHTLY|RUN_TYPE=DIST:NIGHTLY|RUN_TYPE=EXCLUSIVE:NIGHTLY)"
-    echo "========================================="
-    echo "Unittests with nightly labels are only run at night"
-    echo "========================================="
-fi
-
-if disable_ut_quickly=$(python ${PADDLE_ROOT}/tools/get_quick_disable_lt.py); then
-    echo "========================================="
-    echo "The following unittests have been disabled:"
-    echo ${disable_ut_quickly}
-    echo "========================================="
-else
-    disable_ut_quickly=''
-fi
-
-# check added ut
-set +e
-cp $PADDLE_ROOT/tools/check_added_ut.sh $PADDLE_ROOT/tools/check_added_ut_win.sh
-bash $PADDLE_ROOT/tools/check_added_ut_win.sh
-rm -rf $PADDLE_ROOT/tools/check_added_ut_win.sh
-if [ -f "$PADDLE_ROOT/added_ut" ];then
-    added_uts=^$(awk BEGIN{RS=EOF}'{gsub(/\n/,"$|^");print}' $PADDLE_ROOT/added_ut)$
-    ctest -R "(${added_uts})" --output-on-failure -C Release --repeat-until-fail 3;added_ut_error=$?
-    rm -f $PADDLE_ROOT/added_ut
-    if [ "$added_ut_error" != 0 ];then
-        echo "========================================"
-        echo "Added UT should pass three additional executions"
-        echo "========================================"
-        exit 8;
-    fi
-    if nvcc --version | grep 11.2; then
-        echo "Only test added_ut temporarily when running in CI-Windows-inference of CUDA 11.2."
-        exit 0;
-    fi
-fi
-set -e
-
-# /*==================Fixed Disabled Windows GPU MKL unittests==============================*/
+# /*================Fixed Disabled Windows CUDA10.x MKL(PR-CI-Windows) unittests===========================*/
# TODO: fix these unittest that is bound to fail
disable_wingpu_test="^test_model$|\
^test_dataloader_early_reset$|\
@@ -97,7 +50,7 @@ disable_wingpu_test="^test_model$|\
^test_bilinear_interp_op$|\
^disable_wingpu_test$"

-# /*==================Fixed Disabled Windows GPU MKL unittests==============================*/
+# /*=================Fixed Disabled Windows TRT MKL unittests=======================*/
# TODO: fix these unittest that is bound to fail
disable_win_trt_test="^test_trt_convert_conv2d$|\
^test_trt_convert_conv2d_fusion$|\
@@ -119,7 +72,13 @@ disable_win_trt_test="^test_trt_convert_conv2d$|\
^test_trt_convert_matmul$|\
^test_trt_convert_scale$"

-# /*==================Fixed Disabled Windows GPU inference_api_test unittests==============================*/
+# /*=============Fixed Disabled Windows CUDA11.x MKL(PR-CI-Windows-Inference) unittests=================*/
+# TODO: fix these unittest that is bound to fail
+disable_wingpu11_test="^test_autograd_functional_dynamic$|\
+^disable_wingpu_test$"
+
+# /*==========Fixed Disabled Windows CUDA11.x inference_api_test(PR-CI-Windows-Inference) unittests=============*/
disable_win_inference_api_test="^trt_quant_int8_yolov3_r50_test$|\
^test_trt_dynamic_shape_ernie$|\
^test_trt_dynamic_shape_ernie_fp16_ser_deser$|\
@@ -128,9 +87,8 @@ disable_win_inference_api_test="^trt_quant_int8_yolov3_r50_test$|\
^lite_mul_model_test$|\
^paddle_infer_api_copy_tensor_tester$"

-# /*============================================================================*/
-# /*==================Fixed Disabled Windows CPU OPENBLAS unittests==============================*/
+# /*==========Fixed Disabled Windows CPU OPENBLAS((PR-CI-Windows-OPENBLAS)) unittests==============================*/
# TODO: fix these unittest that is bound to fail
disable_wincpu_test="^jit_kernel_test$|\
^test_analyzer_transformer$|\
@@ -189,6 +147,58 @@ long_time_test="^test_gru_op$|\
^test_trt_matmul_quant_dequant$|\
^test_strided_slice_op$"

+# /*============================================================================*/
+set -e
+set +x
+NIGHTLY_MODE=$1
+PRECISION_TEST=$2
+WITH_GPU=$3
+
+export PADDLE_ROOT="$(cd "$PWD/../" && pwd )"
+if [ ${NIGHTLY_MODE:-OFF} == "ON" ]; then
+    nightly_label=""
+else
+    nightly_label="(RUN_TYPE=NIGHTLY|RUN_TYPE=DIST:NIGHTLY|RUN_TYPE=EXCLUSIVE:NIGHTLY)"
+    echo "========================================="
+    echo "Unittests with nightly labels are only run at night"
+    echo "========================================="
+fi
+
+if disable_ut_quickly=$(python ${PADDLE_ROOT}/tools/get_quick_disable_lt.py); then
+    echo "========================================="
+    echo "The following unittests have been disabled:"
+    echo ${disable_ut_quickly}
+    echo "========================================="
+else
+    disable_ut_quickly=''
+fi
+
+# check added ut
+set +e
+cp $PADDLE_ROOT/tools/check_added_ut.sh $PADDLE_ROOT/tools/check_added_ut_win.sh
+bash $PADDLE_ROOT/tools/check_added_ut_win.sh
+rm -rf $PADDLE_ROOT/tools/check_added_ut_win.sh
+if [ -f "$PADDLE_ROOT/added_ut" ];then
+    added_uts=^$(awk BEGIN{RS=EOF}'{gsub(/\n/,"$|^");print}' $PADDLE_ROOT/added_ut)$
+    ctest -R "(${added_uts})" -E "$disable_wingpu11_test" --output-on-failure -C Release --repeat-until-fail 3;added_ut_error=$?
+    rm -f $PADDLE_ROOT/added_ut
+    if [ "$added_ut_error" != 0 ];then
+        echo "========================================"
+        echo "Added UT should pass three additional executions"
+        echo "========================================"
+        exit 8;
+    fi
+    if nvcc --version | grep 11.2; then
+        echo "Only test added_ut temporarily when running in CI-Windows-inference of CUDA 11.2."
+        exit 0;
+    fi
+fi
+set -e
+
if [ ${WITH_GPU:-OFF} == "ON" ];then
    export CUDA_VISIBLE_DEVICES=0
...