# primapi.py
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import typing

from paddle.fluid import backward, framework
from paddle.incubate.autograd import primx, utils


@framework.static_only
def forward_grad(outputs, inputs, grad_inputs=None):
    """Forward mode of automatic differentiation.

    .. note::
        **ONLY available in the static mode and primitive operators.**

    Args:
        outputs(Tensor|Sequence[Tensor]): The output tensor or tensors.
        inputs(Tensor|Sequence[Tensor]): The input tensor or tensors.
        grad_inputs(Tensor|Sequence[Tensor], optional): The gradient Tensor or
            Tensors of inputs, which have the same shape as inputs. Defaults
            to None, which is equivalent to all ones.

    Returns:
        grad_outputs(Tensor|Sequence[Tensor]): The gradients for outputs. A
        single Tensor is returned when ``outputs`` is a single Tensor;
        otherwise the whole result of the linearization is returned.

    Raises:
        RuntimeError: If primitive operators are not enabled, or if any
            Tensor in ``inputs`` or ``outputs`` does not belong to the
            current block of the default main program.
        TypeError: If ``outputs`` or ``inputs`` is neither a Tensor nor a
            sequence.

    Examples:

        .. code-block:: python

            import numpy as np
            import paddle

            paddle.enable_static()
            paddle.incubate.autograd.enable_prim()

            startup_program = paddle.static.Program()
            main_program = paddle.static.Program()

            with paddle.static.program_guard(main_program, startup_program):
                x = paddle.static.data('x', shape=[1], dtype='float32')
                y = x * x
                y_grad = paddle.incubate.autograd.forward_grad(y, x)
                paddle.incubate.autograd.prim2orig()

            exe = paddle.static.Executor()
            exe.run(startup_program)
            y_grad = exe.run(main_program, feed={'x': np.array([2.]).astype('float32')}, fetch_list=[y_grad])
            print(y_grad)
            # [array([4.], dtype=float32)]

            paddle.incubate.autograd.disable_prim()
            paddle.disable_static()
    """
    if not utils.prim_enabled():
        # The trailing space matters: adjacent string literals are joined
        # verbatim, so without it the message reads "primitiveoperators".
        raise RuntimeError(
            'forward_grad must be running on primitive '
            'operators, use enable_prim to turn it on.'
        )

    if not isinstance(outputs, (framework.Variable, typing.Sequence)):
        raise TypeError(
            'Expected outputs is Tensor|Sequence[Tensor], '
            f'but got {type(outputs)}.'
        )

    if not isinstance(inputs, (framework.Variable, typing.Sequence)):
        raise TypeError(
            'Expected inputs is Tensor|Sequence[Tensor], '
            f'but got {type(inputs)}.'
        )

    # Normalize every argument through the same helper so the code below
    # can treat single Tensors and sequences of Tensors uniformly.
    ys, xs, xs_dot = (
        utils.as_tensors(outputs),
        utils.as_tensors(inputs),
        utils.as_tensors(grad_inputs),
    )

    # All participating variables must live in the block that is about to
    # be transformed.
    block = framework.default_main_program().current_block()
    if any(x.block != block for x in xs + ys):
        raise RuntimeError(
            'Variable in inputs and targets should exist in current block of '
            'main program.'
        )

    # Run the orig2prim pass on the block, then linearize it to obtain the
    # forward-mode gradients of the outputs.
    primx.orig2prim(block)
    ad = primx.Transform(ys[0].block)
    _, ys_dot = ad.linearize(xs, ys, xs_dot)

    # Mirror the shape of the caller's argument: single Tensor in, single
    # Tensor out.
    return ys_dot[0] if isinstance(outputs, framework.Variable) else ys_dot


@framework.static_only
def grad(outputs, inputs, grad_outputs=None):
    """Reverse mode of automatic differentiation.

    When primitive operators are not enabled, this function delegates to
    ``backward.gradients``. Otherwise the current block is lowered to
    primitive operators, linearized and transposed.

    .. note::
        **ONLY available in the static mode and primitive operators**

    Args:
        outputs(Tensor|Sequence[Tensor]): The output Tensor or Tensors.
        inputs(Tensor|Sequence[Tensor]): The input Tensor or Tensors.
        grad_outputs(Tensor|Sequence[Tensor], optional): The gradient Tensor
            or Tensors of outputs, which have the same shape as outputs.
            Defaults to None, which is equivalent to all ones.

    Returns:
        grad_inputs(Tensor|Sequence[Tensor]): The gradients for inputs. A
        single Tensor is returned when ``inputs`` is a single Tensor.

    Raises:
        TypeError: If primitive operators are enabled and ``outputs`` or
            ``inputs`` is neither a Tensor nor a sequence.
        RuntimeError: If primitive operators are enabled and a non-None
            Tensor in ``inputs`` or ``outputs`` is not in the current block
            of the default main program, or if linearization yields None for
            any output.

    Examples:

        .. code-block:: python

            import numpy as np
            import paddle

            paddle.enable_static()
            paddle.incubate.autograd.enable_prim()

            startup_program = paddle.static.Program()
            main_program = paddle.static.Program()
            with paddle.static.program_guard(main_program, startup_program):
                x = paddle.static.data('x', shape=[1], dtype='float32')
                x.stop_gradient = False
                y = x * x
                x_grad = paddle.incubate.autograd.grad(y, x)
                paddle.incubate.autograd.prim2orig()

            exe = paddle.static.Executor()
            exe.run(startup_program)
            x_grad = exe.run(main_program, feed={'x': np.array([2.]).astype('float32')}, fetch_list=[x_grad])
            print(x_grad)
            # [array([4.], dtype=float32)]

            paddle.incubate.autograd.disable_prim()
            paddle.disable_static()
    """
    if not utils.prim_enabled():
        grad_inputs = backward.gradients(outputs, inputs, grad_outputs)
        # backward.gradients returns a list even when inputs is a single
        # Tensor. Return the first element in that case so the result has
        # the same form as the argument the caller passed.
        if (
            isinstance(inputs, framework.Variable)
            and isinstance(grad_inputs, typing.Sequence)
            and len(grad_inputs) > 0
        ):
            return grad_inputs[0]
        return grad_inputs

    if not isinstance(outputs, (framework.Variable, typing.Sequence)):
        raise TypeError(
            'Expected outputs is Tensor|Sequence[Tensor], '
            f'but got {type(outputs)}.'
        )

    if not isinstance(inputs, (framework.Variable, typing.Sequence)):
        raise TypeError(
            'Expected inputs is Tensor|Sequence[Tensor], '
            f'but got {type(inputs)}.'
        )

    # Normalize every argument through the same helper so the code below
    # can treat single Tensors and sequences of Tensors uniformly.
    ys, xs, ys_bar = (
        utils.as_tensors(outputs),
        utils.as_tensors(inputs),
        utils.as_tensors(grad_outputs),
    )
    block = framework.default_main_program().current_block()
    if any((x is not None and x.block != block) for x in xs + ys):
        raise RuntimeError(
            'Variable in inputs and outputs should be None or in current block of main program'
        )

    # TODO(Tongxin) without any prior knowledge about whether the program
    # is completely lowered to primitive ops, it's mandatory to run the lowering
    # pass once and again. This is obviously inefficient and needs to be
    # optimized.
    primx.orig2prim(block)
    ad = primx.Transform(block)
    xs_dot, ys_dot = ad.linearize(xs, ys)
    if any(var is None for var in ys_dot):
        raise RuntimeError(
            'Grads cannot be computed. The given outputs does not depend on inputs'
        )
    ys_bar, xs_bar = ad.transpose(ys_dot, xs_dot, ys_bar)

    # Remove xs_dot and their constructor ops.
    # NOTE(review): assumes block.ops is a plain list, whose index() raises
    # ValueError for a missing op and never returns a negative position, so
    # no separate "op_index < 0" check is needed — confirm.
    op_indexes = [
        block.ops.index(var.op) for var in xs_dot if var is not None
    ]

    ad.erase_ops(sorted(op_indexes))
    ad.erase_dots(xs_dot)

    # Mirror the shape of the caller's argument: single Tensor in, single
    # Tensor out.
    return xs_bar[0] if isinstance(inputs, framework.Variable) else xs_bar