primapi.py 8.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
import logging
16 17
import typing

18
import paddle
19
from paddle.fluid import backward, core, framework
20
from paddle.incubate.autograd import primx, utils
21 22 23


@framework.static_only
def forward_grad(outputs, inputs, grad_inputs=None):
    """Forward mode of automatic differentiation.

    Note:
        **ONLY available in the static graph mode and primitive operators.**

    Args:
        outputs(Tensor|Sequence[Tensor]): The output tensor or tensors.
        inputs(Tensor|Sequence[Tensor]): The input tensor or tensors.
        grad_inputs(Tensor|Sequence[Tensor], optional): The gradient Tensor or
            Tensors of inputs, which have the same shape as inputs. Defaults
            to None, which is equivalent to all ones.

    Returns:
        grad_outputs(Tensor|Sequence[Tensor]): The gradients for outputs. A
        single Tensor is returned when ``outputs`` is a single Tensor,
        otherwise the whole sequence is returned.

    Raises:
        RuntimeError: If primitive operators are not enabled, or if any
            variable in ``inputs``/``outputs`` does not belong to the current
            block of the default main program.
        TypeError: If ``outputs`` or ``inputs`` is neither a Tensor nor a
            Sequence.

    Examples:

        .. code-block:: python

            import numpy as np
            import paddle

            paddle.enable_static()
            paddle.incubate.autograd.enable_prim()

            startup_program = paddle.static.Program()
            main_program = paddle.static.Program()

            with paddle.static.program_guard(main_program, startup_program):
                x = paddle.static.data('x', shape=[1], dtype='float32')
                y = x * x
                y_grad = paddle.incubate.autograd.forward_grad(y, x)
                paddle.incubate.autograd.prim2orig()

            exe = paddle.static.Executor()
            exe.run(startup_program)
            y_grad = exe.run(main_program, feed={'x': np.array([2.]).astype('float32')}, fetch_list=[y_grad])
            print(y_grad)
            # [array([4.], dtype=float32)]

            paddle.incubate.autograd.disable_prim()
            paddle.disable_static()
    """
    if not utils.prim_enabled():
        # The trailing space matters: adjacent literals are concatenated
        # verbatim, and without it the message reads "primitiveoperators".
        raise RuntimeError(
            'forward_grad must be running on primitive '
            'operators, use enable_prim to turn it on.'
        )

    if not isinstance(outputs, (framework.Variable, typing.Sequence)):
        raise TypeError(
            'Expected outputs is Tensor|Sequence[Tensor], '
            f'but got {type(outputs)}.'
        )

    if not isinstance(inputs, (framework.Variable, typing.Sequence)):
        raise TypeError(
            'Expected inputs is Tensor|Sequence[Tensor], '
            f'but got {type(inputs)}.'
        )

    # Normalize every argument through the same helper so the code below can
    # treat single tensors and sequences uniformly.
    ys, xs, xs_dot = (
        utils.as_tensors(outputs),
        utils.as_tensors(inputs),
        utils.as_tensors(grad_inputs),
    )

    # The transformation rewrites a single block, so every variable involved
    # has to live in the block that is current right now.
    block = framework.default_main_program().current_block()
    if any(x.block != block for x in xs + ys):
        raise RuntimeError(
            'Variable in inputs and outputs should exist in current block of '
            'main program.'
        )

    # Lower the block to primitive ops before linearizing it.
    primx.orig2prim(block)
    ad = primx.Transform(ys[0].block)
    _, ys_dot = ad.linearize(xs, ys, xs_dot)

    # Mirror the caller's calling convention: single Tensor in, single out.
    return ys_dot[0] if isinstance(outputs, framework.Variable) else ys_dot


@framework.static_only
def grad(outputs, inputs, grad_outputs=None):
    """Reverse mode of automatic differentiation.

    Note:
        **ONLY available in the static graph mode.** When primitive operators
        are not enabled, the computation is delegated to
        ``backward.gradients``.

    Args:
        outputs(Tensor|Sequence[Tensor]): The output Tensor or Tensors.
        inputs(Tensor|Sequence[Tensor]): The input Tensor or Tensors.
        grad_outputs(Tensor|Sequence[Tensor], optional): The gradient Tensor
            or Tensors of outputs, which have the same shape as outputs.
            Defaults to None, which is equivalent to all ones.

    Returns:
        grad_inputs(Tensor|Sequence[Tensor]): The gradients for inputs. A
        single Tensor is returned when ``inputs`` is a single Tensor,
        otherwise the whole sequence is returned.

    Raises:
        TypeError: If primitive operators are enabled and ``outputs`` or
            ``inputs`` is neither a Tensor nor a Sequence.
        RuntimeError: If a non-None variable in ``inputs``/``outputs`` does
            not belong to the current block of the default main program, or
            if linearization yields a None gradient for any output.

    Examples:

        .. code-block:: python

            import numpy as np
            import paddle

            paddle.enable_static()
            paddle.incubate.autograd.enable_prim()

            startup_program = paddle.static.Program()
            main_program = paddle.static.Program()
            with paddle.static.program_guard(main_program, startup_program):
                x = paddle.static.data('x', shape=[1], dtype='float32')
                x.stop_gradient = False
                y = x * x
                x_grad = paddle.incubate.autograd.grad(y, x)
                paddle.incubate.autograd.prim2orig()

            exe = paddle.static.Executor()
            exe.run(startup_program)
            x_grad = exe.run(main_program, feed={'x': np.array([2.]).astype('float32')}, fetch_list=[x_grad])
            print(x_grad)
            # [array([4.], dtype=float32)]

            paddle.incubate.autograd.disable_prim()
            paddle.disable_static()
    """
    if not utils.prim_enabled():
        grad_inputs = backward.gradients(outputs, inputs, grad_outputs)
        # backward.gradients returns a list even when inputs is a single
        # Tensor. Return the first element in that case so the result mirrors
        # the caller's calling convention.
        if (
            isinstance(inputs, framework.Variable)
            and isinstance(grad_inputs, typing.Sequence)
            and len(grad_inputs) > 0
        ):
            return grad_inputs[0]
        else:
            return grad_inputs

    if not isinstance(outputs, (framework.Variable, typing.Sequence)):
        raise TypeError(
            'Expected outputs is Tensor|Sequence[Tensor], '
            f'but got {type(outputs)}.'
        )

    if not isinstance(inputs, (framework.Variable, typing.Sequence)):
        raise TypeError(
            'Expected inputs is Tensor|Sequence[Tensor], '
            f'but got {type(inputs)}.'
        )

    # Normalize every argument through the same helper so the code below can
    # treat single tensors and sequences uniformly.
    ys, xs, ys_bar = (
        utils.as_tensors(outputs),
        utils.as_tensors(inputs),
        utils.as_tensors(grad_outputs),
    )
    # None entries are tolerated here; every real variable must live in the
    # block that is about to be transformed.
    block = framework.default_main_program().current_block()
    if any((x is not None and x.block != block) for x in xs + ys):
        raise RuntimeError(
            'Variable in inputs and outputs should be None or in current block of main program'
        )

    # TODO(Tongxin) without any prior knowledge about whether the program
    # is completely lowered to primitive ops, it's mandatory to run the lowering
    # pass once and again. This is obviously inefficient and needs to be
    # optimized.
    primx.orig2prim(block)
    ad = primx.Transform(block)
    xs_dot, ys_dot = ad.linearize(xs, ys)
    if any(var is None for var in ys_dot):
        raise RuntimeError(
            'Grads cannot be computed. The given outputs does not depend on inputs'
        )
    ys_bar, xs_bar = ad.transpose(ys_dot, xs_dot, ys_bar)

    # remove xs_dot and their constructor ops
    op_indexes = []
    for var in xs_dot:
        if var is not None:
            op_index = block.ops.index(var.op)
            # NOTE(review): if block.ops is a plain list, index() raises
            # ValueError itself on a miss and never returns a negative
            # value, so this guard is purely defensive - confirm.
            if op_index < 0:
                raise ValueError(
                    f'op_index should be greater than or equal to 0, but op_index={op_index}.'
                )
            op_indexes.append(op_index)

    ad.erase_ops(sorted(op_indexes))
    ad.erase_dots(xs_dot)

    # Mirror the caller's calling convention: single Tensor in, single out.
    return xs_bar[0] if isinstance(inputs, framework.Variable) else xs_bar
216 217 218 219 220


@framework.static_only
def to_prim(blocks):
    """Lower ops that have registered composite rules into primitive ops.

    Does nothing when ``core.enable_prim_forward()`` reports that forward
    primitive lowering is disabled. Otherwise the lowering is delegated to
    ``primx._lower_composite`` under a ``program_guard`` for the program that
    owns the given block (or the first block of the sequence).

    Args:
        blocks(Block|Sequence[Block]): A single block, or a sequence of
            blocks. The sequence is indexed at position 0 to find the owning
            program, so it is expected to be non-empty.

    Returns:
        None.

    Raises:
        TypeError: If ``blocks`` is neither a Block nor a Sequence, or if a
            sequence contains an element that is not a Block.
    """
    if not core.enable_prim_forward():
        return

    block_cls = paddle.fluid.framework.Block
    single_block = isinstance(blocks, block_cls)

    # Reject unsupported argument types up front.
    if not single_block and not isinstance(blocks, typing.Sequence):
        raise TypeError(
            f"Expect block or sequence of blocks, but got {type(blocks)}."
        )

    if single_block:
        logging.info("Atomize composite op to primitive ops begin.")
        main_program = blocks.program
    else:
        # Validate every element before touching the first one.
        for item in blocks:
            if isinstance(item, block_cls):
                continue
            raise TypeError(
                f"Expect block or sequence of blocks, but sequence contains {type(item)}."
            )
        main_program = blocks[0].program

    with framework.program_guard(main_program):
        primx._lower_composite(blocks)