# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from paddle import _C_ops
from paddle.common_ops_import import default_main_program
from paddle.fluid import core
from paddle.fluid.framework import in_dygraph_mode
from paddle.framework import LayerHelper


def fused_dropout_add(
    x, y, p=0.5, training=True, mode='upscale_in_train', name=None
):
    r"""
    Fused Dropout and Add.

    Args:
        x (Tensor): The input tensor. The data type is bfloat16, float16, float32 or float64.
        y (Tensor): The tensor to be added to the dropout result of `x`. The data type is bfloat16, float16, float32 or float64.
        p (float|int, optional): Probability of setting units to zero. Default: 0.5.
        training (bool, optional): A flag indicating whether it is in the training phase or not. Default: True.
        mode (str, optional): ['upscale_in_train'(default) | 'downscale_in_infer'].

            1. upscale_in_train (default), upscale the output at training time

                - train: :math:`out = x \times \frac{mask}{(1.0 - dropout\_prob)} + y`
                - inference: :math:`out = x + y`

            2. downscale_in_infer, downscale the output at inference time

                - train: :math:`out = x \times mask + y`
                - inference: :math:`out = x \times (1.0 - dropout\_prob) + y`

        name (str, optional): Name for the operation. Default: None. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A Tensor representing the result of the fused dropout and add, with the same shape and data type as `x`.


    Examples:

        ..  code-block:: python

            # required: gpu
            import paddle
            from paddle.incubate.nn.functional import fused_dropout_add

            x = paddle.randn([4, 10], dtype='float16')
            y = paddle.randn([4, 10], dtype='float16')
            out = fused_dropout_add(x, y, p=0.5)
    """
    if isinstance(p, (int, float)):
        # fast return for p == 0
        if p == 0:
            return x + y
        elif p < 0 or p > 1:
            raise ValueError("p argument should be between 0 and 1")
    if mode not in ('downscale_in_infer', 'upscale_in_train'):
        raise ValueError(
            "mode argument should be 'downscale_in_infer' or 'upscale_in_train'"
        )
    seed = None
    if in_dygraph_mode():
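        # Respect a user-specified global random seed so the dropout mask is
        # reproducible; otherwise the kernel draws its own seed.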
        if default_main_program().random_seed != 0:
            seed = default_main_program().random_seed
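        # The fused kernel returns the output together with seed_offset, which
        # records the random seed and offset used to generate the dropout mask
        # so the same mask can be regenerated in the backward pass.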
        out, seed_offset = _C_ops.fused_dropout_add(
            x,
            y,
            p,
            not training,
            mode,
            seed if seed is not None else 0,
            seed is not None,
        )
        return out
    else:
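        # Static-graph path: create the output variables and append a
        # fused_dropout_add op to the current program.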
        helper = LayerHelper('fused_dropout_add', **locals())
        out = helper.create_variable_for_type_inference(dtype=x.dtype)
        seed_offset = helper.create_variable_for_type_inference(
            dtype=core.VarDesc.VarType.INT64, stop_gradient=True
        )

        def get_attrs(prog, dropout_prob, is_test, seed):
            if (seed is None or seed == 0) and prog.random_seed != 0:
                seed = prog.random_seed
            attrs = {
                'p': dropout_prob,
                'is_test': is_test,
                'mode': mode,
                'seed': seed if seed is not None else 0,
                'fix_seed': seed is not None,
            }
            return attrs

        attrs = get_attrs(helper.main_program, p, not training, seed)

        helper.append_op(
            type='fused_dropout_add',
            inputs={'x': x, 'y': y},
            outputs={'out': [out], 'seed_offset': [seed_offset]},
            attrs=attrs,
        )
        return out
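

# ---------------------------------------------------------------------------
# Minimal sanity-check sketch (illustrative only, not part of the public API).
# It assumes a GPU build of Paddle, since the fused kernel targets GPU. The
# fused op is expected to behave like the unfused composition
#     paddle.nn.functional.dropout(x, p, training, mode) + y
# but the two draw independent random masks, so only the p == 0 fast path and
# the output shape/dtype are checked here.
if __name__ == "__main__":
    import paddle
    import paddle.nn.functional as F

    paddle.seed(2023)
    x = paddle.randn([4, 10], dtype='float16')
    y = paddle.randn([4, 10], dtype='float16')

    # p == 0 takes the fast path and must equal a plain elementwise add.
    out_fast = fused_dropout_add(x, y, p=0.0)
    assert paddle.allclose(
        out_fast.astype('float32'), (x + y).astype('float32')
    )

    # General case: same shape and dtype as the unfused reference.
    out_fused = fused_dropout_add(x, y, p=0.5)
    out_ref = F.dropout(x, p=0.5, training=True, mode='upscale_in_train') + y
    assert out_fused.shape == out_ref.shape
    assert out_fused.dtype == out_ref.dtype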