# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
from paddle import _legacy_C_ops
from paddle.fluid.framework import _non_static_mode


class FusedDropout(paddle.nn.Layer):
    r"""
    Dropout is a regularization technique for reducing overfitting by
    preventing neuron co-adaptation during training, as described in the paper:
    `Improving neural networks by preventing co-adaptation of feature detectors
    <https://arxiv.org/abs/1207.0580>`_

    The dropout operator randomly sets the outputs of some units to zero, while
    upscaling the others according to the given dropout probability.

    It is an optimized implementation of ``paddle.nn.Dropout``.

    In dygraph mode, please use ``eval()`` to switch to evaluation mode, where
    dropout is disabled.

    Parameters:
        p (float|int, optional): Probability of setting units to zero. Default: 0.5.
        axis (int|list|tuple, optional): The axis along which the dropout is
            performed. Default: None.
        mode (str, optional): ['upscale_in_train' (default) | 'downscale_in_infer'].

            1. upscale_in_train (default), upscale the output at training time

               - train: :math:`out = input \times \frac{mask}{(1.0 - p)}`
               - inference: :math:`out = input`

            2. downscale_in_infer, downscale the output at inference time

               - train: :math:`out = input \times mask`
               - inference: :math:`out = input \times (1.0 - p)`

        name (str, optional): Name for the operation. Default: None. For more
            information, please refer to :ref:`api_guide_Name`.

    Shape:
        - input: N-D tensor.
        - output: N-D tensor, the same shape as input.

    Examples:
        .. code-block:: python

            import paddle

            x = paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype="float32")
            m = paddle.incubate.nn.FusedDropout(p=0.5)

            y_train = m(x)
            print(y_train)
            # Tensor(shape=[2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
            #        [[2., 0., 6.],
            #         [0., 0., 0.]])

            m.eval()  # switch the model to test phase
            y_test = m(x)
            print(y_test)
            # Tensor(shape=[2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
            #        [[1., 2., 3.],
            #         [4., 5., 6.]])
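
        When ``axis`` is given, the dropout mask is generated along the chosen
        axes and broadcast along the rest. A minimal sketch (output is random;
        ``axis=1`` is an illustrative choice, under which each column is kept
        or zeroed as a whole):

        .. code-block:: python

            import paddle

            x = paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype="float32")
            m = paddle.incubate.nn.FusedDropout(p=0.5, axis=1)
            y_train = m(x)  # every column of ``x`` is fully kept or fully zeroed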
    """

    def __init__(self, p=0.5, axis=None, mode="upscale_in_train", name=None):
        super().__init__()

        if not isinstance(p, (float, int)):
            raise TypeError("p argument should be a number")
        if p < 0 or p > 1:
            raise ValueError("p argument should be between 0 and 1")

        # Validate the public mode name before any internal renaming.
        if mode not in ('downscale_in_infer', 'upscale_in_train'):
            raise ValueError(
                "mode argument should be 'downscale_in_infer' or 'upscale_in_train'"
            )

        if axis is not None and not isinstance(axis, (int, list, tuple)):
            raise TypeError(
                "datatype of axis argument should be int, list or tuple"
            )

        self.p = p
        self.mode = mode
        self.name = name

        self.axis = None
        if axis is not None:
            self.axis = [axis] if isinstance(axis, int) else list(axis)

    def forward(self, input):
        # fast return for p == 0
        if self.p == 0:
            return input

        if self.axis is not None and _non_static_mode():
            seed = None
            if paddle.static.default_main_program().random_seed != 0:
                seed = paddle.static.default_main_program().random_seed
            out, mask = _legacy_C_ops.dropout_nd(
                input,
                'dropout_prob',
                self.p,
                'is_test',
                not self.training,
                'fix_seed',
                seed is not None,
                'seed',
                0 if seed is None else seed,
                'dropout_implementation',
                # semantic transfer: the op spells the public
                # 'downscale_in_infer' mode as 'downgrade_in_infer'
                'downgrade_in_infer'
                if self.mode == 'downscale_in_infer'
                else self.mode,
                'axis',
                self.axis,
            )
        else:
            out = paddle.nn.functional.dropout(
                input,
                p=self.p,
                axis=self.axis,
                training=self.training,
                mode=self.mode,
                name=self.name,
            )
        return out

    def extra_repr(self):
        name_str = f', name={self.name}' if self.name else ''
        return f'p={self.p}, axis={self.axis}, mode={self.mode}{name_str}'
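

# A minimal self-check sketch (illustrative, not part of the public API); it
# assumes eager (dygraph) mode and only exercises the train/eval switch.
if __name__ == "__main__":
    x = paddle.rand([4, 8])
    layer = FusedDropout(p=0.3)

    layer.train()  # dropout active: units are zeroed, survivors scaled by 1 / (1 - p)
    y_train = layer(x)

    layer.eval()  # dropout disabled: upscale_in_train returns the input unchanged
    y_eval = layer(x)
    assert paddle.allclose(y_eval, x)

    print(layer)  # FusedDropout(p=0.3, axis=None, mode=upscale_in_train)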