Unverified commit 6af7b42b authored by W wangzhen38, committed by GitHub

[remove fluid] drop_out API (#48586)

* [remove fluid] drop_out API

* [remove fluid] drop_out API

* [remove fluid] drop_out layernorm

* [remove fluid] drop_out layernorm

* [remove fluid] drop_out layernorm

* [remove fluid] drop_out layernorm
Parent 23299c70
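The diff below drops the `Dropout` and `LayerNorm` layers from `paddle.fluid.dygraph.nn` and points callers at their `paddle.nn` counterparts. A minimal migration sketch (tensor shapes and variable names are illustrative, not taken from the diff):

```python
import numpy as np
import paddle

x = paddle.to_tensor(np.random.random((3, 32, 32)).astype('float32'))

# fluid.dygraph.Dropout(p=0.5)  ->  paddle.nn.Dropout(p=0.5)
drop = paddle.nn.Dropout(p=0.5)
y_train = drop(x)     # training: units are dropped at random
drop.eval()           # dropout is a no-op in eval mode
y_eval = drop(x)

# fluid.dygraph.LayerNorm([32, 32])  ->  paddle.nn.LayerNorm([32, 32])
ln = paddle.nn.LayerNorm([32, 32])
z = ln(x)
```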
@@ -53,10 +53,8 @@ __all__ = [
'Pool2D',
'Linear',
'BatchNorm',
'Dropout',
'Embedding',
'GRUUnit',
'LayerNorm',
'NCE',
'PRelu',
'BilinearTensorProduct',
@@ -1184,124 +1182,6 @@ class BatchNorm(layers.Layer):
return self._helper.append_activation(batch_norm_out, self._act)
class Dropout(layers.Layer):
"""
This interface is used to construct a callable object of the ``Dropout`` class.
For more details, refer to code examples.
Drop or keep each element of the input independently. Dropout is a regularization
technique for reducing overfitting by preventing neuron co-adaptation during
training. The dropout operator randomly sets (according to the given dropout
probability) the outputs of some units to zero, while the others remain
unchanged.
At inference time the dropout layer can be removed for efficiency.
Parameters:
p (float, optional): Probability of setting units to zero. Default: 0.5
seed (int, optional): A Python integer used to create random seeds. If this
parameter is set to None, a random seed is used.
NOTE: If an integer seed is given, the same output units will
always be dropped. DO NOT use a fixed seed in training. Default: None.
dropout_implementation(string, optional): ['downgrade_in_infer'(default)|'upscale_in_train']
1. downgrade_in_infer(default), downgrade the outcome at inference
- train: out = input * mask
- inference: out = input * (1.0 - p)
(mask is a tensor with the same shape as the input; its values are 0 or 1,
and the ratio of 0s is p)
2. upscale_in_train, upscale the outcome at training time
- train: out = input * mask / ( 1.0 - p )
- inference: out = input
(mask is a tensor with the same shape as the input; its values are 0 or 1,
and the ratio of 0s is p)
is_test (bool, optional): A flag indicating whether it is in the test phase or not.
This flag only has effect on static graph mode. For dygraph mode, please use ``eval()``.
Default: False.
Returns:
None
Examples:
.. code-block:: python
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
import numpy as np
x = np.random.random(size=(3, 10, 3, 7)).astype('float32')
with fluid.dygraph.guard():
x = to_variable(x)
m = fluid.dygraph.Dropout(p=0.5)
droped_train = m(x)
# switch to eval mode
m.eval()
droped_eval = m(x)
"""
def __init__(
self,
p=0.5,
seed=None,
dropout_implementation="downgrade_in_infer",
is_test=False,
):
super().__init__()
assert isinstance(p, (float, int)), "p argument should be a number"
assert 0 <= p <= 1, "p argument should be between 0 and 1"
self._dropout_prob = p
assert seed is None or isinstance(
seed, int
), "seed argument should be None or a integer"
self._seed = seed
assert dropout_implementation in (
'downgrade_in_infer',
'upscale_in_train',
), "dropout_implementation argument should be 'downgrade_in_infer' or 'upscale_in_train'"
self._dropout_implementation = dropout_implementation
self._is_test = is_test
def forward(self, input):
# fast return for p == 0
if self._dropout_prob == 0:
return input
prog = default_main_program()
if (self._seed is None or self._seed == 0) and prog.random_seed != 0:
self._seed = prog.random_seed
attrs = {
'dropout_prob': self._dropout_prob,
'is_test': not self.training
if _non_static_mode()
else self._is_test,
'fix_seed': self._seed is not None,
'seed': self._seed if self._seed is not None else 0,
'dropout_implementation': self._dropout_implementation,
}
if _non_static_mode():
attrs = sum(attrs.items(), ())
out, mask = _legacy_C_ops.dropout(input, *attrs)
return out
out = self._helper.create_variable_for_type_inference(dtype=input.dtype)
mask = self._helper.create_variable_for_type_inference(
dtype=core.VarDesc.VarType.UINT8, stop_gradient=True
)
self._helper.append_op(
type='dropout',
inputs={'X': [input]},
outputs={'Out': [out], 'Mask': [mask]},
attrs=attrs,
)
return out
class Embedding(layers.Layer):
r"""
:alias_main: paddle.nn.Embedding
@@ -1483,214 +1363,6 @@ class Embedding(layers.Layer):
return out
class LayerNorm(layers.Layer):
r"""
:alias_main: paddle.nn.LayerNorm
:alias: paddle.nn.LayerNorm,paddle.nn.layer.LayerNorm,paddle.nn.layer.norm.LayerNorm
:old_api: paddle.fluid.dygraph.LayerNorm
This interface is used to construct a callable object of the ``LayerNorm`` class.
For more details, refer to code examples.
It implements the function of the Layer Normalization Layer and can be applied to mini-batch input data.
Refer to `Layer Normalization <https://arxiv.org/pdf/1607.06450v1.pdf>`_
The formula is as follows:
.. math::
\\mu & = \\frac{1}{H}\\sum_{i=1}^{H} x_i
\\sigma & = \\sqrt{\\frac{1}{H}\\sum_{i=1}^{H}{(x_i - \\mu)^2} + \\epsilon}
y & = f(\\frac{g}{\\sigma}(x - \\mu) + b)
- :math:`x`: the vector representation of the summed inputs to the neurons in that layer.
- :math:`H`: the number of hidden units in a layer
- :math:`\\epsilon`: the small value added to the variance to prevent division by zero.
- :math:`g`: the trainable scale parameter.
- :math:`b`: the trainable bias parameter.
Parameters:
normalized_shape(int or list or tuple): Input shape from an expected input of
size :math:`[*, normalized_shape[0], normalized_shape[1], ..., normalized_shape[-1]]`.
If it is a single integer, this module will normalize over the last dimension
which is expected to be of that specific size.
scale(bool, optional): Whether to learn the adaptive gain :math:`g` after
normalization. Default: True.
shift(bool, optional): Whether to learn the adaptive bias :math:`b` after
normalization. Default: True.
epsilon(float, optional): The small value added to the variance to prevent
division by zero. Default: 1e-05.
param_attr(ParamAttr, optional): The parameter attribute for the learnable
gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is
omitted. If :attr:`scale` is True and :attr:`param_attr` is None,
a default :code:`ParamAttr` would be added as scale. The
:attr:`param_attr` is initialized as 1 if it is added. Default: None.
bias_attr(ParamAttr, optional): The parameter attribute for the learnable
bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is
omitted. If :attr:`shift` is True and :attr:`bias_attr` is None,
a default :code:`ParamAttr` would be added as bias. The
:attr:`bias_attr` is initialized as 0 if it is added. Default: None.
act(str, optional): Activation to be applied to the output of layer normalization.
Default: None.
dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".
Returns:
None
Examples:
.. code-block:: python
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
import numpy
x = numpy.random.random((3, 32, 32)).astype('float32')
with fluid.dygraph.guard():
x = to_variable(x)
layerNorm = fluid.LayerNorm([32, 32])
ret = layerNorm(x)
"""
def __init__(
self,
normalized_shape,
scale=True,
shift=True,
epsilon=1e-05,
param_attr=None,
bias_attr=None,
act=None,
dtype='float32',
):
super().__init__()
if isinstance(normalized_shape, numbers.Integral):
normalized_shape = [normalized_shape]
self._normalized_shape = list(normalized_shape)
self._scale = scale
self._shift = shift
self._epsilon = epsilon
self._param_attr = param_attr
self._bias_attr = bias_attr
self._act = act
self._dtype = dtype
param_shape = [np.prod(self._normalized_shape)]
if self._scale:
self.weight = self.create_parameter(
attr=self._param_attr,
shape=param_shape,
dtype=self._dtype,
default_initializer=Constant(1.0),
)
else:
if self._param_attr:
logging.warn("param_attr are only available with scale is True")
self.weight = None
if self._shift:
assert self._bias_attr is not False
self.bias = self.create_parameter(
attr=self._bias_attr,
shape=param_shape,
dtype=self._dtype,
is_bias=True,
)
else:
if self._bias_attr:
logging.warn("bias_attr are only available with shift is True")
self.bias = None
def forward(self, input):
input_shape = list(input.shape)
input_ndim = len(input_shape)
normalized_ndim = len(self._normalized_shape)
self._begin_norm_axis = input_ndim - normalized_ndim
if (
input_ndim < normalized_ndim
or input_shape[self._begin_norm_axis :] != self._normalized_shape
):
str_normalized_shape = str(self._normalized_shape)
raise ValueError(
'Given normalized_shape is '
+ str_normalized_shape
+ ', expected input with shape [*, '
+ str_normalized_shape[1:]
+ ', but got input shape '
+ str(input_shape)
)
if _non_static_mode():
if in_dygraph_mode():
pre_act, _, _ = _C_ops.layer_norm(
input,
self.weight,
self.bias,
self._epsilon,
self._begin_norm_axis,
)
return dygraph_utils._append_activation_in_dygraph(
pre_act, act=self._act
)
else:
pre_act, _, _ = _legacy_C_ops.layer_norm(
input,
self.weight,
self.bias,
'epsilon',
self._epsilon,
'begin_norm_axis',
self._begin_norm_axis,
)
return dygraph_utils._append_activation_in_dygraph(
pre_act, act=self._act
)
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'LayerNorm'
)
inputs = dict()
inputs['X'] = [input]
if self._scale:
inputs['Scale'] = [self.weight]
if self._shift:
inputs['Bias'] = [self.bias]
attrs = {
"epsilon": self._epsilon,
"begin_norm_axis": self._begin_norm_axis,
}
# create output
mean_out = self._helper.create_variable_for_type_inference(
dtype=self._dtype, stop_gradient=True
)
variance_out = self._helper.create_variable_for_type_inference(
dtype=self._dtype, stop_gradient=True
)
layer_norm_out = self._helper.create_variable_for_type_inference(
self._dtype
)
self._helper.append_op(
type="layer_norm",
inputs=inputs,
outputs={
"Y": layer_norm_out,
"Mean": mean_out,
"Variance": variance_out,
},
attrs={
"epsilon": self._epsilon,
"begin_norm_axis": self._begin_norm_axis,
},
)
return self._helper.append_activation(layer_norm_out, act=self._act)
class GRUUnit(layers.Layer):
"""
**GRU unit layer**
......
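The removed class exposed `seed` and `dropout_implementation` arguments. A hedged sketch of how they map onto `paddle.nn.Dropout`, assuming its `mode` argument covers the same two conventions described in the removed docstring:

```python
import paddle

x = paddle.rand([3, 10, 3, 7])

# The removed dropout_implementation argument maps onto the mode argument of
# paddle.nn.Dropout: 'upscale_in_train' (the new default) and
# 'downgrade_in_infer' (the old default) are both accepted.
m = paddle.nn.Dropout(p=0.5, mode='downgrade_in_infer')
out_train = m(x)      # train: out = x * mask
m.eval()
out_infer = m(x)      # infer: out = x * (1 - p)

# There is no per-layer seed argument any more; randomness is controlled
# globally, e.g. via paddle.seed(1).
```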
@@ -18,13 +18,7 @@ from test_dist_base import TestParallelDyGraphRunnerBase, runtime_main
import paddle
import paddle.fluid as fluid
import paddle.nn.functional as F
from paddle.fluid.dygraph import (
Embedding,
Layer,
LayerNorm,
Linear,
to_variable,
)
from paddle.fluid.dygraph import Embedding, Layer, Linear, to_variable
from paddle.optimizer.lr import NoamDecay
"""
@@ -245,9 +239,9 @@ class PrePostProcessLayer(Layer):
super().__init__()
for cmd in process_cmd:
if cmd == "n":
self._layer_norm = LayerNorm(
self._layer_norm = paddle.nn.LayerNorm(
normalized_shape=d_model,
param_attr=fluid.ParamAttr(
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0)
),
bias_attr=fluid.ParamAttr(
......
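The hunk above renames `param_attr` to `weight_attr` when switching from the fluid layer to `paddle.nn.LayerNorm`. A minimal sketch of the new construction; `d_model` and the bias initializer value are assumptions, since both are elided in the hunk:

```python
import paddle
import paddle.fluid as fluid

d_model = 512  # illustrative size, not taken from the diff
layer_norm = paddle.nn.LayerNorm(
    normalized_shape=d_model,
    # param_attr in the fluid layer becomes weight_attr here
    weight_attr=fluid.ParamAttr(
        initializer=fluid.initializer.Constant(1.0)
    ),
    bias_attr=fluid.ParamAttr(
        initializer=fluid.initializer.Constant(0.0)  # assumed value; elided in the hunk
    ),
)
```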
@@ -18,7 +18,7 @@ import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.nn.functional as F
from paddle.fluid.dygraph import Embedding, Layer, LayerNorm, to_variable
from paddle.fluid.dygraph import Embedding, Layer, to_variable
from paddle.fluid.layers.utils import map_structure
from paddle.jit.api import dygraph_to_static_func
from paddle.nn import Linear
@@ -59,9 +59,9 @@ class PrePostProcessLayer(Layer):
self.add_sublayer(
"layer_norm_%d"
% len([layer for layer in self.children()]),
LayerNorm(
paddle.nn.LayerNorm(
normalized_shape=d_model,
param_attr=fluid.ParamAttr(
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0)
),
bias_attr=fluid.ParamAttr(
......
@@ -286,7 +286,7 @@ class TestDygraphLayerNormAPIError(unittest.TestCase):
with program_guard(Program(), Program()):
paddle.enable_static()
layer_norm = fluid.LayerNorm([32, 32])
layer_norm = paddle.nn.LayerNorm([32, 32])
# the input of LayerNorm must be Variable.
x1 = np.random.random((3, 32, 32)).astype('float32')
self.assertRaises(TypeError, layer_norm, x1)
......
@@ -538,7 +538,7 @@ class TestDropoutFAPI(unittest.TestCase):
res10 = paddle.nn.functional.dropout(
x=input, p=1.0, training=True
)
dropout = paddle.fluid.dygraph.Dropout(
dropout = paddle.nn.Dropout(
p=0,
)
res11 = dropout(input)
......
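The test above pairs `paddle.nn.functional.dropout(p=1.0, training=True)` with `paddle.nn.Dropout(p=0)`. A small sketch of the two boundary cases it exercises (names are illustrative):

```python
import numpy as np
import paddle
import paddle.nn.functional as F

x = paddle.ones([2, 3], dtype='float32')

# p=0 drops nothing, so the layer is the identity even in training mode.
drop0 = paddle.nn.Dropout(p=0)
np.testing.assert_allclose(drop0(x).numpy(), x.numpy())

# p=1.0 with training=True zeroes every unit, which is what res10 checks above.
y = F.dropout(x, p=1.0, training=True)
np.testing.assert_allclose(y.numpy(), np.zeros([2, 3], dtype='float32'))
```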
@@ -21,14 +21,7 @@ import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.framework as framework
from paddle.fluid.dygraph.nn import (
NCE,
BatchNorm,
Embedding,
GroupNorm,
LayerNorm,
PRelu,
)
from paddle.fluid.dygraph.nn import NCE, BatchNorm, Embedding, GroupNorm, PRelu
from paddle.nn import Linear
@@ -212,8 +205,8 @@ class TestDygraphLoadStatic(unittest.TestCase):
self.emb1 = Embedding([1000, 100])
self.emb2 = Embedding([2000, 200])
self.layer_norm_1 = LayerNorm([10])
self.layer_norm_2 = LayerNorm(10)
self.layer_norm_1 = paddle.nn.LayerNorm([10])
self.layer_norm_2 = paddle.nn.LayerNorm(10)
self.nce1 = NCE(10000, 100)
self.nce2 = NCE(10000, 100)
......
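The hunk above constructs `paddle.nn.LayerNorm([10])` and `paddle.nn.LayerNorm(10)` side by side. A short sketch showing that an int and a one-element list describe the same normalization, under the assumption that both layers keep their default constant initialization:

```python
import numpy as np
import paddle

x = paddle.rand([4, 10])
ln_list = paddle.nn.LayerNorm([10])  # normalized_shape given as a list
ln_int = paddle.nn.LayerNorm(10)     # normalized_shape given as a plain int
# Both default to weight=1 and bias=0, so the outputs should coincide.
np.testing.assert_allclose(ln_list(x).numpy(), ln_int(x).numpy(), rtol=1e-5)
```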
@@ -20,7 +20,7 @@ from test_imperative_base import new_program_scope
import paddle
import paddle.fluid as fluid
import paddle.nn.functional as F
from paddle.fluid import Embedding, Layer, LayerNorm, core
from paddle.fluid import Embedding, Layer, core
from paddle.fluid.dygraph import guard, to_variable
from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard
from paddle.jit import TracedLayer
@@ -399,9 +399,9 @@ class PrePostProcessLayer(Layer):
super().__init__()
for cmd in process_cmd:
if cmd == "n":
self._layer_norm = LayerNorm(
self._layer_norm = paddle.nn.LayerNorm(
normalized_shape=d_model,
param_attr=fluid.ParamAttr(
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0)
),
bias_attr=fluid.ParamAttr(
......
@@ -375,7 +375,7 @@ class TestDygraphLayerNormAPIError(unittest.TestCase):
with program_guard(Program(), Program()):
paddle.enable_static()
layer_norm = fluid.LayerNorm([32, 32])
layer_norm = paddle.nn.LayerNorm([32, 32])
# the input of LayerNorm must be Variable.
x1 = np.random.random((3, 32, 32)).astype('float32')
self.assertRaises(TypeError, layer_norm, x1)
......
@@ -33,7 +33,7 @@ class TestDygraphLayerNormv2(unittest.TestCase):
def compute_v1(x):
with fluid.dygraph.guard(p):
ln = fluid.dygraph.LayerNorm(shape[1:])
ln = paddle.nn.LayerNorm(shape[1:])
y = ln(paddle.to_tensor(x))
return y.numpy()
@@ -57,7 +57,7 @@ class TestDygraphLayerNormv2(unittest.TestCase):
def compute_v1(x):
with fluid.dygraph.guard(p):
ln = fluid.dygraph.LayerNorm(shape[1:])
ln = paddle.nn.LayerNorm(shape[1:])
x1 = paddle.to_tensor(x)
x1.stop_gradient = False
y = ln(x1)
@@ -91,7 +91,7 @@ class TestDygraphLayerNormv2(unittest.TestCase):
def compute_v1(x_np):
with program_guard(Program(), Program()):
ln = fluid.dygraph.LayerNorm(shape[1:])
ln = paddle.nn.LayerNorm(shape[1:])
x = fluid.data(name='x', shape=x_np.shape, dtype=x_np.dtype)
y = ln(x)
exe.run(fluid.default_startup_program())
@@ -123,7 +123,7 @@ class TestLayerNormFunction(unittest.TestCase):
def compute_v0(x):
with fluid.dygraph.guard(p):
ln = fluid.dygraph.LayerNorm(shape[1:])
ln = paddle.nn.LayerNorm(shape[1:])
y = ln(paddle.to_tensor(x))
return y.numpy()
@@ -141,7 +141,7 @@ class TestLayerNormFunction(unittest.TestCase):
def compute_v3(x):
with fluid.dygraph.guard(p):
ln = fluid.dygraph.LayerNorm(shape[-1])
ln = paddle.nn.LayerNorm(shape[-1])
y = ln(paddle.to_tensor(x))
return y.numpy()
......
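These hunks swap the fluid layer for `paddle.nn.LayerNorm` inside tests that compare the class and functional forms. A minimal equivalence sketch against `paddle.nn.functional.layer_norm` (shapes are illustrative):

```python
import numpy as np
import paddle
import paddle.nn.functional as F

x = paddle.rand([2, 3, 4, 5])
ln = paddle.nn.LayerNorm(x.shape[1:])   # normalize over the last three dims
y_layer = ln(x)
y_func = F.layer_norm(x, normalized_shape=x.shape[1:],
                      weight=ln.weight, bias=ln.bias)
np.testing.assert_allclose(y_layer.numpy(), y_func.numpy(), rtol=1e-5)
```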
@@ -120,50 +120,6 @@ class TestLayer(LayerTest):
ret = custom(x, do_linear2=True)
np.testing.assert_array_equal(ret.numpy().shape, [3, 1])
def test_dropout(self):
inp = np.ones([3, 32, 32], dtype='float32')
with self.static_graph():
t = layers.data(
name='data',
shape=[3, 32, 32],
dtype='float32',
append_batch_size=False,
)
dropout = nn.Dropout(p=0.35, seed=1, is_test=False)
ret = dropout(t)
ret2 = fluid.layers.dropout(
t, dropout_prob=0.35, seed=1, is_test=False
)
static_ret, static_ret2 = self.get_static_graph_result(
feed={'data': inp}, fetch_list=[ret, ret2]
)
with self.dynamic_graph():
with _test_eager_guard():
t = base.to_variable(inp)
dropout = nn.Dropout(p=0.35, seed=1, is_test=False)
dy_eager_ret = dropout(t)
dy_eager_ret2 = fluid.layers.dropout(
t, dropout_prob=0.35, seed=1, is_test=False
)
dy_eager_ret_value = dy_eager_ret.numpy()
dy_eager_ret2_value = dy_eager_ret2.numpy()
t = base.to_variable(inp)
dropout = nn.Dropout(p=0.35, seed=1, is_test=False)
dy_ret = dropout(t)
dy_ret2 = fluid.layers.dropout(
t, dropout_prob=0.35, seed=1, is_test=False
)
dy_ret_value = dy_ret.numpy()
dy_ret2_value = dy_ret2.numpy()
np.testing.assert_array_equal(dy_eager_ret_value, dy_eager_ret2_value)
np.testing.assert_array_equal(static_ret, dy_eager_ret_value)
np.testing.assert_array_equal(static_ret, static_ret2)
np.testing.assert_array_equal(dy_ret_value, dy_ret2_value)
np.testing.assert_array_equal(static_ret, dy_ret_value)
def test_linear(self):
inp = np.ones([3, 32, 32], dtype='float32')
with self.static_graph():
@@ -284,107 +240,6 @@ class TestLayer(LayerTest):
self.assertRaises(TypeError, test_type)
def test_layer_norm(self):
inp = np.ones([3, 32, 32], dtype='float32')
with self.static_graph():
t = layers.data(
name='data',
shape=[3, 32, 32],
dtype='float32',
append_batch_size=False,
)
ret = layers.layer_norm(
t,
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid',
)
static_ret = self.get_static_graph_result(
feed={'data': inp}, fetch_list=[ret]
)[0]
with self.static_graph():
t = layers.data(
name='data',
shape=[3, 32, 32],
dtype='float32',
append_batch_size=False,
)
lm = nn.LayerNorm(
normalized_shape=[32, 32],
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid',
)
ret = lm(t)
static_ret2 = self.get_static_graph_result(
feed={'data': inp}, fetch_list=[ret]
)[0]
with self.dynamic_graph():
with _test_eager_guard():
lm = nn.LayerNorm(
normalized_shape=[32, 32],
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid',
)
dy_eager_ret = lm(base.to_variable(inp))
dy_eager_ret_value = dy_eager_ret.numpy()
lm = nn.LayerNorm(
normalized_shape=[32, 32],
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid',
)
dy_ret = lm(base.to_variable(inp))
dy_ret_value = dy_ret.numpy()
with self.dynamic_graph():
with _test_eager_guard():
lm = nn.LayerNorm(
normalized_shape=[32, 32],
shift=False,
scale=False,
param_attr=fluid.initializer.ConstantInitializer(value=1),
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid',
)
lm(base.to_variable(inp))
self.assertFalse(hasattr(lm, "_scale_w"))
self.assertFalse(hasattr(lm, "_bias_w"))
lm = nn.LayerNorm(
normalized_shape=[32, 32],
shift=False,
scale=False,
param_attr=fluid.initializer.ConstantInitializer(value=1),
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid',
)
lm(base.to_variable(inp))
self.assertFalse(hasattr(lm, "_scale_w"))
self.assertFalse(hasattr(lm, "_bias_w"))
np.testing.assert_array_equal(static_ret, static_ret2)
np.testing.assert_array_equal(dy_eager_ret_value, static_ret2)
np.testing.assert_array_equal(dy_ret_value, static_ret2)
with self.dynamic_graph():
with _test_eager_guard():
lm = nn.LayerNorm(
normalized_shape=[16, 32],
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid',
)
with self.assertRaises(ValueError):
lm(base.to_variable(inp))
lm = nn.LayerNorm(
normalized_shape=[16, 32],
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid',
)
with self.assertRaises(ValueError):
lm(base.to_variable(inp))
def test_SyncBatchNorm(self):
if core.is_compiled_with_cuda():
with self.static_graph():
......
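The removed `test_dropout` and `test_layer_norm` cases checked the fluid layers against their functional forms. A hedged sketch of comparable checks written against the surviving `paddle.nn` API, offered only as an illustration rather than code added by this commit:

```python
import numpy as np
import paddle

x = paddle.to_tensor(np.ones([3, 32, 32], dtype='float32'))

# In eval mode the default 'upscale_in_train' dropout is exactly the identity.
drop = paddle.nn.Dropout(p=0.35)
drop.eval()
np.testing.assert_allclose(drop(x).numpy(), x.numpy())

# For an all-ones input, LayerNorm over the last two dims is all zeros
# (zero variance, so the normalized value is 0 before scale and shift).
ln = paddle.nn.LayerNorm([32, 32])
np.testing.assert_allclose(ln(x).numpy(), np.zeros([3, 32, 32], dtype='float32'),
                           atol=1e-6)
```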
@@ -13,9 +13,11
# limitations under the License.
import unittest
import numpy as np
import paddle.fluid as fluid
import paddle
import paddle.fluid as fluid
class TestModelAverage(unittest.TestCase):
......