Unverified commit 1c95631f, authored by zhiboniu, committed by GitHub

unset fluid api in nn.layer (#34129)

Parent: 1cb4c154
@@ -15,6 +15,11 @@
 # TODO: import all neural network related api under this directory,
 # including layers, linear, conv, rnn etc.
+from ..fluid.dygraph.layers import Layer  # noqa: F401
+from ..fluid.dygraph.container import LayerList  # noqa: F401
+from ..fluid.dygraph.container import ParameterList  # noqa: F401
+from ..fluid.dygraph.container import Sequential  # noqa: F401
+
 from .clip import ClipGradByGlobalNorm  # noqa: F401
 from .clip import ClipGradByNorm  # noqa: F401
 from .clip import ClipGradByValue  # noqa: F401
@@ -130,10 +135,6 @@ from .utils.spectral_norm_hook import spectral_norm
 # TODO: remove loss; keep it for now since it is still used in many unit tests
 from .layer import loss  # noqa: F401
-from ..fluid.dygraph.layers import Layer  # noqa: F401
-from ..fluid.dygraph.container import LayerList  # noqa: F401
-from ..fluid.dygraph.container import ParameterList  # noqa: F401
-from ..fluid.dygraph.container import Sequential  # noqa: F401
 from . import utils  # noqa: F401
 from . import functional  # noqa: F401
...
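The hunks above move the `Layer`, `LayerList`, `ParameterList`, and `Sequential` re-exports to the top of `paddle.nn`'s `__init__`, so user code no longer needs to reach into `paddle.fluid.dygraph`. A minimal sketch of the public import path this makes canonical:

```python
import paddle
from paddle.nn import Layer, Sequential  # now importable from paddle.nn directly

model = Sequential(
    paddle.nn.Linear(4, 8),
    paddle.nn.ReLU(),
    paddle.nn.Linear(8, 1),
)
assert isinstance(model, Layer)  # Sequential is itself a Layer
x = paddle.randn([2, 4])
print(model(x).shape)            # [2, 1]
```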
@@ -14,18 +14,18 @@
 # TODO: define activation functions of neural network
-from ...fluid.dygraph import layers
 from ...fluid import core
 from ...fluid.framework import in_dygraph_mode
-from ...fluid.param_attr import ParamAttr
-from ...fluid.initializer import Constant
+from ...framework import ParamAttr
+from ..initializer import Constant
 from paddle.framework import get_default_dtype
 from .. import functional as F
+from paddle.nn import Layer

 __all__ = []

-class ELU(layers.Layer):
+class ELU(Layer):
     r"""
     ELU Activation.
@@ -67,7 +67,7 @@ class ELU(layers.Layer):
         return 'alpha={}{}'.format(self._alpha, name_str)

-class GELU(layers.Layer):
+class GELU(Layer):
     r"""
     GELU Activation.
@@ -120,7 +120,7 @@ class GELU(layers.Layer):
         return 'approximate={}{}'.format(self._approximate, name_str)

-class Hardshrink(layers.Layer):
+class Hardshrink(Layer):
     r"""
     Hardshrink Activation
@@ -168,7 +168,7 @@ class Hardshrink(layers.Layer):
         return 'threshold={}{}'.format(self._threshold, name_str)

-class Hardswish(layers.Layer):
+class Hardswish(Layer):
     r"""
     Hardswish activation
@@ -218,7 +218,7 @@ class Hardswish(layers.Layer):
         return name_str

-class Tanh(layers.Layer):
+class Tanh(Layer):
     r"""
     Tanh Activation.
@@ -259,7 +259,7 @@ class Tanh(layers.Layer):
         return name_str

-class Hardtanh(layers.Layer):
+class Hardtanh(Layer):
     r"""
     Hardtanh Activation
@@ -305,7 +305,7 @@ class Hardtanh(layers.Layer):
         return 'min={}, max={}{}'.format(self._min, self._max, name_str)

-class PReLU(layers.Layer):
+class PReLU(Layer):
     """
     PReLU Activation.
@@ -377,7 +377,7 @@ class PReLU(layers.Layer):
             self._num_parameters, self._init, self._dtype, name_str)

-class ReLU(layers.Layer):
+class ReLU(Layer):
     """
     ReLU Activation.
@@ -415,7 +415,7 @@ class ReLU(layers.Layer):
         return name_str

-class ReLU6(layers.Layer):
+class ReLU6(Layer):
     """
     ReLU6 Activation
@@ -454,7 +454,7 @@ class ReLU6(layers.Layer):
         return name_str

-class SELU(layers.Layer):
+class SELU(Layer):
     r"""
     SELU Activation
@@ -505,7 +505,7 @@ class SELU(layers.Layer):
                                                    name_str)

-class LeakyReLU(layers.Layer):
+class LeakyReLU(Layer):
     r"""
     Leaky ReLU Activation.
@@ -553,7 +553,7 @@ class LeakyReLU(layers.Layer):
         return 'negative_slope={}{}'.format(self._negative_slope, name_str)

-class Sigmoid(layers.Layer):
+class Sigmoid(Layer):
     """
     This interface is used to construct a callable object of the ``Sigmoid`` class. This layer calculates the `sigmoid` of input x.
@@ -593,7 +593,7 @@ class Sigmoid(layers.Layer):
         return name_str

-class Hardsigmoid(layers.Layer):
+class Hardsigmoid(Layer):
     r"""
     This interface is used to construct a callable object of the ``Hardsigmoid`` class.
     This layer calculates the `hardsigmoid` of input x.
@@ -644,7 +644,7 @@ class Hardsigmoid(layers.Layer):
         return name_str

-class Softplus(layers.Layer):
+class Softplus(Layer):
     r"""
     Softplus Activation
@@ -689,7 +689,7 @@ class Softplus(layers.Layer):
                                                       name_str)

-class Softshrink(layers.Layer):
+class Softshrink(Layer):
     r"""
     Softshrink Activation
@@ -734,7 +734,7 @@ class Softshrink(layers.Layer):
         return 'threshold={}{}'.format(self._threshold, name_str)

-class Softsign(layers.Layer):
+class Softsign(Layer):
     r"""
     Softsign Activation
@@ -773,7 +773,7 @@ class Softsign(layers.Layer):
         return name_str

-class Swish(layers.Layer):
+class Swish(Layer):
     r"""
     Swish Activation.
@@ -812,7 +812,7 @@ class Swish(layers.Layer):
         return name_str

-class Tanhshrink(layers.Layer):
+class Tanhshrink(Layer):
     """
     Tanhshrink Activation
@@ -851,7 +851,7 @@ class Tanhshrink(layers.Layer):
         return name_str

-class ThresholdedReLU(layers.Layer):
+class ThresholdedReLU(Layer):
     r"""
     Thresholded ReLU Activation
@@ -895,7 +895,7 @@ class ThresholdedReLU(layers.Layer):
         return 'threshold={}{}'.format(self._threshold, name_str)

-class Silu(layers.Layer):
+class Silu(Layer):
     """
     Silu Activation.

     .. math::
@@ -933,7 +933,7 @@ class Silu(layers.Layer):
         return name_str

-class LogSigmoid(layers.Layer):
+class LogSigmoid(Layer):
     r"""
     LogSigmoid Activation.
@@ -972,7 +972,7 @@ class LogSigmoid(layers.Layer):
         return name_str

-class Softmax(layers.Layer):
+class Softmax(Layer):
     r"""
     Softmax Activation.
@@ -1099,7 +1099,7 @@ class Softmax(layers.Layer):
         return 'axis={}{}'.format(self._axis, name_str)

-class LogSoftmax(layers.Layer):
+class LogSoftmax(Layer):
     r"""
     This operator implements the log_softmax layer. The calculation process is as follows:
@@ -1157,7 +1157,7 @@ class LogSoftmax(layers.Layer):
         return 'axis={}{}'.format(self._axis, name_str)

-class Maxout(layers.Layer):
+class Maxout(Layer):
     r"""
     Maxout Activation.
...
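Every activation class in this file now subclasses the re-exported `paddle.nn.Layer` directly. A hedged sketch of the subclassing pattern these classes all follow (`ScaledReLU` is a hypothetical example, not part of the diff):

```python
import paddle
import paddle.nn.functional as F
from paddle.nn import Layer  # the import this diff standardizes on

class ScaledReLU(Layer):
    """Hypothetical activation following the same pattern as ELU, GELU, etc."""

    def __init__(self, scale=2.0, name=None):
        super(ScaledReLU, self).__init__()
        self._scale = scale
        self._name = name

    def forward(self, x):
        return self._scale * F.relu(x)

    def extra_repr(self):
        # Same extra_repr convention the diff's activation classes use.
        name_str = ', name={}'.format(self._name) if self._name else ''
        return 'scale={}{}'.format(self._scale, name_str)

act = ScaledReLU()
print(act(paddle.to_tensor([-1.0, 3.0])))  # [0., 6.]
```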
@@ -15,10 +15,10 @@
 # TODO: define the common classes to build a neural network
 import paddle
 from ...fluid.dygraph import Flatten  # noqa: F401
-from ...fluid.dygraph import layers
 from ...fluid.framework import in_dygraph_mode
 from .. import functional as F
 from ...fluid.framework import _dygraph_tracer
+from paddle.nn import Layer

 __all__ = []
@@ -30,7 +30,7 @@ def _npairs(x, n):
     return x

-class Linear(layers.Layer):
+class Linear(Layer):
     r"""
     Fully-connected linear transformation layer. For each input :math:`X`,
@@ -135,7 +135,7 @@ class Linear(layers.Layer):
             self.weight.shape[0], self.weight.shape[1], self._dtype, name_str)

-class Upsample(layers.Layer):
+class Upsample(Layer):
     """
     This op resizes a batch of images.
@@ -385,7 +385,7 @@ class Upsample(layers.Layer):
                    self.data_format, name_str)

-class UpsamplingNearest2D(layers.Layer):
+class UpsamplingNearest2D(Layer):
     """
     This op upsamples a batch of images, using nearest neighbours' pixel values.
     The input must be a 4-D Tensor of the shape (num_batches, channels, in_h, in_w),
@@ -470,7 +470,7 @@ class UpsamplingNearest2D(layers.Layer):
                    name_str)

-class UpsamplingBilinear2D(layers.Layer):
+class UpsamplingBilinear2D(Layer):
     """
     This op upsamples a batch of images, using bilinear interpolation of pixel values.
     The input must be a 4-D Tensor of the shape (num_batches, channels, in_h, in_w),
@@ -556,7 +556,7 @@ class UpsamplingBilinear2D(layers.Layer):
                    name_str)

-class Bilinear(layers.Layer):
+class Bilinear(Layer):
     r"""
     This layer performs a bilinear transformation on two inputs.
@@ -651,7 +651,7 @@ class Bilinear(layers.Layer):
             self._dtype, name_str)

-class Dropout(layers.Layer):
+class Dropout(Layer):
     """
     Dropout is a regularization technique for reducing overfitting by preventing
     neuron co-adaptation during training, as described in the paper:
@@ -725,7 +725,7 @@ class Dropout(layers.Layer):
                    name_str)

-class Dropout2D(layers.Layer):
+class Dropout2D(Layer):
     """
     Randomly zero out entire channels (in the batched input 4d tensor with the shape `NCHW`,
     a channel is a 2D feature map with the shape `HW`). Each channel will be zeroed out independently
@@ -786,7 +786,7 @@ class Dropout2D(layers.Layer):
                    name_str)

-class Dropout3D(layers.Layer):
+class Dropout3D(Layer):
     """
     Randomly zero out entire channels (in the batched input 5d tensor with the shape `NCDHW`,
     a channel is a 3D feature map with the shape `DHW`). Each channel will be zeroed out independently
@@ -847,7 +847,7 @@ class Dropout3D(layers.Layer):
                    name_str)

-class AlphaDropout(layers.Layer):
+class AlphaDropout(Layer):
     """
     Alpha Dropout is a type of Dropout that maintains the self-normalizing property. For an input with
     zero mean and unit standard deviation, the output of Alpha Dropout maintains the original mean and
@@ -900,7 +900,7 @@ class AlphaDropout(layers.Layer):
         return 'p={}{}'.format(self.p, name_str)

-class Pad1D(layers.Layer):
+class Pad1D(Layer):
     """
     This interface is used to construct a callable object of the ``Pad1D`` class.
     Pad tensor according to 'pad', 'mode' and 'value'.
@@ -981,7 +981,7 @@ class Pad1D(layers.Layer):
             self._pad, self._mode, self._value, self._data_format, name_str)

-class Pad2D(layers.Layer):
+class Pad2D(Layer):
     """
     This interface is used to construct a callable object of the ``Pad2D`` class.
     Pad tensor according to 'pad', 'mode' and 'value'.
@@ -1065,7 +1065,7 @@ class Pad2D(layers.Layer):
             self._pad, self._mode, self._value, self._data_format, name_str)

-class Pad3D(layers.Layer):
+class Pad3D(Layer):
     """
     This interface is used to construct a callable object of the ``Pad3D`` class.
     Pad tensor according to 'pad', 'mode' and 'value'.
@@ -1149,7 +1149,7 @@ class Pad3D(layers.Layer):
             self._pad, self._mode, self._value, self._data_format, name_str)

-class CosineSimilarity(layers.Layer):
+class CosineSimilarity(Layer):
     """
     This interface is used to compute cosine similarity between x1 and x2 along axis.
@@ -1206,7 +1206,7 @@ class CosineSimilarity(layers.Layer):
         return 'axis={_axis}, eps={_eps}'.format(**self.__dict__)

-class Embedding(layers.Layer):
+class Embedding(Layer):
     r"""
     **Embedding Layer**
@@ -1367,7 +1367,7 @@ class Embedding(layers.Layer):
         return main_str.format(**self.__dict__)

-class Unfold(layers.Layer):
+class Unfold(Layer):
     """
     This op returns a col buffer of sliding local blocks of input x, also known
     as im2col for batched 2D image tensors. For each block under the convolution filter,
...
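After this change, the common layers are plain `paddle.nn.Layer` subclasses, so the usual `Layer` machinery (sublayer tracking, train/eval switching) applies uniformly. A quick sketch:

```python
import paddle

linear = paddle.nn.Linear(4, 2)
drop = paddle.nn.Dropout(p=0.5)

# Both are paddle.nn.Layer instances after this refactor.
print(isinstance(linear, paddle.nn.Layer), isinstance(drop, paddle.nn.Layer))

drop.eval()                          # Dropout is a pass-through in eval mode
y = drop(linear(paddle.randn([3, 4])))
print(y.shape)                       # [3, 2]
```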
@@ -13,7 +13,7 @@
 # limitations under the License.
 from collections import OrderedDict
-from ...fluid.dygraph.layers import Layer
+from .. import Layer
 from collections.abc import Iterable, Mapping

 __all__ = []
...
@@ -19,8 +19,8 @@ import numpy as np
 from ...fluid import get_flags
 from ...fluid import core
 from ...device import get_cudnn_version
-from ...fluid.dygraph import layers
-from ...fluid.initializer import Normal
+from .. import Layer
+from ..initializer import Normal
 from .. import functional as F
 from ...fluid.layers import utils
 from ..functional.conv import _update_padding_nd
@@ -31,7 +31,7 @@ __all__ = []
 def _get_default_param_initializer(num_channels, filter_size):
     filter_elem_num = num_channels * np.prod(filter_size)
     std = (2.0 / filter_elem_num)**0.5
-    return Normal(0.0, std, 0)
+    return Normal(0.0, std)

 def _reverse_repeat_list(t, n):
@@ -42,7 +42,7 @@ def _reverse_repeat_list(t, n):
     return list(x for x in reversed(t) for _ in range(n))

-class _ConvNd(layers.Layer):
+class _ConvNd(Layer):
     def __init__(self,
                  in_channels,
                  out_channels,
@@ -127,7 +127,7 @@ class _ConvNd(layers.Layer):
                 return None
             filter_elem_num = np.prod(self._kernel_size) * self._in_channels
             std = (2.0 / filter_elem_num)**0.5
-            return Normal(0.0, std, 0)
+            return Normal(0.0, std)

         self.weight = self.create_parameter(
             shape=filter_shape,
...
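The conv hunks drop the trailing third argument (a fixed seed of 0) that the old fluid `Normal` initializer accepted; the `paddle.nn.initializer.Normal` replacement takes only a mean and a std. The std itself is the usual He/MSRA value `sqrt(2 / fan_in)`. A hedged sketch of the same computation through the public API (`default_conv_weight_attr` is a hypothetical helper for illustration):

```python
import numpy as np
import paddle

def default_conv_weight_attr(in_channels, kernel_size):
    # He/MSRA-style std, matching the diff's (2.0 / filter_elem_num) ** 0.5.
    fan_in = in_channels * int(np.prod(kernel_size))
    std = (2.0 / fan_in) ** 0.5
    # New-style Normal: only mean/std, no trailing seed argument.
    return paddle.ParamAttr(
        initializer=paddle.nn.initializer.Normal(mean=0.0, std=std))

conv = paddle.nn.Conv2D(3, 16, kernel_size=3,
                        weight_attr=default_conv_weight_attr(3, (3, 3)))
```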
@@ -15,7 +15,7 @@
 import numpy as np
 import paddle
-from ...fluid.dygraph import layers
+from .. import Layer
 from ...fluid.framework import core, in_dygraph_mode
 from ...fluid.data_feeder import check_variable_and_dtype, check_type
 from ...fluid.layer_helper import LayerHelper
@@ -24,7 +24,7 @@ from paddle import _C_ops
 __all__ = []

-class PairwiseDistance(layers.Layer):
+class PairwiseDistance(Layer):
     r"""
     This operator computes the pairwise distance between two vectors. The
     distance is calculated by the p-order norm:
@@ -87,7 +87,7 @@ class PairwiseDistance(layers.Layer):
                                  'PairwiseDistance')
         check_variable_and_dtype(y, 'y', ['float32', 'float64'],
                                  'PairwiseDistance')
-        sub = paddle.fluid.layers.elementwise_sub(x, y)
+        sub = paddle.subtract(x, y)

         helper = LayerHelper("PairwiseDistance", name=self.name)
         attrs = {
...
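Here `paddle.fluid.layers.elementwise_sub` is replaced with the public `paddle.subtract`, which computes the same broadcasted element-wise difference. A quick sketch of the equivalence, assuming dygraph mode:

```python
import paddle

x = paddle.to_tensor([[1.0, 3.0], [3.0, 5.0]])
y = paddle.to_tensor([[5.0, 6.0], [7.0, 8.0]])

sub = paddle.subtract(x, y)   # public replacement for elementwise_sub(x, y)
print(sub.numpy())            # [[-4. -3.] [-4. -3.]]

# The p-norm of that difference is exactly what PairwiseDistance returns.
dist = paddle.nn.PairwiseDistance(p=2.)(x, y)
print(dist.numpy())           # [5. 5.]
```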
@@ -20,11 +20,12 @@ import paddle.fluid.core as core
 import paddle
 from .. import functional as F
 from paddle.fluid.framework import core, in_dygraph_mode, _varbase_creator
+from .. import Layer

 __all__ = []

-class BCEWithLogitsLoss(fluid.dygraph.Layer):
+class BCEWithLogitsLoss(Layer):
     r"""
     This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer.
     Also, we can see it as the combination of ``sigmoid_cross_entropy_with_logits``
@@ -128,7 +129,7 @@ class BCEWithLogitsLoss(fluid.dygraph.Layer):
         return out

-class CrossEntropyLoss(fluid.dygraph.Layer):
+class CrossEntropyLoss(Layer):
     r"""
     By default, this operator implements the cross entropy loss function with softmax. This function
     combines the calculation of the softmax operation and the cross entropy loss function
@@ -407,7 +408,7 @@ class CrossEntropyLoss(fluid.dygraph.Layer):
         return ret

-class HSigmoidLoss(fluid.dygraph.Layer):
+class HSigmoidLoss(Layer):
     """
     Hierarchical Sigmoid Layer.
@@ -529,7 +530,7 @@ class HSigmoidLoss(fluid.dygraph.Layer):
         return out

-class MSELoss(fluid.dygraph.layers.Layer):
+class MSELoss(Layer):
     r"""
     **Mean Square Error Loss**
     Computes the mean square error (squared L2 norm) of given input and label.
@@ -596,8 +597,7 @@ class MSELoss(fluid.dygraph.layers.Layer):
             fluid.data_feeder.check_variable_and_dtype(
                 label, 'label', ['float32', 'float64'], 'MSELoss')

-        square_out = fluid.layers.square(
-            fluid.layers.elementwise_sub(input, label))
+        square_out = paddle.square(paddle.subtract(input, label))
         if self.reduction == 'none':
             return square_out
@@ -608,7 +608,7 @@ class MSELoss(fluid.dygraph.layers.Layer):
         return getattr(fluid.layers, reduce_op)(square_out)

-class L1Loss(fluid.dygraph.Layer):
+class L1Loss(Layer):
     r"""
     This interface is used to construct a callable object of the ``L1Loss`` class.
     The L1Loss layer calculates the L1 Loss of ``input`` and ``label`` as follows.
@@ -687,7 +687,7 @@ class L1Loss(fluid.dygraph.Layer):
             input, label, self.reduction, name=self.name)

-class BCELoss(fluid.dygraph.Layer):
+class BCELoss(Layer):
     """
     This interface is used to construct a callable object of the ``BCELoss`` class.
     The BCELoss layer measures the binary_cross_entropy loss between input predictions ``input``
@@ -777,7 +777,7 @@ class BCELoss(fluid.dygraph.Layer):
         return out

-class NLLLoss(fluid.dygraph.Layer):
+class NLLLoss(Layer):
     r"""
     :alias_main: paddle.nn.NLLLoss
     :alias: paddle.nn.NLLLoss, paddle.nn.layer.NLLLoss, paddle.nn.layer.loss.NLLLoss
@@ -886,7 +886,7 @@ class NLLLoss(fluid.dygraph.Layer):
             name=self._name)

-class KLDivLoss(fluid.dygraph.Layer):
+class KLDivLoss(Layer):
     r"""
     This interface calculates the Kullback-Leibler divergence loss
     between Input(X) and Input(Target). Note that Input(X) is the
@@ -959,7 +959,7 @@ class KLDivLoss(fluid.dygraph.Layer):
         return out

-class MarginRankingLoss(fluid.dygraph.Layer):
+class MarginRankingLoss(Layer):
     r"""
     This interface is used to construct a callable object of the ``MarginRankingLoss`` class.
@@ -1031,7 +1031,7 @@ class MarginRankingLoss(fluid.dygraph.Layer):
         return out

-class CTCLoss(fluid.dygraph.Layer):
+class CTCLoss(Layer):
     """
     An operator integrating the open source Warp-CTC library (https://github.com/baidu-research/warp-ctc)
@@ -1127,7 +1127,7 @@ class CTCLoss(fluid.dygraph.Layer):
             norm_by_times=norm_by_times)

-class SmoothL1Loss(fluid.dygraph.Layer):
+class SmoothL1Loss(Layer):
     r"""
     This operator calculates smooth_l1_loss. Creates a criterion that uses a squared
     term if the absolute element-wise error falls below 1 and an L1 term otherwise.
...
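The MSELoss hunk collapses `fluid.layers.square(fluid.layers.elementwise_sub(...))` into the public one-liner `paddle.square(paddle.subtract(...))`. A sketch of the equivalent computation with `'mean'` reduction, checked against the built-in layer:

```python
import paddle

input = paddle.to_tensor([1.5, 0.8], dtype='float32')
label = paddle.to_tensor([1.7, 1.0], dtype='float32')

# Same math the rewritten MSELoss forward performs:
square_out = paddle.square(paddle.subtract(input, label))
loss = paddle.mean(square_out)           # reduction='mean'

builtin = paddle.nn.MSELoss()(input, label)
print(float(loss), float(builtin))       # both ~0.04
```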
@@ -30,15 +30,13 @@
 import six

 from ...fluid.dygraph import BatchNorm  # noqa: F401
 from ...fluid.dygraph import SpectralNorm  # noqa: F401
-from ...fluid.dygraph import layers

 from ...framework import get_default_dtype, set_default_dtype
 from ...fluid.framework import in_dygraph_mode

-from ...fluid.initializer import Constant
-from ...fluid.param_attr import ParamAttr
+from ..initializer import Constant
+from ...framework import ParamAttr
 from ...fluid.data_feeder import check_variable_and_dtype, check_type
 from ...fluid import core, dygraph_utils
@@ -47,14 +45,15 @@ from ..functional import batch_norm, layer_norm, instance_norm
 import numpy as np
 import numbers
 import warnings
-from ...fluid.dygraph.base import no_grad
+from ...framework import no_grad
 from .. import functional as F
 from paddle import _C_ops
+from .. import Layer

 __all__ = []

-class _InstanceNormBase(layers.Layer):
+class _InstanceNormBase(Layer):
     """
     This is the base class for InstanceNorm1D, InstanceNorm2D and InstanceNorm3D.
@@ -317,7 +316,7 @@ class InstanceNorm3D(_InstanceNormBase):
                     len(input.shape)))

-class GroupNorm(layers.Layer):
+class GroupNorm(Layer):
     """
     This interface is used to construct a callable object of the ``GroupNorm`` class.
     For more details, refer to code examples.
@@ -436,7 +435,7 @@ class GroupNorm(layers.Layer):
             self._num_groups, self._num_channels, self._epsilon)

-class LayerNorm(layers.Layer):
+class LayerNorm(Layer):
     r"""
     :alias_main: paddle.nn.LayerNorm
     :alias: paddle.nn.LayerNorm, paddle.nn.layer.LayerNorm, paddle.nn.layer.norm.LayerNorm
@@ -544,7 +543,7 @@ class LayerNorm(layers.Layer):
             self._epsilon)

-class _BatchNormBase(layers.Layer):
+class _BatchNormBase(Layer):
     """
     BatchNorm base class.
     """
@@ -1181,7 +1180,7 @@ class SyncBatchNorm(_BatchNormBase):
         return layer_output

-class LocalResponseNorm(layers.Layer):
+class LocalResponseNorm(Layer):
     """
     Local Response Normalization performs a type of "lateral inhibition" by normalizing over local input regions.
     For more information, please refer to `ImageNet Classification with Deep Convolutional Neural Networks <https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf>`_
...
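norm.py now pulls `Constant` from `paddle.nn.initializer`, and `ParamAttr` and `no_grad` from `paddle.framework`, instead of their fluid counterparts. A hedged sketch of the same public spellings used to pin a norm layer's scale and bias:

```python
import paddle

# Public counterparts of the imports this hunk rewires:
weight_attr = paddle.ParamAttr(
    initializer=paddle.nn.initializer.Constant(value=1.0))
bias_attr = paddle.ParamAttr(
    initializer=paddle.nn.initializer.Constant(value=0.0))

ln = paddle.nn.LayerNorm([8], weight_attr=weight_attr, bias_attr=bias_attr)
x = paddle.randn([2, 8])
print(ln(x).shape)  # [2, 8]
```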
@@ -12,14 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from ...fluid.dygraph import layers
 from ...fluid.layer_helper import LayerHelper
 from .. import functional as F
+from .. import Layer

 __all__ = []

-class AvgPool1D(layers.Layer):
+class AvgPool1D(Layer):
     r"""
     This operation applies a 1D average pooling over an input signal composed
     of several input planes, based on the input, output_size, return_mask parameters.
@@ -109,7 +109,7 @@ class AvgPool1D(layers.Layer):
                    **self.__dict__)

-class AvgPool2D(layers.Layer):
+class AvgPool2D(Layer):
     r"""
     This operation applies 2D average pooling over input features based on the input,
     and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
@@ -220,7 +220,7 @@ class AvgPool2D(layers.Layer):
                    **self.__dict__)

-class AvgPool3D(layers.Layer):
+class AvgPool3D(Layer):
     """
     This operation applies 3D average pooling over input features based on the input,
     and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
@@ -318,7 +318,7 @@ class AvgPool3D(layers.Layer):
                    **self.__dict__)

-class MaxPool1D(layers.Layer):
+class MaxPool1D(Layer):
     """
     This operation applies 1D max pooling over input signal
     composed of several input planes based on the input,
@@ -412,7 +412,7 @@ class MaxPool1D(layers.Layer):
                    **self.__dict__)

-class MaxPool2D(layers.Layer):
+class MaxPool2D(Layer):
     r"""
     This operation applies 2D max pooling over input feature based on the input,
     and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
@@ -522,7 +522,7 @@ class MaxPool2D(layers.Layer):
                    **self.__dict__)

-class MaxPool3D(layers.Layer):
+class MaxPool3D(Layer):
     """
     This operation applies 3D max pooling over input features based on the input,
     and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
@@ -620,7 +620,7 @@ class MaxPool3D(layers.Layer):
                    **self.__dict__)

-class AdaptiveAvgPool1D(layers.Layer):
+class AdaptiveAvgPool1D(Layer):
     r"""
     This operation applies a 1D adaptive average pooling over an input signal composed
@@ -693,7 +693,7 @@ class AdaptiveAvgPool1D(layers.Layer):
         return 'output_size={}'.format(self.output_size)

-class AdaptiveAvgPool2D(layers.Layer):
+class AdaptiveAvgPool2D(Layer):
     r"""
     This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions
@@ -779,7 +779,7 @@ class AdaptiveAvgPool2D(layers.Layer):
         return 'output_size={}'.format(self._output_size)

-class AdaptiveAvgPool3D(layers.Layer):
+class AdaptiveAvgPool3D(Layer):
     r"""
     This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions
@@ -872,7 +872,7 @@ class AdaptiveAvgPool3D(layers.Layer):
         return 'output_size={}'.format(self._output_size)

-class AdaptiveMaxPool1D(layers.Layer):
+class AdaptiveMaxPool1D(Layer):
     """
     This operation applies a 1D adaptive max pooling over an input signal composed
@@ -956,7 +956,7 @@ class AdaptiveMaxPool1D(layers.Layer):
                                                      self.return_mask)

-class AdaptiveMaxPool2D(layers.Layer):
+class AdaptiveMaxPool2D(Layer):
     """
     This operation applies 2D adaptive max pooling on input tensor. The h and w dimensions
     of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and
@@ -1037,7 +1037,7 @@ class AdaptiveMaxPool2D(layers.Layer):
                                                      self._return_mask)

-class AdaptiveMaxPool3D(layers.Layer):
+class AdaptiveMaxPool3D(Layer):
     """
     This operation applies 3D adaptive max pooling on input tensor. The h and w dimensions of the output tensor are
     determined by the parameter output_size. The difference between adaptive pooling and pooling is that the adaptive one focuses
...
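All pooling classes in this file get the same one-line re-basing onto `paddle.nn.Layer`; their call behavior is unchanged. A quick sketch with `AvgPool1D`:

```python
import paddle

pool = paddle.nn.AvgPool1D(kernel_size=2, stride=2)
x = paddle.randn([1, 3, 8])   # (N, C, L)
print(pool(x).shape)          # [1, 3, 4]
```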
@@ -28,7 +28,7 @@ from paddle import framework
 from paddle.device import get_device, get_cudnn_version
 from paddle.nn import functional as F
 from paddle.nn import initializer as I
-from paddle.fluid.dygraph import Layer, LayerList
+from paddle.nn import Layer, LayerList
 from paddle.fluid.layers import utils
 from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as
 from paddle.fluid.data_feeder import convert_dtype
@@ -962,7 +962,7 @@ class RNNBase(LayerList):
             # for static-graph, append coalesce_tensor into startup program
             with fluid.program_guard(fluid.default_startup_program(),
                                      fluid.default_startup_program()):
-                with framework.no_grad():
+                with paddle.no_grad():
                     self._helper.append_op(
                         type="coalesce_tensor",
                         inputs={"Input": self._all_weights},
@@ -1040,10 +1040,10 @@ class RNNBase(LayerList):
                 ])
         else:
             initial_states = [initial_states] if isinstance(
-                initial_states,
-                paddle.fluid.framework.Variable) else initial_states
+                initial_states, paddle.static.Variable) else initial_states

-        if self.could_use_cudnn and (not fluid.core.is_compiled_with_rocm() or
-                                     sequence_length is None):
+        if self.could_use_cudnn and (
+                not paddle.device.is_compiled_with_rocm() or
+                sequence_length is None):
             # Add CPU kernel and dispatch in backend later
             return self._cudnn_impl(inputs, initial_states, sequence_length)
...
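rnn.py swaps three fluid-private spellings for public ones: `framework.no_grad` becomes `paddle.no_grad`, `paddle.fluid.framework.Variable` becomes `paddle.static.Variable`, and `fluid.core.is_compiled_with_rocm()` becomes `paddle.device.is_compiled_with_rocm()`. A quick sketch of the `no_grad` replacement, assuming dygraph mode:

```python
import paddle

lstm = paddle.nn.LSTM(input_size=4, hidden_size=8)

# paddle.no_grad is the public context manager the diff switches to;
# nothing computed inside it tracks gradients.
with paddle.no_grad():
    x = paddle.randn([2, 5, 4])               # (batch, seq_len, input_size)
    y, (h, c) = lstm(x)

print(y.shape)                                 # [2, 5, 8]
print(paddle.device.is_compiled_with_rocm())   # public ROCm probe
```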
@@ -24,8 +24,8 @@ from .norm import LayerNorm
 from .. import functional as F
 from ... import tensor
 from ...fluid import layers
-from ...fluid.dygraph import Layer, LayerList
-from ...fluid.param_attr import ParamAttr
+from .. import Layer, LayerList
+from ...framework import ParamAttr
 from ...fluid.data_feeder import convert_dtype

 __all__ = []
...
@@ -14,13 +14,13 @@
 # TODO: define special functions used in computer vision tasks

-from ...fluid.dygraph import layers
+from .. import Layer
 from .. import functional

 __all__ = []

-class PixelShuffle(layers.Layer):
+class PixelShuffle(Layer):
     """
     PixelShuffle Layer
...
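vision.py's `PixelShuffle` gets the same re-basing onto `paddle.nn.Layer`. A quick usage sketch:

```python
import paddle

# PixelShuffle rearranges (N, C*r*r, H, W) -> (N, C, H*r, W*r).
ps = paddle.nn.PixelShuffle(upscale_factor=3)
x = paddle.randn([2, 9, 4, 4])
print(ps(x).shape)  # [2, 1, 12, 12]
```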