diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py
index 5fe17e8c193e3ea99eddbd8bfb2668e3a1228286..8f094877e74b6730ace0bd0222042a7aa2f60b48 100644
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -15,6 +15,11 @@
 # TODO: import all neural network related api under this directory,
 # including layers, linear, conv, rnn etc.
 
+from ..fluid.dygraph.layers import Layer  # noqa: F401
+from ..fluid.dygraph.container import LayerList  # noqa: F401
+from ..fluid.dygraph.container import ParameterList  # noqa: F401
+from ..fluid.dygraph.container import Sequential  # noqa: F401
+
 from .clip import ClipGradByGlobalNorm  # noqa: F401
 from .clip import ClipGradByNorm  # noqa: F401
 from .clip import ClipGradByValue  # noqa: F401
@@ -130,10 +135,6 @@ from .utils.spectral_norm_hook import spectral_norm
 
 # TODO: remove loss, keep it for too many used in unitests
 from .layer import loss  # noqa: F401
-from ..fluid.dygraph.layers import Layer  # noqa: F401
-from ..fluid.dygraph.container import LayerList  # noqa: F401
-from ..fluid.dygraph.container import ParameterList  # noqa: F401
-from ..fluid.dygraph.container import Sequential  # noqa: F401
 
 from . import utils  # noqa: F401
 from . import functional  # noqa: F401
diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py
index d5b37144cfffed55396787cc7745ea7b80639672..695e387bda84f073718b717ca201161489517b50 100644
--- a/python/paddle/nn/layer/activation.py
+++ b/python/paddle/nn/layer/activation.py
@@ -14,18 +14,18 @@
 
 # TODO: define activation functions of neural network
 
-from ...fluid.dygraph import layers
 from ...fluid import core
 from ...fluid.framework import in_dygraph_mode
-from ...fluid.param_attr import ParamAttr
-from ...fluid.initializer import Constant
+from ...framework import ParamAttr
+from ..initializer import Constant
 from paddle.framework import get_default_dtype
 from .. import functional as F
+from paddle.nn import Layer
 
 __all__ = []
 
 
-class ELU(layers.Layer):
+class ELU(Layer):
     r"""
     ELU Activation.
@@ -67,7 +67,7 @@ class ELU(layers.Layer):
         return 'alpha={}{}'.format(self._alpha, name_str)
 
 
-class GELU(layers.Layer):
+class GELU(Layer):
     r"""
     GELU Activation.
@@ -120,7 +120,7 @@ class GELU(layers.Layer):
         return 'approximate={}{}'.format(self._approximate, name_str)
 
 
-class Hardshrink(layers.Layer):
+class Hardshrink(Layer):
     r"""
     Hardshrink Activation
@@ -168,7 +168,7 @@ class Hardshrink(layers.Layer):
         return 'threshold={}{}'.format(self._threshold, name_str)
 
 
-class Hardswish(layers.Layer):
+class Hardswish(Layer):
     r"""
     Hardswish activation
@@ -218,7 +218,7 @@ class Hardswish(layers.Layer):
         return name_str
 
 
-class Tanh(layers.Layer):
+class Tanh(Layer):
     r"""
     Tanh Activation.
@@ -259,7 +259,7 @@ class Tanh(layers.Layer):
         return name_str
 
 
-class Hardtanh(layers.Layer):
+class Hardtanh(Layer):
     r"""
     Hardtanh Activation
@@ -305,7 +305,7 @@ class Hardtanh(layers.Layer):
         return 'min={}, max={}{}'.format(self._min, self._max, name_str)
 
 
-class PReLU(layers.Layer):
+class PReLU(Layer):
     """
     PReLU Activation.
@@ -377,7 +377,7 @@ class PReLU(layers.Layer):
             self._num_parameters, self._init, self._dtype, name_str)
 
 
-class ReLU(layers.Layer):
+class ReLU(Layer):
     """
     ReLU Activation.
@@ -415,7 +415,7 @@ class ReLU(layers.Layer):
         return name_str
 
 
-class ReLU6(layers.Layer):
+class ReLU6(Layer):
     """
     ReLU6 Activation
@@ -454,7 +454,7 @@ class ReLU6(layers.Layer):
         return name_str
 
 
-class SELU(layers.Layer):
+class SELU(Layer):
     r"""
     SELU Activation
@@ -505,7 +505,7 @@ class SELU(layers.Layer):
             name_str)
 
 
-class LeakyReLU(layers.Layer):
+class LeakyReLU(Layer):
     r"""
     Leaky ReLU Activation.
@@ -553,7 +553,7 @@ class LeakyReLU(layers.Layer):
         return 'negative_slope={}{}'.format(self._negative_slope, name_str)
 
 
-class Sigmoid(layers.Layer):
+class Sigmoid(Layer):
     """
     this interface is used to construct a callable object of the ``Sigmoid`` class.
     This layer calcluate the `sigmoid` of input x.
@@ -593,7 +593,7 @@ class Sigmoid(layers.Layer):
         return name_str
 
 
-class Hardsigmoid(layers.Layer):
+class Hardsigmoid(Layer):
     r"""
     This interface is used to construct a callable object of the ``Hardsigmoid`` class.
     This layer calcluate the `hardsigmoid` of input x.
@@ -644,7 +644,7 @@ class Hardsigmoid(layers.Layer):
         return name_str
 
 
-class Softplus(layers.Layer):
+class Softplus(Layer):
     r"""
     Softplus Activation
@@ -689,7 +689,7 @@ class Softplus(layers.Layer):
             name_str)
 
 
-class Softshrink(layers.Layer):
+class Softshrink(Layer):
     r"""
     Softshrink Activation
@@ -734,7 +734,7 @@ class Softshrink(layers.Layer):
         return 'threshold={}{}'.format(self._threshold, name_str)
 
 
-class Softsign(layers.Layer):
+class Softsign(Layer):
     r"""
     Softsign Activation
@@ -773,7 +773,7 @@ class Softsign(layers.Layer):
         return name_str
 
 
-class Swish(layers.Layer):
+class Swish(Layer):
     r"""
     Swish Activation.
@@ -812,7 +812,7 @@ class Swish(layers.Layer):
         return name_str
 
 
-class Tanhshrink(layers.Layer):
+class Tanhshrink(Layer):
     """
     Tanhshrink Activation
@@ -851,7 +851,7 @@ class Tanhshrink(layers.Layer):
         return name_str
 
 
-class ThresholdedReLU(layers.Layer):
+class ThresholdedReLU(Layer):
     r"""
     Thresholded ReLU Activation
@@ -895,7 +895,7 @@ class ThresholdedReLU(layers.Layer):
         return 'threshold={}{}'.format(self._threshold, name_str)
 
 
-class Silu(layers.Layer):
+class Silu(Layer):
     """
     Silu Activation.
     .. math::
@@ -933,7 +933,7 @@ class Silu(layers.Layer):
         return name_str
 
 
-class LogSigmoid(layers.Layer):
+class LogSigmoid(Layer):
     r"""
     LogSigmoid Activation.
@@ -972,7 +972,7 @@ class LogSigmoid(layers.Layer):
         return name_str
 
 
-class Softmax(layers.Layer):
+class Softmax(Layer):
     r"""
     Softmax Activation.
@@ -1099,7 +1099,7 @@ class Softmax(layers.Layer):
         return 'axis={}{}'.format(self._axis, name_str)
 
 
-class LogSoftmax(layers.Layer):
+class LogSoftmax(Layer):
     r"""
     This operator implements the log_softmax layer. The calculation process is as follows:
@@ -1157,7 +1157,7 @@ class LogSoftmax(layers.Layer):
         return 'axis={}{}'.format(self._axis, name_str)
 
 
-class Maxout(layers.Layer):
+class Maxout(Layer):
     r"""
     Maxout Activation.
diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py
index 1d7f7c6589986bdf478347d428c5ec689a7be882..9aa8097befc98bfc6bb93f083411a0d4e534bbb5 100644
--- a/python/paddle/nn/layer/common.py
+++ b/python/paddle/nn/layer/common.py
@@ -15,10 +15,10 @@
 # TODO: define the common classes to build a neural network
 import paddle
 from ...fluid.dygraph import Flatten  # noqa: F401
-from ...fluid.dygraph import layers
 from ...fluid.framework import in_dygraph_mode
 from .. import functional as F
 from ...fluid.framework import _dygraph_tracer
+from paddle.nn import Layer
 
 __all__ = []
@@ -30,7 +30,7 @@ def _npairs(x, n):
     return x
 
 
-class Linear(layers.Layer):
+class Linear(Layer):
     r"""
     Fully-connected linear transformation layer. For each input :math:`X` ,
@@ -135,7 +135,7 @@ class Linear(layers.Layer):
             self.weight.shape[0], self.weight.shape[1], self._dtype, name_str)
 
 
-class Upsample(layers.Layer):
+class Upsample(Layer):
     """
     This op resizes a batch of images.
@@ -385,7 +385,7 @@ class Upsample(layers.Layer):
             self.data_format, name_str)
 
 
-class UpsamplingNearest2D(layers.Layer):
+class UpsamplingNearest2D(Layer):
     """
     This op upsamples a batch of images, using nearest neighbours' pixel values.
     The input must be a 4-D Tensor of the shape (num_batches, channels, in_h, in_w),
@@ -470,7 +470,7 @@ class UpsamplingNearest2D(layers.Layer):
             name_str)
 
 
-class UpsamplingBilinear2D(layers.Layer):
+class UpsamplingBilinear2D(Layer):
     """
     This op upsamples a batch of images, using bilinear' pixel values.
     The input must be a 4-D Tensor of the shape (num_batches, channels, in_h, in_w),
@@ -556,7 +556,7 @@ class UpsamplingBilinear2D(layers.Layer):
             name_str)
 
 
-class Bilinear(layers.Layer):
+class Bilinear(Layer):
     r"""
     This layer performs bilinear on two inputs.
@@ -651,7 +651,7 @@ class Bilinear(layers.Layer):
             self._dtype, name_str)
 
 
-class Dropout(layers.Layer):
+class Dropout(Layer):
     """
     Dropout is a regularization technique for reducing overfitting by preventing
     neuron co-adaption during training as described in the paper:
@@ -725,7 +725,7 @@ class Dropout(layers.Layer):
             name_str)
 
 
-class Dropout2D(layers.Layer):
+class Dropout2D(Layer):
     """
     Randomly zero out entire channels (in the batched input 4d tensor with the shape `NCHW` ,
     a channel is a 2D feature map with the shape `HW`). Each channel will be zeroed out independently
@@ -786,7 +786,7 @@ class Dropout2D(layers.Layer):
             name_str)
 
 
-class Dropout3D(layers.Layer):
+class Dropout3D(Layer):
     """
     Randomly zero out entire channels (in the batched input 5d tensor with the shape `NCDHW` ,
     a channel is a 3D feature map with the shape `DHW` ). Each channel will be zeroed out independently
@@ -847,7 +847,7 @@ class Dropout3D(layers.Layer):
             name_str)
 
 
-class AlphaDropout(layers.Layer):
+class AlphaDropout(Layer):
     """
     Alpha Dropout is a type of Dropout that maintains the self-normalizing property. For an input with
     zero mean and unit standard deviation, the output of Alpha Dropout maintains the original mean and
@@ -900,7 +900,7 @@ class AlphaDropout(layers.Layer):
         return 'p={}{}'.format(self.p, name_str)
 
 
-class Pad1D(layers.Layer):
+class Pad1D(Layer):
     """
     This interface is used to construct a callable object of the ``Pad1D`` class.
     Pad tensor according to 'pad', 'mode' and 'value'.
@@ -981,7 +981,7 @@ class Pad1D(layers.Layer):
             self._pad, self._mode, self._value, self._data_format, name_str)
 
 
-class Pad2D(layers.Layer):
+class Pad2D(Layer):
     """
     This interface is used to construct a callable object of the ``Pad2D`` class.
     Pad tensor according to 'pad', 'mode' and 'value'.
@@ -1065,7 +1065,7 @@ class Pad2D(layers.Layer):
             self._pad, self._mode, self._value, self._data_format, name_str)
 
 
-class Pad3D(layers.Layer):
+class Pad3D(Layer):
     """
     This interface is used to construct a callable object of the ``Pad3D`` class.
     Pad tensor according to 'pad', 'mode' and 'value'.
@@ -1149,7 +1149,7 @@ class Pad3D(layers.Layer):
             self._pad, self._mode, self._value, self._data_format, name_str)
 
 
-class CosineSimilarity(layers.Layer):
+class CosineSimilarity(Layer):
     """
     This interface is used to compute cosine similarity between x1 and x2 along axis.
@@ -1206,7 +1206,7 @@ class CosineSimilarity(layers.Layer):
         return 'axis={_axis}, eps={_eps}'.format(**self.__dict__)
 
 
-class Embedding(layers.Layer):
+class Embedding(Layer):
     r"""
     **Embedding Layer**
@@ -1367,7 +1367,7 @@ class Embedding(layers.Layer):
         return main_str.format(**self.__dict__)
 
 
-class Unfold(layers.Layer):
+class Unfold(Layer):
     """
     This op returns a col buffer of sliding local blocks of input x, also known
     as im2col for batched 2D image tensors. For each block under the convolution filter,
diff --git a/python/paddle/nn/layer/container.py b/python/paddle/nn/layer/container.py
index 48697aa8f509090d44a173a2bc47b1a18184a622..aadaf1efce50faf0c81238ea1f3ea0eda1f87513 100644
--- a/python/paddle/nn/layer/container.py
+++ b/python/paddle/nn/layer/container.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from collections import OrderedDict
-from ...fluid.dygraph.layers import Layer
+from .. import Layer
 from collections.abc import Iterable, Mapping
 
 __all__ = []
diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py
index 76011aeff5b4fb129dac365be63068e494c258fd..26fd544ecce11234301b948f91128a4e6c052210 100644
--- a/python/paddle/nn/layer/conv.py
+++ b/python/paddle/nn/layer/conv.py
@@ -19,8 +19,8 @@ import numpy as np
 from ...fluid import get_flags
 from ...fluid import core
 from ...device import get_cudnn_version
-from ...fluid.dygraph import layers
-from ...fluid.initializer import Normal
+from .. import Layer
+from ..initializer import Normal
 from .. import functional as F
 from ...fluid.layers import utils
 from ..functional.conv import _update_padding_nd
@@ -31,7 +31,7 @@ __all__ = []
 def _get_default_param_initializer(num_channels, filter_size):
     filter_elem_num = num_channels * np.prod(filter_size)
     std = (2.0 / filter_elem_num)**0.5
-    return Normal(0.0, std, 0)
+    return Normal(0.0, std)
 
 
 def _reverse_repeat_list(t, n):
@@ -42,7 +42,7 @@ def _reverse_repeat_list(t, n):
     return list(x for x in reversed(t) for _ in range(n))
 
 
-class _ConvNd(layers.Layer):
+class _ConvNd(Layer):
     def __init__(self,
                  in_channels,
                  out_channels,
@@ -127,7 +127,7 @@ class _ConvNd(layers.Layer):
                 return None
             filter_elem_num = np.prod(self._kernel_size) * self._in_channels
             std = (2.0 / filter_elem_num)**0.5
-            return Normal(0.0, std, 0)
+            return Normal(0.0, std)
 
         self.weight = self.create_parameter(
             shape=filter_shape,
diff --git a/python/paddle/nn/layer/distance.py b/python/paddle/nn/layer/distance.py
index 27e904980d143d8e80282cdd6e6d5adc40ef5dcb..0547bf75a4bf6c4b2b4a878fdf37f00c007ef4bc 100644
--- a/python/paddle/nn/layer/distance.py
+++ b/python/paddle/nn/layer/distance.py
@@ -15,7 +15,7 @@
 import numpy as np
 
 import paddle
-from ...fluid.dygraph import layers
+from .. import Layer
 from ...fluid.framework import core, in_dygraph_mode
 from ...fluid.data_feeder import check_variable_and_dtype, check_type
 from ...fluid.layer_helper import LayerHelper
@@ -24,7 +24,7 @@ from paddle import _C_ops
 __all__ = []
 
 
-class PairwiseDistance(layers.Layer):
+class PairwiseDistance(Layer):
     r"""
     This operator computes the pairwise distance between two vectors. The
     distance is calculated by p-oreder norm:
@@ -87,7 +87,7 @@ class PairwiseDistance(layers.Layer):
                                      'PairwiseDistance')
             check_variable_and_dtype(y, 'y', ['float32', 'float64'],
                                      'PairwiseDistance')
-            sub = paddle.fluid.layers.elementwise_sub(x, y)
+            sub = paddle.subtract(x, y)
 
             helper = LayerHelper("PairwiseDistance", name=self.name)
             attrs = {
diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py
index 8f43eb8866b4bb7e6d1738999b7f64335fa62185..31b552bed162c2b1152acfd8252aaea7cb106eb8 100644
--- a/python/paddle/nn/layer/loss.py
+++ b/python/paddle/nn/layer/loss.py
@@ -20,11 +20,12 @@ import paddle.fluid.core as core
 import paddle
 from .. import functional as F
 from paddle.fluid.framework import core, in_dygraph_mode, _varbase_creator
+from .. import Layer
 
 __all__ = []
 
 
-class BCEWithLogitsLoss(fluid.dygraph.Layer):
+class BCEWithLogitsLoss(Layer):
     r"""
     This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer.
     Also, we can see it as the combine of ``sigmoid_cross_entropy_with_logits``
@@ -128,7 +129,7 @@ class BCEWithLogitsLoss(fluid.dygraph.Layer):
         return out
 
 
-class CrossEntropyLoss(fluid.dygraph.Layer):
+class CrossEntropyLoss(Layer):
     r"""
     By default, this operator implements the cross entropy loss function with softmax.
     This function combines the calculation of the softmax operation and the cross entropy loss function
@@ -407,7 +408,7 @@ class CrossEntropyLoss(fluid.dygraph.Layer):
         return ret
 
 
-class HSigmoidLoss(fluid.dygraph.Layer):
+class HSigmoidLoss(Layer):
     """
     Hierarchical Sigmoid Layer.
@@ -529,7 +530,7 @@ class HSigmoidLoss(fluid.dygraph.Layer):
         return out
 
 
-class MSELoss(fluid.dygraph.layers.Layer):
+class MSELoss(Layer):
     r"""
     **Mean Square Error Loss**
     Computes the mean square error (squared L2 norm) of given input and label.
@@ -596,8 +597,7 @@ class MSELoss(fluid.dygraph.layers.Layer):
             fluid.data_feeder.check_variable_and_dtype(
                 label, 'label', ['float32', 'float64'], 'MSELoss')
 
-        square_out = fluid.layers.square(
-            fluid.layers.elementwise_sub(input, label))
+        square_out = paddle.square(paddle.subtract(input, label))
 
         if self.reduction == 'none':
             return square_out
@@ -608,7 +608,7 @@ class MSELoss(fluid.dygraph.layers.Layer):
         return getattr(fluid.layers, reduce_op)(square_out)
 
 
-class L1Loss(fluid.dygraph.Layer):
+class L1Loss(Layer):
     r"""
     This interface is used to construct a callable object of the ``L1Loss`` class.
     The L1Loss layer calculates the L1 Loss of ``input`` and ``label`` as follows.
@@ -687,7 +687,7 @@ class L1Loss(fluid.dygraph.Layer):
             input, label, self.reduction, name=self.name)
 
 
-class BCELoss(fluid.dygraph.Layer):
+class BCELoss(Layer):
     """
     This interface is used to construct a callable object of the ``BCELoss`` class.
     The BCELoss layer measures the binary_cross_entropy loss between input predictions ``input``
@@ -777,7 +777,7 @@ class BCELoss(fluid.dygraph.Layer):
         return out
 
 
-class NLLLoss(fluid.dygraph.Layer):
+class NLLLoss(Layer):
     r"""
     :alias_main: paddle.nn.NLLLoss
     :alias: paddle.nn.NLLLoss,paddle.nn.layer.NLLLoss,paddle.nn.layer.loss.NLLLoss
@@ -886,7 +886,7 @@ class NLLLoss(fluid.dygraph.Layer):
             name=self._name)
 
 
-class KLDivLoss(fluid.dygraph.Layer):
+class KLDivLoss(Layer):
     r"""
     This interface calculates the Kullback-Leibler divergence loss
     between Input(X) and Input(Target). Notes that Input(X) is the
@@ -959,7 +959,7 @@ class KLDivLoss(fluid.dygraph.Layer):
         return out
 
 
-class MarginRankingLoss(fluid.dygraph.Layer):
+class MarginRankingLoss(Layer):
     r"""
     This interface is used to construct a callable object of the ``MarginRankingLoss`` class.
@@ -1031,7 +1031,7 @@ class MarginRankingLoss(fluid.dygraph.Layer):
         return out
 
 
-class CTCLoss(fluid.dygraph.Layer):
+class CTCLoss(Layer):
     """
     An operator integrating the open source Warp-CTC library
     (https://github.com/baidu-research/warp-ctc)
@@ -1127,7 +1127,7 @@ class CTCLoss(fluid.dygraph.Layer):
             norm_by_times=norm_by_times)
 
 
-class SmoothL1Loss(fluid.dygraph.Layer):
+class SmoothL1Loss(Layer):
     r"""
     This operator calculates smooth_l1_loss. Creates a criterion that uses a squared
     term if the absolute element-wise error falls below 1 and an L1 term otherwise.
diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py
index b93412a7b22ccd8d354c1263fa5a6c476303e469..9abbc494258948ff81e8b547048e14a173d53979 100644
--- a/python/paddle/nn/layer/norm.py
+++ b/python/paddle/nn/layer/norm.py
@@ -30,15 +30,13 @@ import six
 
 from ...fluid.dygraph import BatchNorm  # noqa: F401
-
 from ...fluid.dygraph import SpectralNorm  # noqa: F401
 
-from ...fluid.dygraph import layers
 from ...framework import get_default_dtype, set_default_dtype
 from ...fluid.framework import in_dygraph_mode
-from ...fluid.initializer import Constant
-from ...fluid.param_attr import ParamAttr
+from ..initializer import Constant
+from ...framework import ParamAttr
 from ...fluid.data_feeder import check_variable_and_dtype, check_type
 from ...fluid import core, dygraph_utils
@@ -47,14 +45,15 @@ from ..functional import batch_norm, layer_norm, instance_norm
 import numpy as np
 import numbers
 import warnings
-from ...fluid.dygraph.base import no_grad
+from ...framework import no_grad
 from .. import functional as F
 from paddle import _C_ops
+from .. import Layer
 
 __all__ = []
 
 
-class _InstanceNormBase(layers.Layer):
+class _InstanceNormBase(Layer):
     """
     This class is based class for InstanceNorm1D, 2d, 3d.
@@ -317,7 +316,7 @@ class InstanceNorm3D(_InstanceNormBase):
                     len(input.shape)))
 
 
-class GroupNorm(layers.Layer):
+class GroupNorm(Layer):
     """
     This interface is used to construct a callable object of the ``GroupNorm`` class.
     For more details, refer to code examples.
@@ -436,7 +435,7 @@ class GroupNorm(layers.Layer):
             self._num_groups, self._num_channels, self._epsilon)
 
 
-class LayerNorm(layers.Layer):
+class LayerNorm(Layer):
     r"""
     :alias_main: paddle.nn.LayerNorm
     :alias: paddle.nn.LayerNorm,paddle.nn.layer.LayerNorm,paddle.nn.layer.norm.LayerNorm
@@ -544,7 +543,7 @@ class LayerNorm(layers.Layer):
             self._epsilon)
 
 
-class _BatchNormBase(layers.Layer):
+class _BatchNormBase(Layer):
     """
     BatchNorm base .
     """
@@ -1181,7 +1180,7 @@ class SyncBatchNorm(_BatchNormBase):
         return layer_output
 
 
-class LocalResponseNorm(layers.Layer):
+class LocalResponseNorm(Layer):
     """
     Local Response Normalization performs a type of "lateral inhibition" by normalizing over local input regions.
     For more information, please refer to `ImageNet Classification with Deep Convolutional Neural Networks `_
diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py
index 528572ee21b7cc0859c0488bc791239418a4c9f8..881f92568414dcbf03dbc3e91569fc0812492716 100755
--- a/python/paddle/nn/layer/pooling.py
+++ b/python/paddle/nn/layer/pooling.py
@@ -12,14 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from ...fluid.dygraph import layers
 from ...fluid.layer_helper import LayerHelper
 from .. import functional as F
+from .. import Layer
 
 __all__ = []
 
 
-class AvgPool1D(layers.Layer):
+class AvgPool1D(Layer):
     r"""
     This operation applies a 1D average pooling over an input signal composed
     of several input planes, based on the input, output_size, return_mask parameters.
@@ -109,7 +109,7 @@ class AvgPool1D(layers.Layer):
             **self.__dict__)
 
 
-class AvgPool2D(layers.Layer):
+class AvgPool2D(Layer):
     r"""
     This operation applies 2D average pooling over input features based on the input,
     and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
@@ -220,7 +220,7 @@ class AvgPool2D(layers.Layer):
             **self.__dict__)
 
 
-class AvgPool3D(layers.Layer):
+class AvgPool3D(Layer):
     """
     This operation applies 3D max pooling over input features based on the input,
     and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
@@ -318,7 +318,7 @@ class AvgPool3D(layers.Layer):
             **self.__dict__)
 
 
-class MaxPool1D(layers.Layer):
+class MaxPool1D(Layer):
     """
     This operation applies 1D max pooling over input signal
     composed of several input planes based on the input,
@@ -412,7 +412,7 @@ class MaxPool1D(layers.Layer):
             **self.__dict__)
 
 
-class MaxPool2D(layers.Layer):
+class MaxPool2D(Layer):
     r"""
     This operation applies 2D max pooling over input feature based on the input,
     and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
@@ -522,7 +522,7 @@ class MaxPool2D(layers.Layer):
             **self.__dict__)
 
 
-class MaxPool3D(layers.Layer):
+class MaxPool3D(Layer):
     """
     This operation applies 3D max pooling over input features based on the input,
     and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
@@ -620,7 +620,7 @@ class MaxPool3D(layers.Layer):
             **self.__dict__)
 
 
-class AdaptiveAvgPool1D(layers.Layer):
+class AdaptiveAvgPool1D(Layer):
     r"""
     This operation applies a 1D adaptive average pooling over an input signal composed
@@ -693,7 +693,7 @@ class AdaptiveAvgPool1D(layers.Layer):
         return 'output_size={}'.format(self.output_size)
 
 
-class AdaptiveAvgPool2D(layers.Layer):
+class AdaptiveAvgPool2D(Layer):
     r"""
     This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions
@@ -779,7 +779,7 @@ class AdaptiveAvgPool2D(layers.Layer):
         return 'output_size={}'.format(self._output_size)
 
 
-class AdaptiveAvgPool3D(layers.Layer):
+class AdaptiveAvgPool3D(Layer):
     r"""
     This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions
@@ -872,7 +872,7 @@ class AdaptiveAvgPool3D(layers.Layer):
         return 'output_size={}'.format(self._output_size)
 
 
-class AdaptiveMaxPool1D(layers.Layer):
+class AdaptiveMaxPool1D(Layer):
     """
     This operation applies a 1D adaptive max pooling over an input signal composed
@@ -956,7 +956,7 @@ class AdaptiveMaxPool1D(layers.Layer):
             self.return_mask)
 
 
-class AdaptiveMaxPool2D(layers.Layer):
+class AdaptiveMaxPool2D(Layer):
     """
     This operation applies 2D adaptive max pooling on input tensor. The h and w dimensions
     of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and
@@ -1037,7 +1037,7 @@ class AdaptiveMaxPool2D(layers.Layer):
            self._return_mask)
 
 
-class AdaptiveMaxPool3D(layers.Layer):
+class AdaptiveMaxPool3D(Layer):
     """
     This operation applies 3D adaptive max pooling on input tensor. The h and w dimensions
     of the output tensor are determined by the parameter output_size.
     The difference between adaptive pooling and pooling is adaptive one focus
diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py
index 693ec0200b0d059eca7871742dabae00e0580bdf..77168566d88c6055bdce3a8f168b102a1ef29343 100644
--- a/python/paddle/nn/layer/rnn.py
+++ b/python/paddle/nn/layer/rnn.py
@@ -28,7 +28,7 @@ from paddle import framework
 from paddle.device import get_device, get_cudnn_version
 from paddle.nn import functional as F
 from paddle.nn import initializer as I
-from paddle.fluid.dygraph import Layer, LayerList
+from paddle.nn import Layer, LayerList
 from paddle.fluid.layers import utils
 from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as
 from paddle.fluid.data_feeder import convert_dtype
@@ -962,7 +962,7 @@ class RNNBase(LayerList):
             # for static-graph, append coalesce_tensor into startup program
             with fluid.program_guard(fluid.default_startup_program(),
                                      fluid.default_startup_program()):
-                with framework.no_grad():
+                with paddle.no_grad():
                     self._helper.append_op(
                         type="coalesce_tensor",
                         inputs={"Input": self._all_weights},
@@ -1040,11 +1040,11 @@ class RNNBase(LayerList):
             ])
         else:
             initial_states = [initial_states] if isinstance(
-                initial_states,
-                paddle.fluid.framework.Variable) else initial_states
+                initial_states, paddle.static.Variable) else initial_states
 
-        if self.could_use_cudnn and (not fluid.core.is_compiled_with_rocm() or
-                                     sequence_length is None):
+        if self.could_use_cudnn and (
+                not paddle.device.is_compiled_with_rocm() or
+                sequence_length is None):
             # Add CPU kernel and dispatch in backend later
             return self._cudnn_impl(inputs, initial_states, sequence_length)
diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py
index 5aba8ae85ad1b32a35de48cddc8dadd5d3929e70..eacf5aac9daa9f2d37795aaed5695df6b642f786 100644
--- a/python/paddle/nn/layer/transformer.py
+++ b/python/paddle/nn/layer/transformer.py
@@ -24,8 +24,8 @@ from .norm import LayerNorm
 from .. import functional as F
 from ... import tensor
 from ...fluid import layers
-from ...fluid.dygraph import Layer, LayerList
-from ...fluid.param_attr import ParamAttr
+from .. import Layer, LayerList
+from ...framework import ParamAttr
 from ...fluid.data_feeder import convert_dtype
 
 __all__ = []
diff --git a/python/paddle/nn/layer/vision.py b/python/paddle/nn/layer/vision.py
index e6d3af9a37b329231d625a4542eecea54d943e50..7f8b51ca10818ec10a794f2910f066f16cf26278 100644
--- a/python/paddle/nn/layer/vision.py
+++ b/python/paddle/nn/layer/vision.py
@@ -14,13 +14,13 @@
 
 # TODO: define specitial functions used in computer vision task
 
-from ...fluid.dygraph import layers
+from .. import Layer
 from .. import functional
 
 __all__ = []
 
 
-class PixelShuffle(layers.Layer):
+class PixelShuffle(Layer):
     """
     PixelShuffle Layer
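For reviewers, below is a minimal usage sketch (not part of the patch) of the public-API style this change migrates to: `paddle.nn.Layer` in place of `fluid.dygraph.layers.Layer`, the two-argument `Normal(mean, std)` initializer, `paddle.subtract`/`paddle.square` in place of the removed `fluid.layers.elementwise_sub`/`fluid.layers.square`, and `paddle.no_grad` in place of `framework.no_grad`. The `ScaledDiff` layer and its parameter are hypothetical and exist only for illustration.

```python
# Minimal sketch (assumption: not part of this patch) of the paddle.nn API
# surface that the diff standardizes on.
import numpy as np

import paddle
from paddle import ParamAttr
from paddle.nn import Layer
from paddle.nn.initializer import Normal


class ScaledDiff(Layer):
    # Hypothetical layer, for illustration only.
    def __init__(self, num_channels, kernel_size):
        super(ScaledDiff, self).__init__()
        filter_elem_num = num_channels * np.prod(kernel_size)
        std = (2.0 / filter_elem_num)**0.5
        # Two-argument initializer form used throughout the patch: Normal(mean, std).
        self.scale = self.create_parameter(
            shape=[1], attr=ParamAttr(initializer=Normal(0.0, std)))

    def forward(self, x, y):
        # paddle.subtract / paddle.square replace the removed
        # fluid.layers.elementwise_sub / fluid.layers.square calls.
        return self.scale * paddle.square(paddle.subtract(x, y))


# paddle.no_grad replaces fluid.dygraph.base.no_grad / framework.no_grad.
with paddle.no_grad():
    layer = ScaledDiff(num_channels=4, kernel_size=(3, 3))
    out = layer(paddle.rand([2, 4]), paddle.rand([2, 4]))
```

Because `python/paddle/nn/__init__.py` now re-exports `Layer`, `LayerList`, `ParameterList`, and `Sequential` before the submodule imports, the `paddle.nn.layer.*` modules can import them via `from .. import Layer` without a circular-import problem.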