Unverified commit 1c95631f authored by zhiboniu, committed by GitHub

unset fluid api in nn.layer (#34129)

Parent 1cb4c154
......@@ -15,6 +15,11 @@
# TODO: import all neural network related api under this directory,
# including layers, linear, conv, rnn etc.
from ..fluid.dygraph.layers import Layer # noqa: F401
from ..fluid.dygraph.container import LayerList # noqa: F401
from ..fluid.dygraph.container import ParameterList # noqa: F401
from ..fluid.dygraph.container import Sequential # noqa: F401
from .clip import ClipGradByGlobalNorm # noqa: F401
from .clip import ClipGradByNorm # noqa: F401
from .clip import ClipGradByValue # noqa: F401
......@@ -130,10 +135,6 @@ from .utils.spectral_norm_hook import spectral_norm
# TODO: remove loss, keep it for too many used in unitests
from .layer import loss # noqa: F401
from ..fluid.dygraph.layers import Layer # noqa: F401
from ..fluid.dygraph.container import LayerList # noqa: F401
from ..fluid.dygraph.container import ParameterList # noqa: F401
from ..fluid.dygraph.container import Sequential # noqa: F401
from . import utils # noqa: F401
from . import functional # noqa: F401
......
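The two hunks above move the `Layer` and container re-exports to the top of `python/paddle/nn/__init__.py` and drop the duplicate imports further down, so the public paths `paddle.nn.Layer`, `paddle.nn.LayerList`, `paddle.nn.ParameterList`, and `paddle.nn.Sequential` are bound before the submodules that subclass them are loaded. A minimal sketch of the unchanged public usage (the model itself is illustrative):

```python
import paddle

# Containers are still addressed through paddle.nn after the refactor.
model = paddle.nn.Sequential(
    paddle.nn.Linear(10, 5),  # fully connected layer
    paddle.nn.ReLU(),         # activation, now subclassing paddle.nn.Layer
)

x = paddle.randn([4, 10])
y = model(x)     # forward pass through the container
print(y.shape)   # [4, 5]
```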
......@@ -14,18 +14,18 @@
# TODO: define activation functions of neural network
from ...fluid.dygraph import layers
from ...fluid import core
from ...fluid.framework import in_dygraph_mode
from ...fluid.param_attr import ParamAttr
from ...fluid.initializer import Constant
from ...framework import ParamAttr
from ..initializer import Constant
from paddle.framework import get_default_dtype
from .. import functional as F
from paddle.nn import Layer
__all__ = []
class ELU(layers.Layer):
class ELU(Layer):
r"""
ELU Activation.
......@@ -67,7 +67,7 @@ class ELU(layers.Layer):
return 'alpha={}{}'.format(self._alpha, name_str)
class GELU(layers.Layer):
class GELU(Layer):
r"""
GELU Activation.
......@@ -120,7 +120,7 @@ class GELU(layers.Layer):
return 'approximate={}{}'.format(self._approximate, name_str)
class Hardshrink(layers.Layer):
class Hardshrink(Layer):
r"""
Hardshrink Activation
......@@ -168,7 +168,7 @@ class Hardshrink(layers.Layer):
return 'threshold={}{}'.format(self._threshold, name_str)
class Hardswish(layers.Layer):
class Hardswish(Layer):
r"""
Hardswish activation
......@@ -218,7 +218,7 @@ class Hardswish(layers.Layer):
return name_str
class Tanh(layers.Layer):
class Tanh(Layer):
r"""
Tanh Activation.
......@@ -259,7 +259,7 @@ class Tanh(layers.Layer):
return name_str
class Hardtanh(layers.Layer):
class Hardtanh(Layer):
r"""
Hardtanh Activation
......@@ -305,7 +305,7 @@ class Hardtanh(layers.Layer):
return 'min={}, max={}{}'.format(self._min, self._max, name_str)
class PReLU(layers.Layer):
class PReLU(Layer):
"""
PReLU Activation.
......@@ -377,7 +377,7 @@ class PReLU(layers.Layer):
self._num_parameters, self._init, self._dtype, name_str)
class ReLU(layers.Layer):
class ReLU(Layer):
"""
ReLU Activation.
......@@ -415,7 +415,7 @@ class ReLU(layers.Layer):
return name_str
class ReLU6(layers.Layer):
class ReLU6(Layer):
"""
ReLU6 Activation
......@@ -454,7 +454,7 @@ class ReLU6(layers.Layer):
return name_str
class SELU(layers.Layer):
class SELU(Layer):
r"""
SELU Activation
......@@ -505,7 +505,7 @@ class SELU(layers.Layer):
name_str)
class LeakyReLU(layers.Layer):
class LeakyReLU(Layer):
r"""
Leaky ReLU Activation.
......@@ -553,7 +553,7 @@ class LeakyReLU(layers.Layer):
return 'negative_slope={}{}'.format(self._negative_slope, name_str)
class Sigmoid(layers.Layer):
class Sigmoid(Layer):
"""
This interface is used to construct a callable object of the ``Sigmoid`` class. This layer calculates the `sigmoid` of input x.
......@@ -593,7 +593,7 @@ class Sigmoid(layers.Layer):
return name_str
class Hardsigmoid(layers.Layer):
class Hardsigmoid(Layer):
r"""
This interface is used to construct a callable object of the ``Hardsigmoid`` class.
This layer calculates the `hardsigmoid` of input x.
......@@ -644,7 +644,7 @@ class Hardsigmoid(layers.Layer):
return name_str
class Softplus(layers.Layer):
class Softplus(Layer):
r"""
Softplus Activation
......@@ -689,7 +689,7 @@ class Softplus(layers.Layer):
name_str)
class Softshrink(layers.Layer):
class Softshrink(Layer):
r"""
Softshrink Activation
......@@ -734,7 +734,7 @@ class Softshrink(layers.Layer):
return 'threshold={}{}'.format(self._threshold, name_str)
class Softsign(layers.Layer):
class Softsign(Layer):
r"""
Softsign Activation
......@@ -773,7 +773,7 @@ class Softsign(layers.Layer):
return name_str
class Swish(layers.Layer):
class Swish(Layer):
r"""
Swish Activation.
......@@ -812,7 +812,7 @@ class Swish(layers.Layer):
return name_str
class Tanhshrink(layers.Layer):
class Tanhshrink(Layer):
"""
Tanhshrink Activation
......@@ -851,7 +851,7 @@ class Tanhshrink(layers.Layer):
return name_str
class ThresholdedReLU(layers.Layer):
class ThresholdedReLU(Layer):
r"""
Thresholded ReLU Activation
......@@ -895,7 +895,7 @@ class ThresholdedReLU(layers.Layer):
return 'threshold={}{}'.format(self._threshold, name_str)
class Silu(layers.Layer):
class Silu(Layer):
"""
Silu Activation.
.. math::
......@@ -933,7 +933,7 @@ class Silu(layers.Layer):
return name_str
class LogSigmoid(layers.Layer):
class LogSigmoid(Layer):
r"""
LogSigmoid Activation.
......@@ -972,7 +972,7 @@ class LogSigmoid(layers.Layer):
return name_str
class Softmax(layers.Layer):
class Softmax(Layer):
r"""
Softmax Activation.
......@@ -1099,7 +1099,7 @@ class Softmax(layers.Layer):
return 'axis={}{}'.format(self._axis, name_str)
class LogSoftmax(layers.Layer):
class LogSoftmax(Layer):
r"""
This operator implements the log_softmax layer. The calculation process is as follows:
......@@ -1157,7 +1157,7 @@ class LogSoftmax(layers.Layer):
return 'axis={}{}'.format(self._axis, name_str)
class Maxout(layers.Layer):
class Maxout(Layer):
r"""
Maxout Activation.
......
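Every activation in this file now derives from `paddle.nn.Layer` rather than `fluid.dygraph.layers.Layer`; the class bodies are untouched. A hedged sketch of the pattern the migrated classes follow (`MyELU` is a hypothetical name, not part of the patch):

```python
import paddle
import paddle.nn.functional as F
from paddle.nn import Layer  # the import this patch switches to


class MyELU(Layer):
    """Hypothetical activation mirroring the migrated ELU pattern."""

    def __init__(self, alpha=1.0, name=None):
        super(MyELU, self).__init__()
        self._alpha = alpha
        self._name = name

    def forward(self, x):
        return F.elu(x, self._alpha, self._name)

    def extra_repr(self):
        name_str = ', name={}'.format(self._name) if self._name else ''
        return 'alpha={}{}'.format(self._alpha, name_str)
```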
......@@ -15,10 +15,10 @@
# TODO: define the common classes to build a neural network
import paddle
from ...fluid.dygraph import Flatten # noqa: F401
from ...fluid.dygraph import layers
from ...fluid.framework import in_dygraph_mode
from .. import functional as F
from ...fluid.framework import _dygraph_tracer
from paddle.nn import Layer
__all__ = []
......@@ -30,7 +30,7 @@ def _npairs(x, n):
return x
class Linear(layers.Layer):
class Linear(Layer):
r"""
Fully-connected linear transformation layer. For each input :math:`X` ,
......@@ -135,7 +135,7 @@ class Linear(layers.Layer):
self.weight.shape[0], self.weight.shape[1], self._dtype, name_str)
class Upsample(layers.Layer):
class Upsample(Layer):
"""
This op resizes a batch of images.
......@@ -385,7 +385,7 @@ class Upsample(layers.Layer):
self.data_format, name_str)
class UpsamplingNearest2D(layers.Layer):
class UpsamplingNearest2D(Layer):
"""
This op upsamples a batch of images, using nearest neighbours' pixel values.
The input must be a 4-D Tensor of the shape (num_batches, channels, in_h, in_w),
......@@ -470,7 +470,7 @@ class UpsamplingNearest2D(layers.Layer):
name_str)
class UpsamplingBilinear2D(layers.Layer):
class UpsamplingBilinear2D(Layer):
"""
This op upsamples a batch of images, using bilinear interpolation to compute pixel values.
The input must be a 4-D Tensor of the shape (num_batches, channels, in_h, in_w),
......@@ -556,7 +556,7 @@ class UpsamplingBilinear2D(layers.Layer):
name_str)
class Bilinear(layers.Layer):
class Bilinear(Layer):
r"""
This layer performs a bilinear transformation on two inputs.
......@@ -651,7 +651,7 @@ class Bilinear(layers.Layer):
self._dtype, name_str)
class Dropout(layers.Layer):
class Dropout(Layer):
"""
Dropout is a regularization technique for reducing overfitting by preventing
neuron co-adaptation during training, as described in the paper:
......@@ -725,7 +725,7 @@ class Dropout(layers.Layer):
name_str)
class Dropout2D(layers.Layer):
class Dropout2D(Layer):
"""
Randomly zero out entire channels (in the batched input 4d tensor with the shape `NCHW` ,
a channel is a 2D feature map with the shape `HW`). Each channel will be zeroed out independently
......@@ -786,7 +786,7 @@ class Dropout2D(layers.Layer):
name_str)
class Dropout3D(layers.Layer):
class Dropout3D(Layer):
"""
Randomly zero out entire channels (in the batched input 5d tensor with the shape `NCDHW` ,
a channel is a 3D feature map with the shape `DHW` ). Each channel will be zeroed out independently
......@@ -847,7 +847,7 @@ class Dropout3D(layers.Layer):
name_str)
class AlphaDropout(layers.Layer):
class AlphaDropout(Layer):
"""
Alpha Dropout is a type of Dropout that maintains the self-normalizing property. For an input with
zero mean and unit standard deviation, the output of Alpha Dropout maintains the original mean and
......@@ -900,7 +900,7 @@ class AlphaDropout(layers.Layer):
return 'p={}{}'.format(self.p, name_str)
class Pad1D(layers.Layer):
class Pad1D(Layer):
"""
This interface is used to construct a callable object of the ``Pad1D`` class.
Pad tensor according to 'pad', 'mode' and 'value'.
......@@ -981,7 +981,7 @@ class Pad1D(layers.Layer):
self._pad, self._mode, self._value, self._data_format, name_str)
class Pad2D(layers.Layer):
class Pad2D(Layer):
"""
This interface is used to construct a callable object of the ``Pad2D`` class.
Pad tensor according to 'pad', 'mode' and 'value'.
......@@ -1065,7 +1065,7 @@ class Pad2D(layers.Layer):
self._pad, self._mode, self._value, self._data_format, name_str)
class Pad3D(layers.Layer):
class Pad3D(Layer):
"""
This interface is used to construct a callable object of the ``Pad3D`` class.
Pad tensor according to 'pad', 'mode' and 'value'.
......@@ -1149,7 +1149,7 @@ class Pad3D(layers.Layer):
self._pad, self._mode, self._value, self._data_format, name_str)
class CosineSimilarity(layers.Layer):
class CosineSimilarity(Layer):
"""
This interface is used to compute cosine similarity between x1 and x2 along axis.
......@@ -1206,7 +1206,7 @@ class CosineSimilarity(layers.Layer):
return 'axis={_axis}, eps={_eps}'.format(**self.__dict__)
class Embedding(layers.Layer):
class Embedding(Layer):
r"""
**Embedding Layer**
......@@ -1367,7 +1367,7 @@ class Embedding(layers.Layer):
return main_str.format(**self.__dict__)
class Unfold(layers.Layer):
class Unfold(Layer):
"""
This op returns a col buffer of sliding local blocks of input x, also known
as im2col for batched 2D image tensors. For each block under the convolution filter,
......
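All of the common layers in this file (Linear, Upsample, the Dropout and Pad variants, Embedding, Unfold, and so on) now share `paddle.nn.Layer` as their base, which is where mode switching such as `train()`/`eval()` comes from. A usage sketch:

```python
import paddle

layer = paddle.nn.Dropout(p=0.5)
x = paddle.ones([2, 3])

layer.train()   # training mode: elements are randomly zeroed
y_train = layer(x)

layer.eval()    # inference mode: dropout becomes a no-op
y_eval = layer(x)

print(isinstance(layer, paddle.nn.Layer))  # True
```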
......@@ -13,7 +13,7 @@
# limitations under the License.
from collections import OrderedDict
from ...fluid.dygraph.layers import Layer
from .. import Layer
from collections.abc import Iterable, Mapping
__all__ = []
......
......@@ -19,8 +19,8 @@ import numpy as np
from ...fluid import get_flags
from ...fluid import core
from ...device import get_cudnn_version
from ...fluid.dygraph import layers
from ...fluid.initializer import Normal
from .. import Layer
from ..initializer import Normal
from .. import functional as F
from ...fluid.layers import utils
from ..functional.conv import _update_padding_nd
......@@ -31,7 +31,7 @@ __all__ = []
def _get_default_param_initializer(num_channels, filter_size):
filter_elem_num = num_channels * np.prod(filter_size)
std = (2.0 / filter_elem_num)**0.5
return Normal(0.0, std, 0)
return Normal(0.0, std)
def _reverse_repeat_list(t, n):
......@@ -42,7 +42,7 @@ def _reverse_repeat_list(t, n):
return list(x for x in reversed(t) for _ in range(n))
class _ConvNd(layers.Layer):
class _ConvNd(Layer):
def __init__(self,
in_channels,
out_channels,
......@@ -127,7 +127,7 @@ class _ConvNd(layers.Layer):
return None
filter_elem_num = np.prod(self._kernel_size) * self._in_channels
std = (2.0 / filter_elem_num)**0.5
return Normal(0.0, std, 0)
return Normal(0.0, std)
self.weight = self.create_parameter(
shape=filter_shape,
......
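The `Normal` initializer now comes from `paddle.nn.initializer`, whose constructor takes only `(mean, std)`; the trailing `seed` argument accepted by `fluid.initializer.Normal` is gone, hence `Normal(0.0, std, 0)` becoming `Normal(0.0, std)`. A sketch of applying the same He-style default the conv code computes (the conv shape is illustrative):

```python
import numpy as np
import paddle
from paddle.nn.initializer import Normal

# std = sqrt(2 / fan_in), as in _get_default_param_initializer above.
in_channels, kernel_size = 3, (3, 3)
fan_in = in_channels * np.prod(kernel_size)
std = (2.0 / fan_in) ** 0.5

conv = paddle.nn.Conv2D(
    in_channels=3,
    out_channels=8,
    kernel_size=3,
    weight_attr=paddle.ParamAttr(initializer=Normal(0.0, std)),  # no seed arg
)
```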
......@@ -15,7 +15,7 @@
import numpy as np
import paddle
from ...fluid.dygraph import layers
from .. import Layer
from ...fluid.framework import core, in_dygraph_mode
from ...fluid.data_feeder import check_variable_and_dtype, check_type
from ...fluid.layer_helper import LayerHelper
......@@ -24,7 +24,7 @@ from paddle import _C_ops
__all__ = []
class PairwiseDistance(layers.Layer):
class PairwiseDistance(Layer):
r"""
This operator computes the pairwise distance between two vectors. The
distance is calculated by the p-order norm:
......@@ -87,7 +87,7 @@ class PairwiseDistance(layers.Layer):
'PairwiseDistance')
check_variable_and_dtype(y, 'y', ['float32', 'float64'],
'PairwiseDistance')
sub = paddle.fluid.layers.elementwise_sub(x, y)
sub = paddle.subtract(x, y)
helper = LayerHelper("PairwiseDistance", name=self.name)
attrs = {
......
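`fluid.layers.elementwise_sub` gives way to the public `paddle.subtract`, which broadcasts the same way. A small sketch of the op and the layer it feeds (values are illustrative):

```python
import paddle

x = paddle.to_tensor([[1.0, 3.0], [3.0, 5.0]])
y = paddle.to_tensor([[5.0, 6.0], [7.0, 8.0]])

sub = paddle.subtract(x, y)           # elementwise x - y, 2.x spelling

dist = paddle.nn.PairwiseDistance(p=2)
print(dist(x, y))                     # L2 distance per row
```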
......@@ -20,11 +20,12 @@ import paddle.fluid.core as core
import paddle
from .. import functional as F
from paddle.fluid.framework import core, in_dygraph_mode, _varbase_creator
from .. import Layer
__all__ = []
class BCEWithLogitsLoss(fluid.dygraph.Layer):
class BCEWithLogitsLoss(Layer):
r"""
This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer.
Also, it can be seen as the combination of ``sigmoid_cross_entropy_with_logits``
......@@ -128,7 +129,7 @@ class BCEWithLogitsLoss(fluid.dygraph.Layer):
return out
class CrossEntropyLoss(fluid.dygraph.Layer):
class CrossEntropyLoss(Layer):
r"""
By default, this operator implements the cross entropy loss function with softmax. This function
combines the calculation of the softmax operation and the cross entropy loss function
......@@ -407,7 +408,7 @@ class CrossEntropyLoss(fluid.dygraph.Layer):
return ret
class HSigmoidLoss(fluid.dygraph.Layer):
class HSigmoidLoss(Layer):
"""
Hierarchical Sigmoid Layer.
......@@ -529,7 +530,7 @@ class HSigmoidLoss(fluid.dygraph.Layer):
return out
class MSELoss(fluid.dygraph.layers.Layer):
class MSELoss(Layer):
r"""
**Mean Square Error Loss**
Computes the mean square error (squared L2 norm) of given input and label.
......@@ -596,8 +597,7 @@ class MSELoss(fluid.dygraph.layers.Layer):
fluid.data_feeder.check_variable_and_dtype(
label, 'label', ['float32', 'float64'], 'MSELoss')
square_out = fluid.layers.square(
fluid.layers.elementwise_sub(input, label))
square_out = paddle.square(paddle.subtract(input, label))
if self.reduction == 'none':
return square_out
......@@ -608,7 +608,7 @@ class MSELoss(fluid.dygraph.layers.Layer):
return getattr(fluid.layers, reduce_op)(square_out)
class L1Loss(fluid.dygraph.Layer):
class L1Loss(Layer):
r"""
This interface is used to construct a callable object of the ``L1Loss`` class.
The L1Loss layer calculates the L1 Loss of ``input`` and ``label`` as follows.
......@@ -687,7 +687,7 @@ class L1Loss(fluid.dygraph.Layer):
input, label, self.reduction, name=self.name)
class BCELoss(fluid.dygraph.Layer):
class BCELoss(Layer):
"""
This interface is used to construct a callable object of the ``BCELoss`` class.
The BCELoss layer measures the binary_cross_entropy loss between input predictions ``input``
......@@ -777,7 +777,7 @@ class BCELoss(fluid.dygraph.Layer):
return out
class NLLLoss(fluid.dygraph.Layer):
class NLLLoss(Layer):
r"""
:alias_main: paddle.nn.NLLLoss
:alias: paddle.nn.NLLLoss,paddle.nn.layer.NLLLoss,paddle.nn.layer.loss.NLLLoss
......@@ -886,7 +886,7 @@ class NLLLoss(fluid.dygraph.Layer):
name=self._name)
class KLDivLoss(fluid.dygraph.Layer):
class KLDivLoss(Layer):
r"""
This interface calculates the Kullback-Leibler divergence loss
between Input(X) and Input(Target). Notes that Input(X) is the
......@@ -959,7 +959,7 @@ class KLDivLoss(fluid.dygraph.Layer):
return out
class MarginRankingLoss(fluid.dygraph.Layer):
class MarginRankingLoss(Layer):
r"""
This interface is used to construct a callable object of the ``MarginRankingLoss`` class.
......@@ -1031,7 +1031,7 @@ class MarginRankingLoss(fluid.dygraph.Layer):
return out
class CTCLoss(fluid.dygraph.Layer):
class CTCLoss(Layer):
"""
An operator integrating the open source Warp-CTC library (https://github.com/baidu-research/warp-ctc)
......@@ -1127,7 +1127,7 @@ class CTCLoss(fluid.dygraph.Layer):
norm_by_times=norm_by_times)
class SmoothL1Loss(fluid.dygraph.Layer):
class SmoothL1Loss(Layer):
r"""
This operator calculates smooth_l1_loss. Creates a criterion that uses a squared
term if the absolute element-wise error falls below 1 and an L1 term otherwise.
......
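Within `MSELoss`, the static-graph branch now composes the public ops `paddle.subtract` and `paddle.square` in place of their `fluid.layers` counterparts. A sketch checking that the composition matches the layer (tensor values are illustrative):

```python
import paddle

pred = paddle.to_tensor([1.5, 0.8])
label = paddle.to_tensor([1.7, 1.0])

# The expression the patched code computes before reduction:
square_out = paddle.square(paddle.subtract(pred, label))

mse = paddle.nn.MSELoss(reduction='mean')
print(paddle.allclose(mse(pred, label), square_out.mean()))  # True
```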
......@@ -30,15 +30,13 @@
import six
from ...fluid.dygraph import BatchNorm # noqa: F401
from ...fluid.dygraph import SpectralNorm # noqa: F401
from ...fluid.dygraph import layers
from ...framework import get_default_dtype, set_default_dtype
from ...fluid.framework import in_dygraph_mode
from ...fluid.initializer import Constant
from ...fluid.param_attr import ParamAttr
from ..initializer import Constant
from ...framework import ParamAttr
from ...fluid.data_feeder import check_variable_and_dtype, check_type
from ...fluid import core, dygraph_utils
......@@ -47,14 +45,15 @@ from ..functional import batch_norm, layer_norm, instance_norm
import numpy as np
import numbers
import warnings
from ...fluid.dygraph.base import no_grad
from ...framework import no_grad
from .. import functional as F
from paddle import _C_ops
from .. import Layer
__all__ = []
class _InstanceNormBase(layers.Layer):
class _InstanceNormBase(Layer):
"""
This class is the base class for InstanceNorm1D, 2D, and 3D.
......@@ -317,7 +316,7 @@ class InstanceNorm3D(_InstanceNormBase):
len(input.shape)))
class GroupNorm(layers.Layer):
class GroupNorm(Layer):
"""
This interface is used to construct a callable object of the ``GroupNorm`` class.
For more details, refer to code examples.
......@@ -436,7 +435,7 @@ class GroupNorm(layers.Layer):
self._num_groups, self._num_channels, self._epsilon)
class LayerNorm(layers.Layer):
class LayerNorm(Layer):
r"""
:alias_main: paddle.nn.LayerNorm
:alias: paddle.nn.LayerNorm,paddle.nn.layer.LayerNorm,paddle.nn.layer.norm.LayerNorm
......@@ -544,7 +543,7 @@ class LayerNorm(layers.Layer):
self._epsilon)
class _BatchNormBase(layers.Layer):
class _BatchNormBase(Layer):
"""
BatchNorm base .
"""
......@@ -1181,7 +1180,7 @@ class SyncBatchNorm(_BatchNormBase):
return layer_output
class LocalResponseNorm(layers.Layer):
class LocalResponseNorm(Layer):
"""
Local Response Normalization performs a type of "lateral inhibition" by normalizing over local input regions.
For more information, please refer to `ImageNet Classification with Deep Convolutional Neural Networks <https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf>`_
......
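The normalization layers keep their public behaviour; only the base class and helper imports change (`no_grad` now comes from `paddle.framework`, `Constant` and `ParamAttr` from the 2.x namespaces). A usage sketch:

```python
import paddle

x = paddle.randn([2, 6, 4, 4])

gn = paddle.nn.GroupNorm(num_groups=3, num_channels=6)
ln = paddle.nn.LayerNorm(normalized_shape=[6, 4, 4])

print(gn(x).shape)  # [2, 6, 4, 4]
print(ln(x).shape)  # [2, 6, 4, 4]
```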
......@@ -12,14 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from ...fluid.dygraph import layers
from ...fluid.layer_helper import LayerHelper
from .. import functional as F
from .. import Layer
__all__ = []
class AvgPool1D(layers.Layer):
class AvgPool1D(Layer):
r"""
This operation applies a 1D average pooling over an input signal composed
of several input planes, based on the input, output_size, return_mask parameters.
......@@ -109,7 +109,7 @@ class AvgPool1D(layers.Layer):
**self.__dict__)
class AvgPool2D(layers.Layer):
class AvgPool2D(Layer):
r"""
This operation applies 2D average pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
......@@ -220,7 +220,7 @@ class AvgPool2D(layers.Layer):
**self.__dict__)
class AvgPool3D(layers.Layer):
class AvgPool3D(Layer):
"""
This operation applies 3D average pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
......@@ -318,7 +318,7 @@ class AvgPool3D(layers.Layer):
**self.__dict__)
class MaxPool1D(layers.Layer):
class MaxPool1D(Layer):
"""
This operation applies 1D max pooling over input signal
composed of several input planes based on the input,
......@@ -412,7 +412,7 @@ class MaxPool1D(layers.Layer):
**self.__dict__)
class MaxPool2D(layers.Layer):
class MaxPool2D(Layer):
r"""
This operation applies 2D max pooling over input feature based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
......@@ -522,7 +522,7 @@ class MaxPool2D(layers.Layer):
**self.__dict__)
class MaxPool3D(layers.Layer):
class MaxPool3D(Layer):
"""
This operation applies 3D max pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
......@@ -620,7 +620,7 @@ class MaxPool3D(layers.Layer):
**self.__dict__)
class AdaptiveAvgPool1D(layers.Layer):
class AdaptiveAvgPool1D(Layer):
r"""
This operation applies a 1D adaptive average pooling over an input signal composed
......@@ -693,7 +693,7 @@ class AdaptiveAvgPool1D(layers.Layer):
return 'output_size={}'.format(self.output_size)
class AdaptiveAvgPool2D(layers.Layer):
class AdaptiveAvgPool2D(Layer):
r"""
This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions
......@@ -779,7 +779,7 @@ class AdaptiveAvgPool2D(layers.Layer):
return 'output_size={}'.format(self._output_size)
class AdaptiveAvgPool3D(layers.Layer):
class AdaptiveAvgPool3D(Layer):
r"""
This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions
......@@ -872,7 +872,7 @@ class AdaptiveAvgPool3D(layers.Layer):
return 'output_size={}'.format(self._output_size)
class AdaptiveMaxPool1D(layers.Layer):
class AdaptiveMaxPool1D(Layer):
"""
This operation applies a 1D adaptive max pooling over an input signal composed
......@@ -956,7 +956,7 @@ class AdaptiveMaxPool1D(layers.Layer):
self.return_mask)
class AdaptiveMaxPool2D(layers.Layer):
class AdaptiveMaxPool2D(Layer):
"""
This operation applies 2D adaptive max pooling on input tensor. The h and w dimensions
of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and
......@@ -1037,7 +1037,7 @@ class AdaptiveMaxPool2D(layers.Layer):
self._return_mask)
class AdaptiveMaxPool3D(layers.Layer):
class AdaptiveMaxPool3D(Layer):
"""
This operation applies 3D adaptive max pooling on input tensor. The h and w dimensions of the output tensor are
determined by the parameter output_size. The difference between adaptive pooling and pooling is adaptive one focus
......
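The pooling layers likewise change only their base class. A sketch pairing a fixed-window variant with an adaptive one:

```python
import paddle

x = paddle.randn([1, 3, 32, 32])

max_pool = paddle.nn.MaxPool2D(kernel_size=2, stride=2)
adaptive_avg = paddle.nn.AdaptiveAvgPool2D(output_size=1)

print(max_pool(x).shape)      # [1, 3, 16, 16]
print(adaptive_avg(x).shape)  # [1, 3, 1, 1]
```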
......@@ -28,7 +28,7 @@ from paddle import framework
from paddle.device import get_device, get_cudnn_version
from paddle.nn import functional as F
from paddle.nn import initializer as I
from paddle.fluid.dygraph import Layer, LayerList
from paddle.nn import Layer, LayerList
from paddle.fluid.layers import utils
from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as
from paddle.fluid.data_feeder import convert_dtype
......@@ -962,7 +962,7 @@ class RNNBase(LayerList):
# for static-graph, append coalesce_tensor into startup program
with fluid.program_guard(fluid.default_startup_program(),
fluid.default_startup_program()):
with framework.no_grad():
with paddle.no_grad():
self._helper.append_op(
type="coalesce_tensor",
inputs={"Input": self._all_weights},
......@@ -1040,11 +1040,11 @@ class RNNBase(LayerList):
])
else:
initial_states = [initial_states] if isinstance(
initial_states,
paddle.fluid.framework.Variable) else initial_states
initial_states, paddle.static.Variable) else initial_states
if self.could_use_cudnn and (not fluid.core.is_compiled_with_rocm() or
sequence_length is None):
if self.could_use_cudnn and (
not paddle.device.is_compiled_with_rocm() or
sequence_length is None):
# Add CPU kernel and dispatch in backend later
return self._cudnn_impl(inputs, initial_states, sequence_length)
......
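Three fluid touch points in the RNN code move to their 2.x homes: `framework.no_grad()` becomes `paddle.no_grad()`, `fluid.framework.Variable` becomes `paddle.static.Variable`, and the ROCm probe becomes `paddle.device.is_compiled_with_rocm()`. A sketch of the `no_grad` form used around the coalesce_tensor op:

```python
import paddle

linear = paddle.nn.Linear(4, 4)
x = paddle.randn([2, 4])

with paddle.no_grad():   # context-manager form, as in the patch
    y = linear(x)

print(y.stop_gradient)                        # True: no gradient tracked
print(paddle.device.is_compiled_with_rocm())  # False on CUDA/CPU builds
```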
......@@ -24,8 +24,8 @@ from .norm import LayerNorm
from .. import functional as F
from ... import tensor
from ...fluid import layers
from ...fluid.dygraph import Layer, LayerList
from ...fluid.param_attr import ParamAttr
from .. import Layer, LayerList
from ...framework import ParamAttr
from ...fluid.data_feeder import convert_dtype
__all__ = []
......
......@@ -14,13 +14,13 @@
# TODO: define specitial functions used in computer vision task
from ...fluid.dygraph import layers
from .. import Layer
from .. import functional
__all__ = []
class PixelShuffle(layers.Layer):
class PixelShuffle(Layer):
"""
PixelShuffle Layer
......
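With `PixelShuffle` also rebased onto `paddle.nn.Layer`, its usage is unchanged. A quick sketch:

```python
import paddle

# Rearranges (N, C*r^2, H, W) -> (N, C, H*r, W*r) for upscale factor r.
pixel_shuffle = paddle.nn.PixelShuffle(3)
x = paddle.randn([2, 9, 4, 4])

print(pixel_shuffle(x).shape)  # [2, 1, 12, 12]
```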