diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index b19d50a6ad6afa312f5e695583174e56bf490755..8143bde302a988734a8acb07621560e144fc8954 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -13,6 +13,7 @@ paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, d
paddle.fluid.cuda_places (ArgSpec(args=['device_ids'], varargs=None, keywords=None, defaults=(None,)), ('document', '7d9a51fc9cf3c5245b5227080a8064c3'))
paddle.fluid.cpu_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', '4c0cd83f0b401fc2ff84c70974e5d210'))
paddle.fluid.cuda_pinned_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd0c3ebd813c39958c92b78e3eef7e912'))
+paddle.fluid.in_dygraph_mode (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'f06314a1cb30c96b5808dde2219c2dae'))
paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f5369953dd0c443961cf79f7a00e1a03'))
paddle.fluid.Executor.infer_from_dataset (ArgSpec(args=['self', 'program', 'dataset', 'scope', 'thread', 'debug', 'fetch_list', 'fetch_info', 'print_period'], varargs=None, keywords=None, defaults=(None, None, None, 0, False, None, None, 100)), ('document', '9c7decb955b9c4f718114179c8985581'))
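The new `paddle.fluid.in_dygraph_mode` entry above exposes the dygraph mode check as a public API. A minimal sketch of how it can be queried (illustrative only, not part of the patch):

.. code-block:: python

    import paddle.fluid as fluid

    def current_mode():
        # in_dygraph_mode() is the public replacement for framework._in_dygraph_mode()
        return "dygraph" if fluid.in_dygraph_mode() else "static graph"

    print(current_mode())           # "static graph" outside a guard
    with fluid.dygraph.guard():
        print(current_mode())       # "dygraph" inside fluid.dygraph.guard()
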
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index eb6895f2a69ade2f5e5c3fe7742fab6fc0a75491..811eec90720d4051e7e4315257bc7517ce2c893f 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -66,6 +66,8 @@ from . import compiler
from .compiler import *
from paddle.fluid.layers.math_op_patch import monkey_patch_variable
from . import install_check
+from .dygraph.nn import *
+from .dygraph.layers import *
Tensor = LoDTensor
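With the two wildcard imports above, the dygraph building blocks are re-exported at the package top level, so `fluid.Layer`, `fluid.FC`, `fluid.Conv2D`, etc. resolve without the `fluid.dygraph.` prefix (the test updates later in this patch rely on exactly that). A small sketch under that assumption:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        # fluid.FC is now an alias of fluid.dygraph.nn.FC
        fc = fluid.FC("fc", size=4, act="relu")
        x = fluid.dygraph.to_variable(np.ones([2, 8], dtype="float32"))
        y = fc(x)
        print(y.shape)   # [2, 4]
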
diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py
index d55dbbb9c72cb887e169849c3a3e32a13c202a7b..bf484b35c7bf9a2b17126789ff247bd73095fe7b 100644
--- a/python/paddle/fluid/dygraph/base.py
+++ b/python/paddle/fluid/dygraph/base.py
@@ -22,7 +22,7 @@ __all__ = ['enabled', 'guard', 'to_variable']
def enabled():
- return framework._in_dygraph_mode()
+ return framework.in_dygraph_mode()
@signature_safe_contextmanager
diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py
index f992ae0576c81ed98a3e9f7a446b0c2e808622ea..f2b01aece7bf86b1a195296ba49a626721213b7a 100644
--- a/python/paddle/fluid/dygraph/checkpoint.py
+++ b/python/paddle/fluid/dygraph/checkpoint.py
@@ -97,20 +97,12 @@ def load_persistables(vardict, dirname, filename=None):
Examples:
.. code-block:: python
- my_layer = layer(fluid.dygraph.Layer)
+ my_layer = layer(fluid.Layer)
param_path = "./my_paddle_model"
param_dict = fluid.dygraph.load_persistables(my_layer.parameters(), param_path)
param_1 = param_dict['PtbModel_0.w_1']
- or:
- my_layer = layer(fluid.dygraph.Layer)
- param_path = "./my_paddle_model"
- filename = "model.file"
- param_dict = fluid.dygraph.load_persistables(my_layer.state_dict(), param_path,
- filename=filename)
- param_1 = param_dict['PtbModel_0.w_1']
-
"""
if isinstance(vardict, collections.OrderedDict):
return _load_var_from_file(vardict, dirname, filename)
diff --git a/python/paddle/fluid/dygraph/layer_object_helper.py b/python/paddle/fluid/dygraph/layer_object_helper.py
index f8e607aab8491a45958843745bd7aa7e3021fc15..b757f8fff24fcee8367a27ff87557d5f1e886c05 100644
--- a/python/paddle/fluid/dygraph/layer_object_helper.py
+++ b/python/paddle/fluid/dygraph/layer_object_helper.py
@@ -16,7 +16,7 @@ from __future__ import print_function
import copy
import six
-from ..framework import Parameter, _in_dygraph_mode
+from ..framework import Parameter, in_dygraph_mode
from ..param_attr import ParamAttr
from .. import core
from six.moves import zip
diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py
index 014ee41f4c5aa280fb5b366d8f1704290cc067d4..39e06e3486cd5479f69cbdb67811f03bd9646123 100644
--- a/python/paddle/fluid/dygraph/layers.py
+++ b/python/paddle/fluid/dygraph/layers.py
@@ -139,14 +139,14 @@ class Layer(core.Layer):
def clear_gradients(self):
for p in self.parameters():
- p._clear_gradient()
+ p.clear_gradient()
- def _build_once(self, *args):
+ def build_once(self, *args):
pass
def __call__(self, *inputs):
if not self._built:
- self._build_once(*inputs)
+ self.build_once(*inputs)
outputs = self.forward(*inputs)
self._built = True
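Since `_build_once` becomes the public `build_once` hook, user-defined layers should override the new name; `__call__` still invokes it exactly once, with the first real inputs, before `forward`. A hedged sketch of a custom layer written against the renamed hook (the `Doubler` class is purely illustrative):

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    class Doubler(fluid.Layer):
        def build_once(self, input):
            # Runs once, on the first call, so setup can depend on the input shape.
            self._in_shape = input.shape

        def forward(self, input):
            return fluid.layers.scale(input, scale=2.0)

    with fluid.dygraph.guard():
        layer = Doubler("doubler")
        out = layer(fluid.dygraph.to_variable(np.ones([2, 3], dtype="float32")))
        print(out.numpy())   # every element equals 2.0
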
diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py
index 527c37cb2c4f1540fb8c464dfdbe061b2899f678..6384e5678837b9fa64e89def6796977f2fa54116 100644
--- a/python/paddle/fluid/dygraph/nn.py
+++ b/python/paddle/fluid/dygraph/nn.py
@@ -19,7 +19,7 @@ from six.moves import reduce
from .. import core
from ..layers import utils
from . import layers
-from ..framework import Variable, _in_dygraph_mode, OpProtoHolder, Parameter
+from ..framework import Variable, in_dygraph_mode, OpProtoHolder, Parameter
from ..param_attr import ParamAttr
from ..initializer import Normal, Constant, NumpyArrayInitializer
import numpy as np
@@ -33,6 +33,109 @@ __all__ = [
class Conv2D(layers.Layer):
+ """
+ The convolution2D layer calculates the output based on the input, filter
+ and strides, paddings, dilations, groups parameters. Input and
+ Output are in NCHW format, where N is batch size, C is the number of
+ channels, H is the height of the feature, and W is the width of the feature.
+ Filter is in MCHW format, where M is the number of output image channels,
+ C is the number of input image channels, H is the height of the filter,
+    and W is the width of the filter. If groups is greater than 1,
+    C equals the number of input image channels divided by groups.
+    Please refer to UFLDL's `convolution
+    `_
+    for more details.
+    If bias attribute and activation type are provided, bias is added to the
+ output of the convolution, and the corresponding activation function is
+ applied to the final result.
+
+ For each input :math:`X`, the equation is:
+
+ .. math::
+
+ Out = \sigma (W \\ast X + b)
+
+ Where:
+
+ * :math:`X`: Input value, a tensor with NCHW format.
+ * :math:`W`: Filter value, a tensor with MCHW format.
+ * :math:`\\ast`: Convolution operation.
+ * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
+ * :math:`\\sigma`: Activation function.
+ * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
+
+ Example:
+
+ - Input:
+
+ Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
+
+ Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
+
+ - Output:
+
+ Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
+
+ Where
+
+ .. math::
+
+ H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
+ W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
+
+ Args:
+ input (Variable): The input image with [N, C, H, W] format.
+        num_filters(int): The number of filters. It is the same as the output
+            image channels.
+ filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
+ it must contain two integers, (filter_size_H, filter_size_W).
+ Otherwise, the filter will be a square.
+ stride (int|tuple): The stride size. If stride is a tuple, it must
+ contain two integers, (stride_H, stride_W). Otherwise, the
+ stride_H = stride_W = stride. Default: stride = 1.
+ padding (int|tuple): The padding size. If padding is a tuple, it must
+ contain two integers, (padding_H, padding_W). Otherwise, the
+ padding_H = padding_W = padding. Default: padding = 0.
+ dilation (int|tuple): The dilation size. If dilation is a tuple, it must
+ contain two integers, (dilation_H, dilation_W). Otherwise, the
+ dilation_H = dilation_W = dilation. Default: dilation = 1.
+ groups (int): The groups number of the Conv2d Layer. According to grouped
+ convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
+ the first half of the filters is only connected to the first half
+ of the input channels, while the second half of the filters is only
+ connected to the second half of the input channels. Default: groups=1.
+ param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
+ of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
+ will create ParamAttr as param_attr. If the Initializer of the param_attr
+ is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
+ and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None.
+ bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv2d.
+ If it is set to False, no bias will be added to the output units.
+ If it is set to None or one attribute of ParamAttr, conv2d
+ will create ParamAttr as bias_attr. If the Initializer of the bias_attr
+ is not set, the bias is initialized zero. Default: None.
+ use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
+ library is installed. Default: True
+ act (str): Activation type, if it is set to None, activation is not appended.
+ Default: None
+ name (str|None): A name for this layer(optional). If set None, the layer
+ will be named automatically. Default: None
+
+ Returns:
+ Variable: The tensor variable storing the convolution and \
+ non-linearity activation result.
+
+ Raises:
+ ValueError: If the shapes of input, filter_size, stride, padding and
+ groups mismatch.
+
+ Examples:
+ .. code-block:: python
+
+ data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
+ conv2d = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act="relu")
+ """
+
def __init__(self,
name_scope,
num_channels,
@@ -265,7 +368,7 @@ class Conv3D(layers.Layer):
self._param_attr = param_attr
self._bias_attr = bias_attr
- def _build_once(self, input):
+ def build_once(self, input):
num_channels = input.shape[1]
self._dtype = self._helper.input_dtype(input)
@@ -332,6 +435,116 @@ class Conv3D(layers.Layer):
class Conv3DTranspose(layers.Layer):
+ """
+ **Convlution3D transpose layer**
+
+ The convolution3D transpose layer calculates the output based on the input,
+    filter, and dilations, strides, paddings. Input(Input) and output(Output)
+    are in NCDHW format, where N is batch size, C is the number of channels,
+    D is the depth of the feature, H is the height of the feature, and W
+    is the width of the feature. Parameters(dilations, strides, paddings) contain
+    three elements, representing depth, height and width, respectively.
+    For details of the convolution transpose layer, please refer to the following
+    explanation and references `therein `_.
+    If bias attribute and activation type are provided, bias is added to
+ the output of the convolution, and the corresponding activation function
+ is applied to the final result.
+
+ For each input :math:`X`, the equation is:
+
+ .. math::
+
+ Out = \sigma (W \\ast X + b)
+
+ In the above equation:
+
+ * :math:`X`: Input value, a tensor with NCDHW format.
+ * :math:`W`: Filter value, a tensor with MCDHW format.
+ * :math:`\\ast`: Convolution operation.
+ * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
+ * :math:`\\sigma`: Activation function.
+ * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
+
+ Example:
+
+ - Input:
+
+ Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
+
+ Filter shape: :math:`(C_{in}, C_{out}, D_f, H_f, W_f)`
+
+ - Output:
+
+ Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
+
+ Where
+
+ .. math::
+
+ D_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\\\
+ H_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\\\
+ W_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1
+
+ Args:
+ input(Variable): The input image with [N, C, D, H, W] format.
+        num_filters(int): The number of filters. It is the same as the output
+            image channels.
+ output_size(int|tuple|None): The output image size. If output size is a
+ tuple, it must contain three integers, (image_D, image_H, image_W). This
+ parameter only works when filter_size is None.
+ filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
+ it must contain three integers, (filter_size_D, filter_size_H, filter_size_W).
+            Otherwise, the filter will be a cube. If filter_size is None, it will
+            be calculated from output_size.
+ padding(int|tuple): The padding size. If padding is a tuple, it must
+ contain three integers, (padding_D, padding_H, padding_W). Otherwise, the
+ padding_D = padding_H = padding_W = padding. Default: padding = 0.
+ stride(int|tuple): The stride size. If stride is a tuple, it must
+ contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
+ stride_D = stride_H = stride_W = stride. Default: stride = 1.
+ dilation(int|tuple): The dilation size. If dilation is a tuple, it must
+ contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
+ dilation_D = dilation_H = dilation_W = dilation. Default: dilation = 1.
+ groups(int): The groups number of the Conv3d transpose layer. Inspired by
+ grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
+ when group=2, the first half of the filters is only connected to the
+ first half of the input channels, while the second half of the
+ filters is only connected to the second half of the input channels.
+ Default: groups=1
+ param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
+ of conv3d_transpose. If it is set to None or one attribute of ParamAttr, conv3d_transpose
+ will create ParamAttr as param_attr. If the Initializer of the param_attr
+ is not set, the parameter is initialized with Xavier. Default: None.
+ bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv3d_transpose.
+ If it is set to False, no bias will be added to the output units.
+ If it is set to None or one attribute of ParamAttr, conv3d_transpose
+ will create ParamAttr as bias_attr. If the Initializer of the bias_attr
+ is not set, the bias is initialized zero. Default: None.
+ use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
+ library is installed. Default: True
+ act (str): Activation type, if it is set to None, activation is not appended.
+ Default: None.
+ name(str|None): A name for this layer(optional). If set None, the layer
+ will be named automatically.
+
+ Returns:
+ Variable: The tensor variable storing the convolution transpose result.
+
+ Raises:
+ ValueError: If the shapes of input, filter_size, stride, padding and
+ groups mismatch.
+
+ Examples:
+ .. code-block:: python
+
+            import numpy
+
+            with fluid.dygraph.guard():
+                data = numpy.random.random((5, 3, 12, 32, 32)).astype('float32')
+                conv3d_transpose = fluid.Conv3DTranspose(
+                    'Conv3DTranspose',
+                    num_filters=12,
+                    filter_size=12,
+                    use_cudnn=False)
+                transpose_res = conv3d_transpose(fluid.dygraph.to_variable(data))
+ """
+
def __init__(self,
name_scope,
num_filters,
@@ -362,7 +575,7 @@ class Conv3DTranspose(layers.Layer):
self._bias_attr = bias_attr
self._act = act
- def _build_once(self, input):
+ def build_once(self, input):
self._dtype = self._helper.input_dtype(input)
self._input_channel = input.shape[1]
@@ -436,6 +649,54 @@ class Conv3DTranspose(layers.Layer):
class Pool2D(layers.Layer):
+ """
+ ${comment}
+
+ Args:
+ input (Variable): The input tensor of pooling operator. The format of
+ input tensor is NCHW, where N is batch size, C is
+ the number of channels, H is the height of the
+ feature, and W is the width of the feature.
+ pool_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
+ it must contain two integers, (pool_size_Height, pool_size_Width).
+            Otherwise, the pool kernel size will be a square with side length pool_size.
+ pool_type: ${pooling_type_comment}
+ pool_stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
+ it must contain two integers, (pool_stride_Height, pool_stride_Width).
+            Otherwise, the pool stride size will be a square with side length pool_stride.
+ pool_padding (int|list|tuple): The pool padding size. If pool padding size is a tuple,
+ it must contain two integers, (pool_padding_on_Height, pool_padding_on_Width).
+            Otherwise, the pool padding size will be a square with side length pool_padding.
+ global_pooling (bool): ${global_pooling_comment}
+ use_cudnn (bool): ${use_cudnn_comment}
+ ceil_mode (bool): ${ceil_mode_comment}
+ name (str|None): A name for this layer(optional). If set None, the
+ layer will be named automatically.
+ exclusive (bool): Whether to exclude padding points in average pooling
+            mode. Default: True.
+
+ Returns:
+ Variable: The pooling result.
+
+ Raises:
+        ValueError: If 'pool_type' is neither "max" nor "avg"
+ ValueError: If 'global_pooling' is False and 'pool_size' is -1
+ ValueError: If 'use_cudnn' is not a bool value.
+
+ Examples:
+
+ .. code-block:: python
+
+ data = fluid.layers.data(
+ name='data', shape=[3, 32, 32], dtype='float32')
+ pool2d = fluid.Pool2D("pool2d",pool_size=2,
+ pool_type='max',
+ pool_stride=1,
+ global_pooling=False)
+
+ pool2d_res = pool2d(data)
+ """
+
def __init__(self,
name_scope,
pool_size=-1,
@@ -495,6 +756,102 @@ class Pool2D(layers.Layer):
class FC(layers.Layer):
+ """
+ **Fully Connected Layer**
+
+ This function creates a fully connected layer in the network. It can take
+ one or multiple tensors as its inputs(input can be a list of Variable, see
+ Args in detail). It creates a variable called weights for each input tensor,
+ which represents a fully connected weight matrix from each input unit to
+ each output unit. The fully connected layer multiplies each input tensor
+ with its corresponding weight to produce an output Tensor with shape [M, `size`],
+ where M is batch size. If multiple input tensors are given, the results of
+ multiple output tensors with shape [M, `size`] will be summed up. If bias_attr
+ is not None, a bias variable will be created and added to the output.
+ Finally, if activation is not None, it will be applied to the output as well.
+
+ When the input is single tensor:
+
+ .. math::
+
+ Out = Act({XW + b})
+
+ When the input are multiple tensors:
+
+ .. math::
+
+ Out = Act({\sum_{i=0}^{N-1}X_iW_i + b})
+
+ In the above equation:
+
+    * :math:`N`: Number of inputs. N equals len(input) if input is a list of Variable.
+ * :math:`X_i`: The i-th input tensor.
+    * :math:`W_i`: The i-th weight matrix corresponding to the i-th input tensor.
+ * :math:`b`: The bias parameter created by this layer (if needed).
+ * :math:`Act`: The activation function.
+ * :math:`Out`: The output tensor.
+
+ See below for an example.
+
+ .. code-block:: text
+
+ Given:
+ data_1.data = [[[0.1, 0.2],
+ [0.3, 0.4]]]
+ data_1.shape = (1, 2, 2) # 1 is batch_size
+
+ data_2 = [[[0.1, 0.2, 0.3]]]
+ data_2.shape = (1, 1, 3)
+
+ out = fluid.layers.fc(input=[data_1, data_2], size=2)
+
+ Then:
+ out.data = [[0.18669507, 0.1893476]]
+ out.shape = (1, 2)
+
+ Args:
+ input (Variable|list of Variable): The input tensor(s) of this layer, and the dimension of
+ the input tensor(s) is at least 2.
+ size(int): The number of output units in this layer.
+ num_flatten_dims (int, default 1): The fc layer can accept an input tensor with more than
+ two dimensions. If this happens, the multidimensional tensor will first be flattened
+ into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input
+ tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1)
+            dimensions will be flattened to form the first dimension of the final matrix (height of
+ the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to
+ form the second dimension of the final matrix (width of the matrix). For example, suppose
+ `X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3.
+ Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30].
+ param_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for learnable
+ parameters/weights of this layer.
+ bias_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for the bias
+ of this layer. If it is set to False, no bias will be added to the output units.
+ If it is set to None, the bias is initialized zero. Default: None.
+ act (str, default None): Activation to be applied to the output of this layer.
+ is_test(bool): A flag indicating whether execution is in test phase.
+ name (str, default None): The name of this layer.
+
+ Returns:
+ Variable: The transformation result.
+
+ Raises:
+ ValueError: If rank of the input tensor is less than 2.
+
+ Examples:
+ .. code-block:: python
+
+ # when input is single tensor
+ data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
+ fc = fluid.FC("fc", size=1000, act="tanh")
+ fc_res = fc(data)
+
+ # when input are multiple tensors
+ data_1 = fluid.layers.data(name="data_1", shape=[32, 32], dtype="float32")
+ data_2 = fluid.layers.data(name="data_2", shape=[24, 36], dtype="float32")
+ fc = fluid.FC("fc", size=1000, act="tanh")
+ fc_res = fc([data_1, data_2])
+ """
+
def __init__(self,
name_scope,
size,
@@ -522,7 +879,7 @@ class FC(layers.Layer):
assert isinstance(value, Parameter)
self.__w[i] = value
- def _build_once(self, input):
+ def build_once(self, input):
i = 0
for inp, param in self._helper.iter_inputs_and_params(input,
self._param_attr):
@@ -591,6 +948,91 @@ class FC(layers.Layer):
class BatchNorm(layers.Layer):
+ """
+ **Batch Normalization Layer**
+
+ Can be used as a normalizer function for conv2d and fully_connected operations.
+ The required data format for this layer is one of the following:
+
+ 1. NHWC `[batch, in_height, in_width, in_channels]`
+
+ 2. NCHW `[batch, in_channels, in_height, in_width]`
+
+ Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing
+ Internal Covariate Shift `_
+ for more details.
+
+ :math:`input` is the input features over a mini-batch.
+
+ .. math::
+
+ \\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\
+ \ mini-batch\ mean \\\\
+ \\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m}(x_i - \\
+ \\mu_{\\beta})^2 \\qquad &//\ mini-batch\ variance \\\\
+ \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\
+ \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\
+ y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift
+
+
+ When use_global_stats = True, the :math:`\\mu_{\\beta}`
+ and :math:`\\sigma_{\\beta}^{2}` are not the statistics of one mini-batch.
+ They are global (or running) statistics. (It usually got from the
+ pre-trained model.)
+ The training and testing (or inference) have the same behavior:
+
+ .. math::
+
+ \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\
+ \\sigma_{\\beta}^{2} + \\epsilon}} \\\\
+ y_i &\\gets \\gamma \\hat{x_i} + \\beta
+
+ Args:
+ input(variable): The rank of input variable can be 2, 3, 4, 5.
+ act(string, Default None): Activation type, linear|relu|prelu|...
+ is_test (bool, Default False): A flag indicating whether it is in
+            test phase or not.
+ momentum(float, Default 0.9): The value used for the moving_mean and
+ moving_var computation. The updated formula is:
+ :math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)`
+ :math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)`
+ Default is 0.9.
+ epsilon(float, Default 1e-05): A value added to the denominator for
+ numerical stability. Default is 1e-5.
+ param_attr(ParamAttr|None): The parameter attribute for Parameter `scale`
+ of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm
+ will create ParamAttr as param_attr. If the Initializer of the param_attr
+ is not set, the parameter is initialized with Xavier. Default: None.
+ bias_attr(ParamAttr|None): The parameter attribute for the bias of batch_norm.
+ If it is set to None or one attribute of ParamAttr, batch_norm
+ will create ParamAttr as bias_attr. If the Initializer of the bias_attr
+ is not set, the bias is initialized zero. Default: None.
+ data_layout(string, default NCHW): NCHW|NHWC
+ in_place(bool, Default False): Make the input and output of batch norm reuse memory.
+ name(string, Default None): A name for this layer(optional). If set None, the layer
+ will be named automatically.
+ moving_mean_name(string, Default None): The name of moving_mean which store the global Mean.
+ moving_variance_name(string, Default None): The name of the moving_variance which store the global Variance.
+ do_model_average_for_mean_and_var(bool, Default False): Do model average for mean and variance or not.
+ fuse_with_relu (bool): if True, this OP performs relu after batch norm.
+ use_global_stats(bool, Default False): Whether to use global mean and
+ variance. In inference or test mode, set use_global_stats to true
+ or is_test to true, and the behavior is equivalent.
+ In train mode, when setting use_global_stats True, the global mean
+ and variance are also used during train period.
+
+ Returns:
+ Variable: A tensor variable which is the result after applying batch normalization on the input.
+
+ Examples:
+
+        .. code-block:: python
+
+            x = fluid.layers.data(name='x', shape=[3, 32], dtype='float32')
+            fc = fluid.FC('fc', size=200, param_attr='fc1.w')
+            hidden1 = fc(x)
+            batch_norm = fluid.BatchNorm("batch_norm", 200)
+            hidden2 = batch_norm(hidden1)
+ """
+
def __init__(self,
name_scope,
num_channels,
@@ -629,7 +1071,7 @@ class BatchNorm(layers.Layer):
dtype=self._dtype,
default_initializer=Constant(1.0))
if use_global_stats and self._param_attr.learning_rate == 0.:
- self._scale._stop_gradient = True
+ self._scale.stop_gradient = True
self._bias = self.create_parameter(
attr=self._param_attr,
@@ -637,7 +1079,7 @@ class BatchNorm(layers.Layer):
dtype=self._dtype,
is_bias=True)
if use_global_stats and self._param_attr.learning_rate == 0.:
- self._bias._stop_gradient = True
+ self._bias.stop_gradient = True
self._mean = self.create_parameter(
attr=ParamAttr(
@@ -647,7 +1089,7 @@ class BatchNorm(layers.Layer):
do_model_average=do_model_average_for_mean_and_var),
shape=param_shape,
dtype=self._dtype)
- self._mean._stop_gradient = True
+ self._mean.stop_gradient = True
self._variance = self.create_parameter(
attr=ParamAttr(
@@ -657,7 +1099,7 @@ class BatchNorm(layers.Layer):
do_model_average=do_model_average_for_mean_and_var),
shape=param_shape,
dtype=self._dtype)
- self._variance._stop_gradient = True
+ self._variance.stop_gradient = True
self._in_place = in_place
self._momentum = momentum
@@ -666,7 +1108,7 @@ class BatchNorm(layers.Layer):
self._fuse_with_relu = fuse_with_relu
self._use_global_stats = use_global_stats
- def _build_once(self, input):
+ def build_once(self, input):
pass
def forward(self, input):
@@ -747,7 +1189,7 @@ class Embedding(layers.Layer):
dict_size = len(dataset.ids)
input = fluid.layers.data(name='ids', shape=[32, 32], dtype='float32')
- embedding = fluid.dygraph.Embedding(size=[dict_size, 16])
+ embedding = fluid.Embedding(size=[dict_size, 16])
fc = embedding(input)
"""
@@ -797,70 +1239,70 @@ class Embedding(layers.Layer):
class LayerNorm(layers.Layer):
- def __init__(self,
- name_scope,
- scale=True,
- shift=True,
- begin_norm_axis=1,
- epsilon=1e-05,
- param_attr=None,
- bias_attr=None,
- act=None):
- """
- ${comment}
+ """
+ ${comment}
- The formula is as follows:
+ The formula is as follows:
- .. math::
+ .. math::
- \\mu & = \\frac{1}{H}\\sum_{i=1}^{H} a_i
+ \\mu & = \\frac{1}{H}\\sum_{i=1}^{H} a_i
- \\sigma & = \\sqrt{\\frac{1}{H}\sum_{i=1}^{H}(a_i - \\mu)^2}
+ \\sigma & = \\sqrt{\\frac{1}{H}\sum_{i=1}^{H}(a_i - \\mu)^2}
- h & = f(\\frac{g}{\\sigma}(a - \\mu) + b)
+ h & = f(\\frac{g}{\\sigma}(a - \\mu) + b)
- * :math:`a`: the vector representation of the summed inputs to the neurons
- in that layer.
+ * :math:`a`: the vector representation of the summed inputs to the neurons
+ in that layer.
- * :math:`H`: the number of hidden units in a layers
+    * :math:`H`: the number of hidden units in a layer
- * :math:`g`: the trainable scale parameter.
+ * :math:`g`: the trainable scale parameter.
- * :math:`b`: the trainable bias parameter.
+ * :math:`b`: the trainable bias parameter.
- Args:
- input(Variable): The input tensor variable.
- scale(bool): Whether to learn the adaptive gain :math:`g` after
- normalization. Default True.
- shift(bool): Whether to learn the adaptive bias :math:`b` after
- normalization. Default True.
- begin_norm_axis(int): The normalization will be performed along
- dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`.
- Default 1.
- epsilon(float): The small value added to the variance to prevent
- division by zero. Default 1e-05.
- param_attr(ParamAttr|None): The parameter attribute for the learnable
- gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is
- omitted. If :attr:`scale` is True and :attr:`param_attr` is None,
- a default :code:`ParamAttr` would be added as scale. The
- :attr:`param_attr` is initialized as 1 if it is added. Default None.
- bias_attr(ParamAttr|None): The parameter attribute for the learnable
- bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is
- omitted. If :attr:`shift` is True and :attr:`param_attr` is None,
- a default :code:`ParamAttr` would be added as bias. The
- :attr:`bias_attr` is initialized as 0 if it is added. Default None.
- act(str): Activation to be applied to the output of layer normalizaiton.
- Default None.
- Returns:
- ${y_comment}
+ Args:
+ input(Variable): The input tensor variable.
+ scale(bool): Whether to learn the adaptive gain :math:`g` after
+ normalization. Default True.
+ shift(bool): Whether to learn the adaptive bias :math:`b` after
+ normalization. Default True.
+ begin_norm_axis(int): The normalization will be performed along
+ dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`.
+ Default 1.
+ epsilon(float): The small value added to the variance to prevent
+ division by zero. Default 1e-05.
+ param_attr(ParamAttr|None): The parameter attribute for the learnable
+ gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is
+ omitted. If :attr:`scale` is True and :attr:`param_attr` is None,
+ a default :code:`ParamAttr` would be added as scale. The
+ :attr:`param_attr` is initialized as 1 if it is added. Default None.
+ bias_attr(ParamAttr|None): The parameter attribute for the learnable
+ bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is
+ omitted. If :attr:`shift` is True and :attr:`param_attr` is None,
+ a default :code:`ParamAttr` would be added as bias. The
+ :attr:`bias_attr` is initialized as 0 if it is added. Default None.
+        act(str): Activation to be applied to the output of layer normalization.
+ Default None.
+ Returns:
+ ${y_comment}
- Examples:
+ Examples:
- >>> data = fluid.layers.data(name='data', shape=[3, 32, 32],
- >>> dtype='float32')
- >>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
- """
+ >>> data = fluid.layers.data(name='data', shape=[3, 32, 32],
+ >>> dtype='float32')
+ >>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
+ """
+ def __init__(self,
+ name_scope,
+ scale=True,
+ shift=True,
+ begin_norm_axis=1,
+ epsilon=1e-05,
+ param_attr=None,
+ bias_attr=None,
+ act=None):
super(LayerNorm, self).__init__(name_scope)
self._scale = scale
self._shift = shift
@@ -870,7 +1312,7 @@ class LayerNorm(layers.Layer):
self._bias_attr = bias_attr
self._act = act
- def _build_once(self, input):
+ def build_once(self, input):
self._dtype = self._helper.input_dtype(input)
input_shape = input.shape
param_shape = [
@@ -1232,7 +1674,7 @@ class NCE(layers.Layer):
'remote_prefetch': remote_prefetch
}
- def _build_once(self, input, label, sample_weight=None):
+ def build_once(self, input, label, sample_weight=None):
assert isinstance(input, Variable)
assert isinstance(label, Variable)
@@ -1318,7 +1760,7 @@ class PRelu(layers.Layer):
raise ValueError('mode should be one of all, channel, element.')
self._alpha_shape = [1]
- def _build_once(self, input):
+ def build_once(self, input):
if self._mode == 'channel':
self._alpha_shape = [1, input.shape[1], 1, 1]
elif self._mode == 'element':
@@ -1396,7 +1838,7 @@ class BilinearTensorProduct(layers.Layer):
self._name = name
self._inputs = dict()
- def _build_once(self, x, y):
+ def build_once(self, x, y):
self._dtype = self._helper.input_dtype(x)
param_shape = [self._size, x.shape[1], y.shape[1]]
@@ -1572,7 +2014,7 @@ class Conv2DTranspose(layers.Layer):
self._output_size = output_size
self._op_type = 'conv2d_transpose'
- def _build_once(self, input):
+ def build_once(self, input):
input_channel = input.shape[1]
if (input_channel == self._groups and
self._num_filters == input_channel and not self._use_cudnn):
@@ -1686,7 +2128,7 @@ class SequenceConv(layers.Layer):
bias_attr=None,
param_attr=None,
act=None):
- assert not _in_dygraph_mode(
+ assert not in_dygraph_mode(
), "SequenceConv is not supported by dynamic graph mode yet!"
super(SequenceConv, self).__init__(name_scope)
self._num_filters = num_filters
@@ -1696,7 +2138,7 @@ class SequenceConv(layers.Layer):
self._bias_attr = bias_attr
self._param_attr = param_attr
- def _build_once(self, input):
+ def build_once(self, input):
self._dtype = self._helper.input_dtype(input)
filter_shape = [self._filter_size * input.shape[1], self._num_filters]
self._filter_param = self.create_parameter(
@@ -1726,14 +2168,14 @@ class RowConv(layers.Layer):
future_context_size,
param_attr=None,
act=None):
- assert not _in_dygraph_mode(
+ assert not in_dygraph_mode(
), "RowConv is not supported by dynamic graph mode yet!"
super(RowConv, self).__init__(name_scope)
self._act = act
self._param_attr = param_attr
self._future_context_size = future_context_size
- def _build_once(self, input):
+ def build_once(self, input):
self._dtype = self._helper.input_dtype(input)
filter_shape = [self._future_context_size + 1, input.shape[1]]
self._filter_param = self.create_parameter(
@@ -1796,7 +2238,7 @@ class GroupNorm(layers.Layer):
if data_layout != 'NCHW':
raise ValueError("unsupported data layout:" + data_layout)
- def _build_once(self, input):
+ def build_once(self, input):
self._dtype = self._helper.input_dtype(input)
param_shape = [input.shape[1]]
if self._bias_attr:
@@ -1849,7 +2291,7 @@ class SpectralNorm(layers.Layer):
self._eps = eps
self._dim = dim
- def _build_once(self, weight):
+ def build_once(self, weight):
self._dtype = self._helper.input_dtype(weight)
input_shape = weight.shape
h = input_shape[self._dim]
@@ -1904,7 +2346,7 @@ class TreeConv(layers.Layer):
self._bias_attr = bias_attr
self._param_attr = param_attr
- def _build_once(self, nodes_vector, edge_set):
+ def build_once(self, nodes_vector, edge_set):
assert isinstance(nodes_vector, Variable)
assert isinstance(edge_set, Variable)
self._dtype = self._helper.input_dtype(nodes_vector)
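The docstrings added above describe the dygraph layer classes, while several of the embedded examples still show the functional `fluid.layers.*` counterparts. A hedged sketch of the class-based usage those docstrings cover, assuming the constructor signatures defined in this file (`name_scope` first, then the layer-specific arguments):

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        img = fluid.dygraph.to_variable(
            np.random.random((1, 3, 32, 32)).astype("float32"))

        conv = fluid.Conv2D("conv", num_channels=3, num_filters=2,
                            filter_size=3, act="relu")
        pool = fluid.Pool2D("pool", pool_size=2, pool_type="max", pool_stride=1)
        fc = fluid.FC("fc", size=10)

        hidden = pool(conv(img))
        logits = fc(hidden)      # FC flattens the trailing dimensions by default
        print(logits.shape)      # [1, 10]
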
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 7953d98bcbb826267fa21f6503e55049c8aff5ba..c05e5fb9e3a46e721c20fd9288b89009e32afcbe 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -67,6 +67,7 @@ __all__ = [
'cuda_places',
'cpu_places',
'cuda_pinned_places',
+ 'in_dygraph_mode',
]
EMPTY_VAR_NAME = core.kEmptyVarName()
@@ -79,7 +80,10 @@ _dygraph_tracer_ = None
_dygraph_current_expected_place_ = None
-def _in_dygraph_mode():
+def in_dygraph_mode():
+ '''
+ Returns(bool): True if the program is running in dynamic graph mode
+ '''
return _dygraph_tracer_ is not None
@@ -396,7 +400,7 @@ class Variable(object):
if not isinstance(dtype, core.VarDesc.VarType):
dtype = convert_np_dtype_to_dtype_(dtype)
- if _in_dygraph_mode():
+ if in_dygraph_mode():
# record vars in tracer rather than blocks
self._ivar = kwargs.get("ivar", None)
if not self._ivar:
@@ -482,21 +486,21 @@ class Variable(object):
self.block.vars[name] = self
self.op = None
- self.stop_gradient = stop_gradient
+ self._stop_gradient = stop_gradient
self.is_data = is_data
- def _numpy(self):
+ def numpy(self):
new_ivar = self._ivar._copy_to(core.CPUPlace(), True)
return np.array(new_ivar.value().get_tensor())
- def _backward(self):
+ def backward(self):
self._ivar._run_backward()
- def _gradient(self):
+ def gradient(self):
new_ivar = self._ivar._grad_ivar()._copy_to(core.CPUPlace(), True)
return np.array(new_ivar.value().get_tensor())
- def _clear_gradient(self):
+ def clear_gradient(self):
self._ivar._clear_gradient()
def __str__(self):
@@ -516,7 +520,7 @@ class Variable(object):
Returns:
str: The debug string.
"""
- if _in_dygraph_mode():
+ if in_dygraph_mode():
# TODO(panyx0718): add more dygraph debug info.
return 'name %s, dtype: %s shape: %s' % (self.name, self.dtype,
self.shape)
@@ -535,7 +539,7 @@ class Variable(object):
__repr__ = __str__
- def _set_desc(self, input):
+ def set_desc(self, input):
"""
Set the variable description.
@@ -548,43 +552,43 @@ class Variable(object):
self.desc = input
@property
- def _stop_gradient(self):
- if _in_dygraph_mode():
+ def stop_gradient(self):
+ if in_dygraph_mode():
return self._ivar.stop_gradient
else:
- return self.stop_gradient
+ return self._stop_gradient
- @_stop_gradient.setter
- def _stop_gradient(self, s):
- if _in_dygraph_mode():
+ @stop_gradient.setter
+ def stop_gradient(self, s):
+ if in_dygraph_mode():
self._ivar.stop_gradient = s
else:
- self.stop_gradient = s
+ self._stop_gradient = s
@property
def persistable(self):
- if _in_dygraph_mode():
+ if in_dygraph_mode():
return self._ivar.persistable
else:
return self.desc.persistable()
@persistable.setter
def persistable(self, p):
- if _in_dygraph_mode():
+ if in_dygraph_mode():
return self._ivar.persistable
else:
self.desc.set_persistable(p)
@property
def name(self):
- if _in_dygraph_mode():
+ if in_dygraph_mode():
return self._ivar.name
else:
return cpt.to_text(self.desc.name())
@name.setter
def name(self, new_name):
- if _in_dygraph_mode():
+ if in_dygraph_mode():
self._ivar.name = new_name
else:
self.desc.set_name(new_name)
@@ -592,14 +596,14 @@ class Variable(object):
@property
def shape(self):
# convert to tuple, make it as same as numpy API.
- if _in_dygraph_mode():
+ if in_dygraph_mode():
return self._ivar.shape
else:
return tuple(self.desc.shape())
@property
def dtype(self):
- if _in_dygraph_mode():
+ if in_dygraph_mode():
return self._ivar.dtype
else:
return self.desc.dtype()
@@ -611,7 +615,7 @@ class Variable(object):
@property
def type(self):
- if _in_dygraph_mode():
+ if in_dygraph_mode():
return self._ivar.dtype
else:
return self.desc.type()
@@ -721,7 +725,7 @@ class Variable(object):
name=unique_name.generate(".".join(self.name)),
dtype=self.dtype,
persistable=self.persistable,
- stop_gradient=self._stop_gradient, )
+ stop_gradient=self.stop_gradient, )
else:
return self
@@ -930,7 +934,7 @@ class Operator(object):
inputs=None,
outputs=None,
attrs=None):
- if _in_dygraph_mode():
+ if in_dygraph_mode():
if type is None:
raise ValueError(
"`type` to initialized an Operator can not be None.")
@@ -1049,7 +1053,7 @@ class Operator(object):
for arg in out_args:
out_arg_names.append(cpt.to_text(arg.name))
# TODO(minqiyang): could we remove variable's op in static mode?
- if not _in_dygraph_mode():
+ if not in_dygraph_mode():
arg.op = self
self.desc.set_output(out_proto.name, out_arg_names)
@@ -1095,7 +1099,7 @@ class Operator(object):
@property
def type(self):
- if _in_dygraph_mode():
+ if in_dygraph_mode():
return self.iop.type
else:
return self.desc.type()
@@ -1638,7 +1642,7 @@ class Block(object):
Returns:
Operator: the append Operator.
"""
- if _in_dygraph_mode():
+ if in_dygraph_mode():
op = Operator(
block=self,
desc=None,
@@ -1710,7 +1714,7 @@ class Block(object):
return self.ops[start:end]
def _prepend_op(self, *args, **kwargs):
- if _in_dygraph_mode():
+ if in_dygraph_mode():
op = Operator(
self,
None,
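The `Variable` changes above promote the underscore-prefixed dygraph helpers to public methods (`numpy`, `backward`, `gradient`, `clear_gradient`) and turn `stop_gradient` into a property backed by the private `_stop_gradient` field. A short sketch of the renamed surface, mirroring the test updates further down:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(np.ones([2, 2], dtype="float32"))
        x.stop_gradient = False     # property setter, was x._stop_gradient = ...

        y = fluid.layers.reduce_sum(fluid.layers.scale(x, scale=3.0))
        y.backward()                # was y._backward()

        print(y.numpy())            # was y._numpy()
        print(x.gradient())         # was x._gradient()
        x.clear_gradient()          # was x._clear_gradient()
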
diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index 6aff93dceaf5cfd299bdc9f68246ed579f248f3c..da2591b98058a2283275cc222194e89240e87ae1 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -165,7 +165,7 @@ class ConstantInitializer(Initializer):
'force_cpu': self._force_cpu or force_init_on_cpu()
},
stop_gradient=True)
- if not framework._in_dygraph_mode():
+ if not framework.in_dygraph_mode():
var.op = op
return op
@@ -245,7 +245,7 @@ class UniformInitializer(Initializer):
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
- if not framework._in_dygraph_mode():
+ if not framework.in_dygraph_mode():
var.op = op
return op
@@ -324,7 +324,7 @@ class NormalInitializer(Initializer):
outputs={"Out": var},
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
- if not framework._in_dygraph_mode():
+ if not framework.in_dygraph_mode():
var.op = op
return op
@@ -403,7 +403,7 @@ class TruncatedNormalInitializer(Initializer):
outputs={"Out": var},
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
- if not framework._in_dygraph_mode():
+ if not framework.in_dygraph_mode():
var.op = op
return op
@@ -509,7 +509,7 @@ class XavierInitializer(Initializer):
"seed": self._seed
},
stop_gradient=True)
- if not framework._in_dygraph_mode():
+ if not framework.in_dygraph_mode():
var.op = op
return op
@@ -610,7 +610,7 @@ class MSRAInitializer(Initializer):
"seed": self._seed
},
stop_gradient=True)
- if not framework._in_dygraph_mode():
+ if not framework.in_dygraph_mode():
var.op = op
return op
@@ -709,7 +709,7 @@ class BilinearInitializer(Initializer):
'shape': list(shape),
value_name: values
})
- if not framework._in_dygraph_mode():
+ if not framework.in_dygraph_mode():
var.op = op
return op
@@ -768,7 +768,7 @@ class NumpyArrayInitializer(Initializer):
value_name: values
},
stop_gradient=True)
- if not framework._in_dygraph_mode():
+ if not framework.in_dygraph_mode():
var.op = op
return op
diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py
index 7eb912645e5077d35a2d11d7d09a033d28345e15..11e3c4938bef4a3c97a724798e2f7273c25f06ed 100644
--- a/python/paddle/fluid/layer_helper.py
+++ b/python/paddle/fluid/layer_helper.py
@@ -17,7 +17,7 @@ from __future__ import print_function
import copy
import six
-from .framework import Parameter, dtype_is_floating, _in_dygraph_mode
+from .framework import Parameter, dtype_is_floating, in_dygraph_mode
from . import unique_name
from paddle.fluid.initializer import Constant, Xavier
from .param_attr import ParamAttr
diff --git a/python/paddle/fluid/layer_helper_base.py b/python/paddle/fluid/layer_helper_base.py
index 869a5f54e9cdf5740c5e216917d92880d7d61e2d..9eed00b16185d00f30dfd75f03e31fb45cf9567c 100644
--- a/python/paddle/fluid/layer_helper_base.py
+++ b/python/paddle/fluid/layer_helper_base.py
@@ -17,7 +17,7 @@ from __future__ import print_function
import copy
import numpy as np
-from .framework import Variable, default_main_program, default_startup_program, _in_dygraph_mode, _current_expected_place
+from .framework import Variable, default_main_program, default_startup_program, in_dygraph_mode, _current_expected_place
from . import unique_name
from .param_attr import ParamAttr, WeightNormParamAttr
from . import core
@@ -54,7 +54,7 @@ class LayerHelperBase(object):
Return Variable construct from value
"""
if isinstance(value, np.ndarray):
- assert _in_dygraph_mode(
+ assert in_dygraph_mode(
), "to_variable could only be called in dygraph mode"
if not block:
@@ -302,7 +302,7 @@ class LayerHelperBase(object):
param = self._create_weight_normalize(attr, shape, dtype)
WeightNormParamAttr.params_with_weight_norm.append(param)
return param
- if _in_dygraph_mode():
+ if in_dygraph_mode():
# In dygraph mode, we want the returned parameter to be
# initialized so that it can be used imperatively.
return self.main_program.global_block().create_parameter(
@@ -370,7 +370,7 @@ class LayerHelperBase(object):
initializer: initializer to use
"""
assert isinstance(var, Variable)
- if _in_dygraph_mode():
+ if in_dygraph_mode():
initializer(var, var.block)
else:
self.startup_program.global_block().create_var(
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 91414fdeb207781afd5e28afa5a3fa6e1018efb1..e69298d52b37111f1478e2dd72d8f1614964b1db 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -23,7 +23,7 @@ import os
import inspect
from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant, NumpyArrayInitializer
-from ..framework import Variable, OpProtoHolder, _in_dygraph_mode
+from ..framework import Variable, OpProtoHolder, in_dygraph_mode
from ..dygraph import base
from ..param_attr import ParamAttr
from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_
@@ -3288,7 +3288,7 @@ def layer_norm(input,
>>> dtype='float32')
>>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
"""
- assert _in_dygraph_mode(
+ assert in_dygraph_mode(
) is not True, "please use FC instead of fc in dygraph mode!"
helper = LayerHelper('layer_norm', **locals())
dtype = helper.input_dtype()
@@ -6454,7 +6454,7 @@ def squeeze(input, axes, name=None):
x = layers.data(name='x', shape=[5, 1, 10])
y = layers.sequeeze(input=x, axes=[1])
"""
- assert not _in_dygraph_mode(), (
+ assert not in_dygraph_mode(), (
"squeeze layer is not supported in dygraph mode yet.")
helper = LayerHelper("squeeze", **locals())
out = helper.create_variable_for_type_inference(dtype=input.dtype)
@@ -9193,7 +9193,7 @@ def _elementwise_op(helper):
op_type = helper.layer_type
x = helper.kwargs.get('x', None)
y = helper.kwargs.get('y', None)
- if _in_dygraph_mode():
+ if in_dygraph_mode():
x = base.to_variable(x)
y = base.to_variable(y)
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 02ace724c89e476c915c74bc48d506353e0ae7c6..c3b7aee2b4d2421927adeb9fd44a516a7999cf83 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -55,7 +55,7 @@ class Optimizer(object):
"""
def __init__(self, learning_rate, regularization=None, name=None):
- if framework._in_dygraph_mode():
+ if framework.in_dygraph_mode():
if not isinstance(learning_rate, float) and \
not isinstance(learning_rate, LearningRateDecay):
raise TypeError(
@@ -205,7 +205,7 @@ class Optimizer(object):
name = self._name + "_" + name
if (name in self._accumulators and
param.name in self._accumulators[name]):
- if framework._in_dygraph_mode():
+ if framework.in_dygraph_mode():
return self._accumulators[name][param.name]
raise Exception("Accumulator {} already exists for parameter {}".
format(name, param.name))
@@ -275,7 +275,7 @@ class Optimizer(object):
self._create_global_learning_rate()
optimize_ops = []
- if framework._in_dygraph_mode():
+ if framework.in_dygraph_mode():
for param_and_grad in parameters_and_grads:
if param_and_grad[1] is None:
continue
@@ -374,7 +374,7 @@ class Optimizer(object):
See examples in `apply_gradients`.
"""
self._dtype = loss.dtype
- if framework._in_dygraph_mode():
+ if framework.in_dygraph_mode():
if parameter_list is not None:
parameters = parameter_list
else:
@@ -459,7 +459,7 @@ class Optimizer(object):
Returns:
list: A list of operators appended to the current program.
"""
- if framework._in_dygraph_mode():
+ if framework.in_dygraph_mode():
with program_guard(framework.default_main_program(),
framework.default_startup_program()):
optimize_ops = self._create_optimization_pass(params_grads)
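Together with the Variable renames, the optimizer changes above keep the dygraph path working end to end. A hedged sketch of a single training step using the public names this patch introduces (the layer and data are illustrative):

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.optimizer import SGDOptimizer

    with fluid.dygraph.guard():
        fc = fluid.FC("fc", size=1)
        sgd = SGDOptimizer(learning_rate=0.01)

        x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype("float32"))
        loss = fluid.layers.reduce_mean(fc(x))

        loss.backward()          # was loss._backward()
        sgd.minimize(loss)
        fc.clear_gradients()     # parameters now expose the public clear_gradient()
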
diff --git a/python/paddle/fluid/tests/unittests/test_base_layer.py b/python/paddle/fluid/tests/unittests/test_base_layer.py
index 9cb88d4a8553f3b750f6cf3b24115b4d188ed1d6..04a36f7cafe7b4445125c4e9bd58f6d30d6c71aa 100644
--- a/python/paddle/fluid/tests/unittests/test_base_layer.py
+++ b/python/paddle/fluid/tests/unittests/test_base_layer.py
@@ -18,7 +18,7 @@ import numpy as np
import paddle.fluid as fluid
-class L1(fluid.dygraph.Layer):
+class L1(fluid.Layer):
def __init__(self, prefix):
super(L1, self).__init__(prefix)
self._param_attr = fluid.ParamAttr(
@@ -32,7 +32,7 @@ class L1(fluid.dygraph.Layer):
return self.w1 + self.w2
-class L2(fluid.dygraph.Layer):
+class L2(fluid.Layer):
def __init__(self, prefix):
super(L2, self).__init__(prefix)
self.layer1 = L1(self.full_name())
@@ -42,7 +42,7 @@ class L2(fluid.dygraph.Layer):
return self.layer1() + self.layer2()
-class L3(fluid.dygraph.Layer):
+class L3(fluid.Layer):
def __init__(self, prefix):
super(L3, self).__init__(prefix)
self.layer1 = L2(self.full_name())
@@ -59,7 +59,7 @@ class TestBaseLayer(unittest.TestCase):
ret = l()
self.assertEqual(l.w1.name, "test_one_level/L1_0.w_0")
self.assertEqual(l.w2.name, "test_one_level/L1_0.w_1")
- self.assertTrue(np.allclose(ret._numpy(), 0.2 * np.ones([2, 2])))
+ self.assertTrue(np.allclose(ret.numpy(), 0.2 * np.ones([2, 2])))
def test_three_level(self):
with fluid.dygraph.guard():
@@ -72,7 +72,7 @@ class TestBaseLayer(unittest.TestCase):
self.assertEqual(names[3], "test_three_level/L3_0/L2_0/L1_1.w_1")
self.assertEqual(names[4], "test_three_level/L3_0/L2_1/L1_0.w_0")
self.assertEqual(names[5], "test_three_level/L3_0/L2_1/L1_0.w_1")
- self.assertTrue(np.allclose(ret._numpy(), 0.8 * np.ones([2, 2])))
+ self.assertTrue(np.allclose(ret.numpy(), 0.8 * np.ones([2, 2])))
if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py
index 13f2d662178c7e1474ec43fdeadf7046516eb8e5..bc95b90ce4cc1b38b8201ae027b7533680e895ae 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py
@@ -18,11 +18,11 @@ import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core
-from paddle.fluid.dygraph.nn import FC
+from paddle.fluid import FC
from test_imperative_base import new_program_scope
-class MyLayer(fluid.dygraph.Layer):
+class MyLayer(fluid.Layer):
def __init__(self, name_scope):
super(MyLayer, self).__init__(name_scope)
@@ -34,7 +34,7 @@ class MyLayer(fluid.dygraph.Layer):
return [x]
-class MyPyLayer(fluid.dygraph.PyLayer):
+class MyPyLayer(fluid.PyLayer):
def __init__(self):
super(MyPyLayer, self).__init__()
@@ -48,7 +48,7 @@ class MyPyLayer(fluid.dygraph.PyLayer):
return np.array(dout) * (1 - np.square(np.array(out)))
-class MLP(fluid.dygraph.Layer):
+class MLP(fluid.Layer):
def __init__(self, name_scope):
super(MLP, self).__init__(name_scope)
self._fc1 = FC(self.full_name(),
@@ -71,7 +71,7 @@ class MLP(fluid.dygraph.Layer):
return x
-class SimpleRNNCell(fluid.dygraph.Layer):
+class SimpleRNNCell(fluid.Layer):
def __init__(self, name_scope, step_input_size, hidden_size, output_size,
param_attr):
super(SimpleRNNCell, self).__init__(name_scope)
@@ -81,7 +81,7 @@ class SimpleRNNCell(fluid.dygraph.Layer):
self._dtype = core.VarDesc.VarType.FP32
self.param_attr = param_attr
- def _build_once(self, inputs, pre_hidden):
+ def build_once(self, inputs, pre_hidden):
i2h_param_shape = [self.step_input_size, self.hidden_size]
h2h_param_shape = [self.hidden_size, self.hidden_size]
h2o_param_shape = [self.output_size, self.hidden_size]
@@ -159,7 +159,7 @@ class SimpleRNNCell(fluid.dygraph.Layer):
return reduce_out, hidden
-class SimpleRNN(fluid.dygraph.Layer):
+class SimpleRNN(fluid.Layer):
def __init__(self, name_scope):
super(SimpleRNN, self).__init__(name_scope)
self.seq_len = 4
@@ -200,22 +200,22 @@ class TestImperative(unittest.TestCase):
inputs.append(fluid.dygraph.base.to_variable(x))
ret = fluid.layers.sums(inputs)
loss = fluid.layers.reduce_sum(ret)
- loss._backward()
- self.assertTrue(np.allclose(ret._numpy(), x * 10))
- self.assertTrue(np.allclose(inputs[0]._gradient(), x))
+ loss.backward()
+ self.assertTrue(np.allclose(ret.numpy(), x * 10))
+ self.assertTrue(np.allclose(inputs[0].gradient(), x))
def test_layer(self):
with fluid.dygraph.guard():
cl = core.Layer()
cl.forward([])
- l = fluid.dygraph.Layer("l")
+ l = fluid.Layer("l")
self.assertRaises(NotImplementedError, l.forward, [])
def test_pylayer_func_id(self):
with fluid.dygraph.guard():
- class PyLayer1(fluid.dygraph.PyLayer):
+ class PyLayer1(fluid.PyLayer):
def __init__(self):
super(PyLayer1, self).__init__()
@@ -257,9 +257,9 @@ class TestImperative(unittest.TestCase):
my_py_layer = MyPyLayer()
var_inp = fluid.dygraph.base.to_variable(np_inp)
outs = my_py_layer(var_inp)
- dy_out = np.sum(outs[0]._numpy())
- outs[0]._backward()
- dy_grad = var_inp._gradient()
+ dy_out = np.sum(outs[0].numpy())
+ outs[0].backward()
+ dy_grad = var_inp.gradient()
with new_program_scope():
inp = fluid.layers.data(
@@ -287,9 +287,9 @@ class TestImperative(unittest.TestCase):
l = MyLayer("my_layer")
x = l(var_inp)[0]
self.assertIsNotNone(x)
- dy_out = x._numpy()
- x._backward()
- dy_grad = l._x_for_debug._gradient()
+ dy_out = x.numpy()
+ x.backward()
+ dy_grad = l._x_for_debug.gradient()
with new_program_scope():
inp = fluid.layers.data(
@@ -314,9 +314,9 @@ class TestImperative(unittest.TestCase):
var_inp = fluid.dygraph.base.to_variable(np_inp)
mlp = MLP("mlp")
out = mlp(var_inp)
- dy_out = out._numpy()
- out._backward()
- dy_grad = mlp._fc1._w._gradient()
+ dy_out = out.numpy()
+ out.backward()
+ dy_grad = mlp._fc1._w.gradient()
with new_program_scope():
inp = fluid.layers.data(
@@ -358,11 +358,11 @@ class TestImperative(unittest.TestCase):
var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3])
simple_rnn = SimpleRNN("simple_rnn")
outs, pre_hiddens = simple_rnn.forward(var_inp)
- dy_out = outs[3]._numpy()
- outs[3]._backward()
- dy_grad_h2o = simple_rnn._cell._h2o_w._gradient()
- dy_grad_h2h = simple_rnn._cell._h2h_w._gradient()
- dy_grad_i2h = simple_rnn._cell._i2h_w._gradient()
+ dy_out = outs[3].numpy()
+ outs[3].backward()
+ dy_grad_h2o = simple_rnn._cell._h2o_w.gradient()
+ dy_grad_h2h = simple_rnn._cell._h2h_w.gradient()
+ dy_grad_i2h = simple_rnn._cell._i2h_w.gradient()
with new_program_scope():
inp = fluid.layers.data(
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py b/python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py
index a92b7d62fa598a3ec9b53bade2805cc033f4b9d9..c28058100a43eb4f7da8331d9ac75db9c090bdf9 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py
@@ -18,11 +18,11 @@ import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import SGDOptimizer
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
+from paddle.fluid import Conv2D, Pool2D, FC
from paddle.fluid.dygraph.base import to_variable
-class SimpleImgConvPool(fluid.dygraph.Layer):
+class SimpleImgConvPool(fluid.Layer):
def __init__(self,
name_scope,
num_channels,
@@ -71,7 +71,7 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
return x
-class MNIST(fluid.dygraph.Layer):
+class MNIST(fluid.Layer):
def __init__(self, name_scope):
super(MNIST, self).__init__(name_scope)
@@ -125,21 +125,21 @@ class TestDygraphCheckpoint(unittest.TestCase):
img = to_variable(dy_x_data)
label = to_variable(y_data)
- label._stop_gradient = True
+ label.stop_gradient = True
cost = mnist(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
- dy_out = avg_loss._numpy()
+ dy_out = avg_loss.numpy()
- avg_loss._backward()
+ avg_loss.backward()
sgd.minimize(avg_loss)
fluid.dygraph.save_persistables(mnist, "save_dir")
mnist.clear_gradients()
for param in mnist.parameters():
- dy_param_init_value[param.name] = param._numpy()
+ dy_param_init_value[param.name] = param.numpy()
mnist.load_dict(
fluid.dygraph.load_persistables(mnist, "save_dir"))
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py
index ccebd4a54727f383bd4e46ff57bfdc9381577d05..ca2cffa9c75cc851f0911cb0063f4e82bb2a41eb 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py
@@ -32,11 +32,11 @@ NUM_BATCHES = int(os.environ.get('NUM_BATCHES', 5))
NUM_EPOCHES = int(os.environ.get('NUM_EPOCHES', 1))
-class DMF(fluid.dygraph.Layer):
+class DMF(fluid.Layer):
def __init__(self, name_scope):
super(DMF, self).__init__(name_scope)
- self._user_latent = fluid.dygraph.FC(self.full_name(), 256)
- self._item_latent = fluid.dygraph.FC(self.full_name(), 256)
+ self._user_latent = fluid.FC(self.full_name(), 256)
+ self._item_latent = fluid.FC(self.full_name(), 256)
self._user_layers = []
self._item_layers = []
@@ -45,13 +45,11 @@ class DMF(fluid.dygraph.Layer):
self._user_layers.append(
self.add_sublayer(
'user_layer_%d' % i,
- fluid.dygraph.FC(
- self.full_name(), self._hid_sizes[i], act='relu')))
+ fluid.FC(self.full_name(), self._hid_sizes[i], act='relu')))
self._item_layers.append(
self.add_sublayer(
'item_layer_%d' % i,
- fluid.dygraph.FC(
- self.full_name(), self._hid_sizes[i], act='relu')))
+ fluid.FC(self.full_name(), self._hid_sizes[i], act='relu')))
def forward(self, users, items):
users = self._user_latent(users)
@@ -63,19 +61,18 @@ class DMF(fluid.dygraph.Layer):
return fluid.layers.elementwise_mul(users, items)
-class MLP(fluid.dygraph.Layer):
+class MLP(fluid.Layer):
def __init__(self, name_scope):
super(MLP, self).__init__(name_scope)
- self._user_latent = fluid.dygraph.FC(self.full_name(), 256)
- self._item_latent = fluid.dygraph.FC(self.full_name(), 256)
+ self._user_latent = fluid.FC(self.full_name(), 256)
+ self._item_latent = fluid.FC(self.full_name(), 256)
self._match_layers = []
self._hid_sizes = [128, 64]
for i in range(len(self._hid_sizes)):
self._match_layers.append(
self.add_sublayer(
'match_layer_%d' % i,
- fluid.dygraph.FC(
- self.full_name(), self._hid_sizes[i], act='relu')))
+ fluid.FC(self.full_name(), self._hid_sizes[i], act='relu')))
self._mat
def forward(self, users, items):
@@ -88,7 +85,7 @@ class MLP(fluid.dygraph.Layer):
return match_vec
-class DeepCF(fluid.dygraph.Layer):
+class DeepCF(fluid.Layer):
def __init__(self, name_scope, num_users, num_items, matrix):
super(DeepCF, self).__init__(name_scope)
self._num_users = num_users
@@ -99,11 +96,11 @@ class DeepCF(fluid.dygraph.Layer):
matrix.dtype,
is_bias=False,
default_initializer=fluid.initializer.NumpyArrayInitializer(matrix))
- self._rating_matrix._stop_gradient = True
+ self._rating_matrix.stop_gradient = True
self._mlp = MLP(self.full_name())
self._dmf = DMF(self.full_name())
- self._match_fc = fluid.dygraph.FC(self.full_name(), 1, act='sigmoid')
+ self._match_fc = fluid.FC(self.full_name(), 1, act='sigmoid')
def forward(self, users, items):
# users_emb = self._user_emb(users)
@@ -255,10 +252,10 @@ class TestDygraphDeepCF(unittest.TestCase):
fluid.layers.log_loss(prediction,
to_variable(labels_np[
slice:slice + BATCH_SIZE])))
- loss._backward()
+ loss.backward()
adam.minimize(loss)
deepcf.clear_gradients()
- dy_loss = loss._numpy()
+ dy_loss = loss.numpy()
sys.stderr.write('dynamic loss: %s %s\n' % (slice, dy_loss))
self.assertEqual(static_loss, dy_loss)
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gan.py b/python/paddle/fluid/tests/unittests/test_imperative_gan.py
index 58faa1cb85af9cedb70f3a12244cfeb44e0f4f52..5d773ec1c9db160cd63a28c634043037260e0b82 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_gan.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_gan.py
@@ -22,12 +22,12 @@ import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.optimizer import SGDOptimizer
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
+from paddle.fluid import Conv2D, Pool2D, FC
from test_imperative_base import new_program_scope
from paddle.fluid.dygraph.base import to_variable
-class Discriminator(fluid.dygraph.Layer):
+class Discriminator(fluid.Layer):
def __init__(self, name_scope):
super(Discriminator, self).__init__(name_scope)
self._fc1 = FC(self.full_name(), size=32, act='elu')
@@ -38,7 +38,7 @@ class Discriminator(fluid.dygraph.Layer):
return self._fc2(x)
-class Generator(fluid.dygraph.Layer):
+class Generator(fluid.Layer):
def __init__(self, name_scope):
super(Generator, self).__init__(name_scope)
self._fc1 = FC(self.full_name(), size=64, act='elu')
@@ -150,7 +150,7 @@ class TestDygraphGAN(unittest.TestCase):
x=d_fake, label=to_variable(np.zeros([2, 1], np.float32))))
d_loss = d_loss_real + d_loss_fake
- d_loss._backward()
+ d_loss.backward()
sgd.minimize(d_loss)
discriminator.clear_gradients()
generator.clear_gradients()
@@ -160,15 +160,15 @@ class TestDygraphGAN(unittest.TestCase):
g_loss = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_fake, label=to_variable(np.ones([2, 1], np.float32))))
- g_loss._backward()
+ g_loss.backward()
sgd.minimize(g_loss)
for p in discriminator.parameters():
- dy_params[p.name] = p._numpy()
+ dy_params[p.name] = p.numpy()
for p in generator.parameters():
- dy_params[p.name] = p._numpy()
+ dy_params[p.name] = p.numpy()
- dy_g_loss = g_loss._numpy()
- dy_d_loss = d_loss._numpy()
+ dy_g_loss = g_loss.numpy()
+ dy_d_loss = d_loss.numpy()
self.assertEqual(dy_g_loss, static_g_loss)
self.assertEqual(dy_d_loss, static_d_loss)
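The same rename covers gradient inspection: _gradient() becomes gradient(), as in the SimpleRNN hunk at the top of this section. A short sketch, where the single FC layer and the input shape are illustrative assumptions:

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph.base import to_variable

    with fluid.dygraph.guard():
        fc = fluid.FC("fc", 4)
        x = to_variable(np.ones([2, 8], dtype='float32'))

        loss = fluid.layers.reduce_mean(fc(x))
        loss.backward()                          # was loss._backward()

        for param in fc.parameters():
            print(param.name, param.gradient())  # was param._gradient()

        fc.clear_gradients()                     # drop the accumulated gradients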
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py
index a8fb9ecfe4be16b73ac2144259f25ed3859ece7e..234fcd60404286977309083257c24d941db77449 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py
@@ -15,14 +15,12 @@
import contextlib
import unittest
import numpy as np
-import six
import sys
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.optimizer import AdamOptimizer
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
from test_imperative_base import new_program_scope
from paddle.fluid.dygraph.base import to_variable
@@ -31,7 +29,7 @@ def gen_data():
pass
-class GraphConv(fluid.dygraph.Layer):
+class GraphConv(fluid.Layer):
def __init__(self, name_scope, in_features, out_features):
super(GraphConv, self).__init__(name_scope)
@@ -50,7 +48,7 @@ class GraphConv(fluid.dygraph.Layer):
return fluid.layers.matmul(adj, support) + self.bias
-class GCN(fluid.dygraph.Layer):
+class GCN(fluid.Layer):
def __init__(self, name_scope, num_hidden):
super(GCN, self).__init__(name_scope)
self.gc = GraphConv(self.full_name(), num_hidden, 32)
@@ -134,10 +132,9 @@ class TestDygraphGNN(unittest.TestCase):
loss = fluid.layers.reduce_sum(loss)
adam = AdamOptimizer(learning_rate=1e-3)
adam.minimize(loss)
- self.assertEqual(static_loss, loss._numpy())
- self.assertTrue(
- np.allclose(static_weight, model.gc.weight._numpy()))
- sys.stderr.write('%s %s\n' % (static_loss, loss._numpy()))
+ self.assertEqual(static_loss, loss.numpy())
+ self.assertTrue(np.allclose(static_weight, model.gc.weight.numpy()))
+ sys.stderr.write('%s %s\n' % (static_loss, loss.numpy()))
if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py
index 5ab01839fbc20bbd3c242878c4ea23a00f7b0dca..76b8d3aa3943e44a17ab822618d8d1cb85aaa551 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py
@@ -128,25 +128,25 @@ class TestImperativeMnist(unittest.TestCase):
img = to_variable(dy_x_data)
label = to_variable(y_data)
- label._stop_gradient = True
+ label.stop_gradient = True
cost = mnist(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
- dy_out = avg_loss._numpy()
+ dy_out = avg_loss.numpy()
if epoch == 0 and batch_id == 0:
for param in mnist.parameters():
- dy_param_init_value[param.name] = param._numpy()
+ dy_param_init_value[param.name] = param.numpy()
- avg_loss._backward()
+ avg_loss.backward()
sgd.minimize(avg_loss)
mnist.clear_gradients()
dy_param_value = {}
for param in mnist.parameters():
- dy_param_value[param.name] = param._numpy()
+ dy_param_value[param.name] = param.numpy()
with new_program_scope():
fluid.default_startup_program().random_seed = seed
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
index 8b659a3e08e381dd6f55b666d9f5f1b172a51930..b9f93119e83159c5bc3052b0292168a9ef641d3e 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
@@ -28,7 +28,7 @@ from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
-class MLP(fluid.dygraph.Layer):
+class MLP(fluid.Layer):
def __init__(self, name_scope, param_attr=None, bias_attr=None):
super(MLP, self).__init__(name_scope)
@@ -75,18 +75,18 @@ class TestImperativeOptimizerBase(unittest.TestCase):
cost = mlp(img)
avg_loss = fluid.layers.reduce_mean(cost)
- dy_out = avg_loss._numpy()
+ dy_out = avg_loss.numpy()
if batch_id == 0:
for param in mlp.parameters():
- dy_param_init_value[param.name] = param._numpy()
+ dy_param_init_value[param.name] = param.numpy()
- avg_loss._backward()
+ avg_loss.backward()
optimizer.minimize(avg_loss)
mlp.clear_gradients()
dy_param_value = {}
for param in mlp.parameters():
- dy_param_value[param.name] = param._numpy()
+ dy_param_value[param.name] = param.numpy()
with new_program_scope():
fluid.default_startup_program().random_seed = seed
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
index eb8a82430f062003a66c159c679fe51d7994971a..06ee5f75145778d764b37812fc246367d27870b5 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
@@ -24,10 +24,9 @@ from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
import numpy as np
import six
-from paddle.fluid.backward import append_backward
-class SimpleLSTMRNN(fluid.dygraph.Layer):
+class SimpleLSTMRNN(fluid.Layer):
def __init__(self,
name_scope,
hidden_size,
@@ -45,7 +44,7 @@ class SimpleLSTMRNN(fluid.dygraph.Layer):
self.cell_array = []
self.hidden_array = []
- def _build_once(self, input_embedding, init_hidden=None, init_cell=None):
+ def build_once(self, input_embedding, init_hidden=None, init_cell=None):
self.weight_1_arr = []
self.weight_2_arr = []
self.bias_arr = []
@@ -132,7 +131,7 @@ class SimpleLSTMRNN(fluid.dygraph.Layer):
return real_res, last_hidden, last_cell
-class PtbModel(fluid.dygraph.Layer):
+class PtbModel(fluid.Layer):
def __init__(self,
name_scope,
hidden_size,
@@ -177,7 +176,7 @@ class PtbModel(fluid.dygraph.Layer):
default_initializer=fluid.initializer.UniformInitializer(
low=-self.init_scale, high=self.init_scale))
- def _build_once(self, input, label, init_hidden, init_cell):
+ def build_once(self, input, label, init_hidden, init_cell):
pass
def forward(self, input, label, init_hidden, init_cell):
@@ -260,13 +259,13 @@ class TestDygraphPtbRnn(unittest.TestCase):
init_cell)
if i == 0:
for param in ptb_model.parameters():
- dy_param_init[param.name] = param._numpy()
- dy_loss._backward()
+ dy_param_init[param.name] = param.numpy()
+ dy_loss.backward()
sgd.minimize(dy_loss)
ptb_model.clear_gradients()
if i == batch_num - 1:
for param in ptb_model.parameters():
- dy_param_updated[param.name] = param._numpy()
+ dy_param_updated[param.name] = param.numpy()
with new_program_scope():
fluid.default_startup_program().random_seed = seed
@@ -333,10 +332,10 @@ class TestDygraphPtbRnn(unittest.TestCase):
for k in range(3, len(out)):
static_param_updated[static_param_name_list[k -
3]] = out[k]
- self.assertTrue(np.allclose(static_loss_value, dy_loss._numpy()))
- self.assertTrue(np.allclose(static_last_cell_value, last_cell._numpy()))
+ self.assertTrue(np.allclose(static_loss_value, dy_loss.numpy()))
+ self.assertTrue(np.allclose(static_last_cell_value, last_cell.numpy()))
self.assertTrue(
- np.allclose(static_last_hidden_value, last_hidden._numpy()))
+ np.allclose(static_last_hidden_value, last_hidden.numpy()))
for key, value in six.iteritems(static_param_init):
# print("static_init name: {}, value {}".format(key, value))
# print("dy_init name: {}, value {}".format(key, dy_param_init[key]))
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
index 1d786d584632769e4318bcdeb24ef7ef8ea18597..d9ef08b3c491b24323bb1469165ed5482737013a 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
@@ -21,7 +21,7 @@ import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.layer_helper import LayerHelper
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC
+from paddle.fluid import Conv2D, Pool2D, BatchNorm, FC
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
@@ -68,7 +68,7 @@ def optimizer_setting(params):
return optimizer
-class ConvBNLayer(fluid.dygraph.Layer):
+class ConvBNLayer(fluid.Layer):
def __init__(self,
name_scope,
num_channels,
@@ -99,7 +99,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
return y
-class BottleneckBlock(fluid.dygraph.Layer):
+class BottleneckBlock(fluid.Layer):
def __init__(self,
name_scope,
num_channels,
@@ -156,7 +156,7 @@ class BottleneckBlock(fluid.dygraph.Layer):
return layer_helper.append_activation(y)
-class ResNet(fluid.dygraph.Layer):
+class ResNet(fluid.Layer):
def __init__(self, name_scope, layers=50, class_dim=102):
super(ResNet, self).__init__(name_scope)
@@ -247,7 +247,7 @@ class TestDygraphResnet(unittest.TestCase):
dy_param_init_value = {}
for param in resnet.parameters():
- dy_param_init_value[param.name] = param._numpy()
+ dy_param_init_value[param.name] = param.numpy()
for batch_id, data in enumerate(train_reader()):
if batch_id >= batch_num:
@@ -260,20 +260,20 @@ class TestDygraphResnet(unittest.TestCase):
img = to_variable(dy_x_data)
label = to_variable(y_data)
- label._stop_gradient = True
+ label.stop_gradient = True
out = resnet(img)
loss = fluid.layers.cross_entropy(input=out, label=label)
avg_loss = fluid.layers.mean(x=loss)
- dy_out = avg_loss._numpy()
+ dy_out = avg_loss.numpy()
if batch_id == 0:
for param in resnet.parameters():
if param.name not in dy_param_init_value:
- dy_param_init_value[param.name] = param._numpy()
+ dy_param_init_value[param.name] = param.numpy()
- avg_loss._backward()
+ avg_loss.backward()
dy_grad_value = {}
for param in resnet.parameters():
@@ -288,7 +288,7 @@ class TestDygraphResnet(unittest.TestCase):
dy_param_value = {}
for param in resnet.parameters():
- dy_param_value[param.name] = param._numpy()
+ dy_param_value[param.name] = param.numpy()
with new_program_scope():
fluid.default_startup_program().random_seed = seed
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer.py
index 6f87051dc4f1e55ecb6d87a5ca1b636d4d71a46d..90457cc664c0fb1ede99f498ae087caeeebe082b 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_transformer.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer.py
@@ -16,7 +16,8 @@ from __future__ import print_function
import unittest
import paddle.fluid as fluid
-from paddle.fluid.dygraph import Embedding, LayerNorm, FC, to_variable, Layer, guard
+from paddle.fluid import Embedding, LayerNorm, FC, Layer
+from paddle.fluid.dygraph import to_variable, guard
from test_imperative_base import new_program_scope
from paddle.fluid import core
import numpy as np
@@ -985,15 +986,15 @@ class TestDygraphTransformer(unittest.TestCase):
if i == 0:
for param in transformer.parameters():
- dy_param_init[param.name] = param._numpy()
+ dy_param_init[param.name] = param.numpy()
- dy_avg_cost._backward()
+ dy_avg_cost.backward()
optimizer.minimize(dy_avg_cost)
transformer.clear_gradients()
if i == batch_num - 1:
for param in transformer.parameters():
- dy_param_updated[param.name] = param._numpy()
+ dy_param_updated[param.name] = param.numpy()
with new_program_scope():
fluid.default_startup_program().random_seed = seed
@@ -1069,13 +1070,13 @@ class TestDygraphTransformer(unittest.TestCase):
4]] = out[k]
self.assertTrue(
- np.array_equal(static_avg_cost_value, dy_avg_cost._numpy()))
+ np.array_equal(static_avg_cost_value, dy_avg_cost.numpy()))
self.assertTrue(
- np.array_equal(static_sum_cost_value, dy_sum_cost._numpy()))
+ np.array_equal(static_sum_cost_value, dy_sum_cost.numpy()))
self.assertTrue(
- np.array_equal(static_predict_value, dy_predict._numpy()))
+ np.array_equal(static_predict_value, dy_predict.numpy()))
self.assertTrue(
- np.array_equal(static_token_num_value, dy_token_num._numpy()))
+ np.array_equal(static_token_num_value, dy_token_num.numpy()))
for key, value in six.iteritems(static_param_init):
self.assertTrue(np.array_equal(value, dy_param_init[key]))
for key, value in six.iteritems(static_param_updated):
diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py
index 6cc3c6d90bdae0fe09a368109d7db28f1b69983f..5b56644700e7bc173662a619db5996b2974318cb 100644
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -102,7 +102,7 @@ class TestLayer(LayerTest):
dy_ret = lm(base.to_variable(inp))
self.assertTrue(np.allclose(static_ret, static_ret2))
- self.assertTrue(np.allclose(dy_ret._numpy(), static_ret2))
+ self.assertTrue(np.allclose(dy_ret.numpy(), static_ret2))
def test_relu(self):
with self.static_graph():
@@ -116,7 +116,7 @@ class TestLayer(LayerTest):
t = np.ones([3, 3], dtype='float32')
dy_ret = layers.relu(base.to_variable(t))
- self.assertTrue(np.allclose(static_ret, dy_ret._numpy()))
+ self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
def test_matmul(self):
with self.static_graph():
@@ -137,7 +137,7 @@ class TestLayer(LayerTest):
t2 = np.ones([3, 3], dtype='float32')
dy_ret = layers.matmul(base.to_variable(t), base.to_variable(t2))
- self.assertTrue(np.allclose(static_ret, dy_ret._numpy()))
+ self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
def test_conv2d(self):
with self.static_graph():
@@ -164,7 +164,7 @@ class TestLayer(LayerTest):
'conv2d', num_channels=3, num_filters=3, filter_size=[2, 2])
dy_ret = conv2d(base.to_variable(images))
- self.assertTrue(np.allclose(static_ret, dy_ret._numpy()))
+ self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
self.assertTrue(np.allclose(static_ret, static_ret2))
def test_gru_unit(self):
@@ -206,7 +206,7 @@ class TestLayer(LayerTest):
for i in range(len(static_ret)):
self.assertTrue(np.allclose(static_ret[i], static_ret2[i]))
- self.assertTrue(np.allclose(static_ret[i], dy_ret[i]._numpy()))
+ self.assertTrue(np.allclose(static_ret[i], dy_ret[i].numpy()))
def test_elementwise_math(self):
n = np.ones([3, 3], dtype='float32')
@@ -248,8 +248,8 @@ class TestLayer(LayerTest):
ret = layers.elementwise_sub(ret, n5)
dy_ret = layers.elementwise_mul(ret, n6)
self.assertTrue(
- np.allclose(static_ret, dy_ret._numpy()),
- '%s vs %s' % (static_ret, dy_ret._numpy()))
+ np.allclose(static_ret, dy_ret.numpy()),
+ '%s vs %s' % (static_ret, dy_ret.numpy()))
def test_elementwise_minmax(self):
n = np.ones([3, 3], dtype='float32')
@@ -259,8 +259,8 @@ class TestLayer(LayerTest):
min_ret = layers.elementwise_min(n, n2)
max_ret = layers.elementwise_max(n, n2)
- self.assertTrue(np.allclose(n, min_ret._numpy()))
- self.assertTrue(np.allclose(n2, max_ret._numpy()))
+ self.assertTrue(np.allclose(n, min_ret.numpy()))
+ self.assertTrue(np.allclose(n2, max_ret.numpy()))
def test_sequence_conv(self):
inp_np = np.arange(12).reshape([3, 4]).astype('float32')
@@ -327,7 +327,7 @@ class TestLayer(LayerTest):
'conv2d_transpose', num_filters=10, output_size=28)
dy_rlt = conv2d_transpose(base.to_variable(inp_np))
self.assertTrue(np.allclose(static_rlt2, static_rlt))
- self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt))
+ self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt))
def test_bilinear_tensor_product(self):
inp_np_x = np.array([[1, 2, 3]]).astype('float32')
@@ -370,7 +370,7 @@ class TestLayer(LayerTest):
dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y))
self.assertTrue(np.allclose(static_rlt2, static_rlt))
- self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt))
+ self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt))
def test_prelu(self):
inp_np = np.ones([5, 200, 100, 100]).astype('float32')
@@ -411,7 +411,7 @@ class TestLayer(LayerTest):
dy_rlt = prelu(base.to_variable(inp_np))
self.assertTrue(np.allclose(static_rlt2, static_rlt))
- self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt))
+ self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt))
def test_embeding(self):
inp_word = np.array([[[1]]]).astype('int64')
@@ -444,7 +444,7 @@ class TestLayer(LayerTest):
static_rlt3 = emb2(base.to_variable(inp_word))
self.assertTrue(np.allclose(static_rlt2, static_rlt))
- self.assertTrue(np.allclose(static_rlt3._numpy(), static_rlt))
+ self.assertTrue(np.allclose(static_rlt3.numpy(), static_rlt))
def test_nce(self):
window_size = 5
@@ -558,7 +558,7 @@ class TestLayer(LayerTest):
nce_loss3 = nce(embs3, words[label_word])
self.assertTrue(np.allclose(static_rlt2, static_rlt))
- self.assertTrue(np.allclose(nce_loss3._numpy(), static_rlt))
+ self.assertTrue(np.allclose(nce_loss3.numpy(), static_rlt))
def test_conv3d(self):
with self.static_graph():
@@ -585,7 +585,7 @@ class TestLayer(LayerTest):
conv3d = nn.Conv3D('conv3d', num_filters=3, filter_size=2)
dy_ret = conv3d(base.to_variable(images))
- self.assertTrue(np.allclose(static_ret, dy_ret._numpy()))
+ self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
self.assertTrue(np.allclose(static_ret, static_ret2))
def test_row_conv(self):
@@ -679,7 +679,7 @@ class TestLayer(LayerTest):
groupNorm = nn.GroupNorm('GroupNorm', groups=2)
dy_ret = groupNorm(base.to_variable(input))
- self.assertTrue(np.allclose(static_ret, dy_ret._numpy()))
+ self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
self.assertTrue(np.allclose(static_ret, static_ret2))
def test_spectral_norm(self):
@@ -729,7 +729,7 @@ class TestLayer(LayerTest):
spectralNorm = nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2)
dy_ret = spectralNorm(base.to_variable(input))
- self.assertTrue(np.allclose(static_ret, dy_ret._numpy()))
+ self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
self.assertTrue(np.allclose(static_ret, static_ret2))
def test_tree_conv(self):
@@ -802,7 +802,7 @@ class TestLayer(LayerTest):
dy_ret = treeConv(base.to_variable(vectors), base.to_variable(adj))
self.assertTrue(np.allclose(static_ret, static_ret2))
- self.assertTrue(np.allclose(static_ret, dy_ret._numpy()))
+ self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
def test_conv3d_transpose(self):
input_array = np.arange(0, 48).reshape(
@@ -832,7 +832,7 @@ class TestLayer(LayerTest):
use_cudnn=False)
dy_rlt = conv3d_transpose(base.to_variable(input_array))
self.assertTrue(np.allclose(static_rlt2, static_rlt))
- self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt))
+ self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt))
class TestBook(unittest.TestCase):