Unverified commit 60d47855, authored by lujun and committed by GitHub

Merge pull request #16779 from junjun315/move-api-to-root

cherry pick move api
...@@ -13,6 +13,7 @@ paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, d ...@@ -13,6 +13,7 @@ paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, d
paddle.fluid.cuda_places (ArgSpec(args=['device_ids'], varargs=None, keywords=None, defaults=(None,)), ('document', '7d9a51fc9cf3c5245b5227080a8064c3')) paddle.fluid.cuda_places (ArgSpec(args=['device_ids'], varargs=None, keywords=None, defaults=(None,)), ('document', '7d9a51fc9cf3c5245b5227080a8064c3'))
paddle.fluid.cpu_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', '4c0cd83f0b401fc2ff84c70974e5d210')) paddle.fluid.cpu_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', '4c0cd83f0b401fc2ff84c70974e5d210'))
paddle.fluid.cuda_pinned_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd0c3ebd813c39958c92b78e3eef7e912')) paddle.fluid.cuda_pinned_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd0c3ebd813c39958c92b78e3eef7e912'))
paddle.fluid.in_dygraph_mode (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'f06314a1cb30c96b5808dde2219c2dae'))
paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f5369953dd0c443961cf79f7a00e1a03')) paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f5369953dd0c443961cf79f7a00e1a03'))
paddle.fluid.Executor.infer_from_dataset (ArgSpec(args=['self', 'program', 'dataset', 'scope', 'thread', 'debug', 'fetch_list', 'fetch_info', 'print_period'], varargs=None, keywords=None, defaults=(None, None, None, 0, False, None, None, 100)), ('document', '9c7decb955b9c4f718114179c8985581')) paddle.fluid.Executor.infer_from_dataset (ArgSpec(args=['self', 'program', 'dataset', 'scope', 'thread', 'debug', 'fetch_list', 'fetch_info', 'print_period'], varargs=None, keywords=None, defaults=(None, None, None, 0, False, None, None, 100)), ('document', '9c7decb955b9c4f718114179c8985581'))
......
...@@ -66,6 +66,8 @@ from . import compiler ...@@ -66,6 +66,8 @@ from . import compiler
from .compiler import * from .compiler import *
from paddle.fluid.layers.math_op_patch import monkey_patch_variable from paddle.fluid.layers.math_op_patch import monkey_patch_variable
from . import install_check from . import install_check
from .dygraph.nn import *
from .dygraph.layers import *
Tensor = LoDTensor Tensor = LoDTensor
......
...@@ -22,7 +22,7 @@ __all__ = ['enabled', 'guard', 'to_variable'] ...@@ -22,7 +22,7 @@ __all__ = ['enabled', 'guard', 'to_variable']
def enabled(): def enabled():
return framework._in_dygraph_mode() return framework.in_dygraph_mode()
@signature_safe_contextmanager @signature_safe_contextmanager
......
...@@ -97,20 +97,12 @@ def load_persistables(vardict, dirname, filename=None): ...@@ -97,20 +97,12 @@ def load_persistables(vardict, dirname, filename=None):
Examples: Examples:
.. code-block:: python .. code-block:: python
my_layer = layer(fluid.dygraph.Layer) my_layer = layer(fluid.Layer)
param_path = "./my_paddle_model" param_path = "./my_paddle_model"
param_dict = fluid.dygraph.load_persistables(my_layer.parameters(), param_path) param_dict = fluid.dygraph.load_persistables(my_layer.parameters(), param_path)
param_1 = param_dict['PtbModel_0.w_1'] param_1 = param_dict['PtbModel_0.w_1']
or:
my_layer = layer(fluid.dygraph.Layer)
param_path = "./my_paddle_model"
filename = "model.file"
param_dict = fluid.dygraph.load_persistables(my_layer.state_dict(), param_path,
filename=filename)
param_1 = param_dict['PtbModel_0.w_1']
""" """
if isinstance(vardict, collections.OrderedDict): if isinstance(vardict, collections.OrderedDict):
return _load_var_from_file(vardict, dirname, filename) return _load_var_from_file(vardict, dirname, filename)
......
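Since the second example was dropped from the docstring above, a minimal end-to-end sketch of the remaining API may help; `MyLayer` and the parameter key are illustrative assumptions, not part of this change, and `param_path` is assumed to already contain previously saved persistables.

.. code-block:: python

    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        my_layer = MyLayer("my_layer")   # hypothetical fluid.Layer subclass
        param_path = "./my_paddle_model"  # assumed to hold saved persistables
        # returns a dict mapping parameter names to the loaded variables
        param_dict = fluid.dygraph.load_persistables(my_layer.parameters(), param_path)
        param_1 = param_dict['MyLayer_0.w_0']  # illustrative key: "<Layer>_<n>.w_<n>"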
...@@ -16,7 +16,7 @@ from __future__ import print_function ...@@ -16,7 +16,7 @@ from __future__ import print_function
import copy import copy
import six import six
from ..framework import Parameter, _in_dygraph_mode from ..framework import Parameter, in_dygraph_mode
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
from .. import core from .. import core
from six.moves import zip from six.moves import zip
......
...@@ -139,14 +139,14 @@ class Layer(core.Layer): ...@@ -139,14 +139,14 @@ class Layer(core.Layer):
def clear_gradients(self): def clear_gradients(self):
for p in self.parameters(): for p in self.parameters():
p._clear_gradient() p.clear_gradient()
def _build_once(self, *args): def build_once(self, *args):
pass pass
def __call__(self, *inputs): def __call__(self, *inputs):
if not self._built: if not self._built:
self._build_once(*inputs) self.build_once(*inputs)
outputs = self.forward(*inputs) outputs = self.forward(*inputs)
self._built = True self._built = True
......
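The rename from `_build_once` to `build_once` makes the lazy-build hook part of the public `Layer` contract: the first `__call__` runs `build_once(*inputs)` before `forward`, then marks the layer as built. A minimal sketch of a user-defined layer using the renamed hook (the `MyFC` class and its shapes are illustrative, not part of this diff):

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    class MyFC(fluid.Layer):
        def __init__(self, name_scope, size):
            super(MyFC, self).__init__(name_scope)
            self._size = size

        def build_once(self, input):
            # runs once, on the first call, when the input shape is known
            self._w = self.create_parameter(
                attr=fluid.ParamAttr(), shape=[input.shape[1], self._size],
                dtype='float32')

        def forward(self, input):
            return fluid.layers.matmul(input, self._w)

    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(np.ones([4, 8], dtype='float32'))
        out = MyFC("my_fc", size=2)(x)  # build_once runs first, then forward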
...@@ -19,7 +19,7 @@ from six.moves import reduce ...@@ -19,7 +19,7 @@ from six.moves import reduce
from .. import core from .. import core
from ..layers import utils from ..layers import utils
from . import layers from . import layers
from ..framework import Variable, _in_dygraph_mode, OpProtoHolder, Parameter from ..framework import Variable, in_dygraph_mode, OpProtoHolder, Parameter
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
from ..initializer import Normal, Constant, NumpyArrayInitializer from ..initializer import Normal, Constant, NumpyArrayInitializer
import numpy as np import numpy as np
...@@ -33,6 +33,109 @@ __all__ = [ ...@@ -33,6 +33,109 @@ __all__ = [
class Conv2D(layers.Layer): class Conv2D(layers.Layer):
"""
The convolution2D layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input and
Output are in NCHW format, where N is batch size, C is the number of
channels, H is the height of the feature, and W is the width of the feature.
Filter is in MCHW format, where M is the number of output image channels,
C is the number of input image channels, H is the height of the filter,
and W is the width of the filter. If groups is greater than 1,
C equals the number of input image channels divided by groups.
Please refer to UFLDL's `convolution
<http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_
for more details.
If bias attribution and activation type are provided, bias is added to the
output of the convolution, and the corresponding activation function is
applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
Where:
* :math:`X`: Input value, a tensor with NCHW format.
* :math:`W`: Filter value, a tensor with MCHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
Args:
input (Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of filters. It is the same as the number of
output image channels.
filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square.
stride (int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: stride = 1.
padding (int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: padding = 0.
dilation (int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: dilation = 1.
groups (int): The groups number of the Conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1.
param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None.
bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv2d.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
act (str): Activation type, if it is set to None, activation is not appended.
Default: None
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically. Default: None
Returns:
Variable: The tensor variable storing the convolution and \
non-linearity activation result.
Raises:
ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples:
.. code-block:: python
data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
conv2d = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act="relu")
"""
def __init__(self, def __init__(self,
name_scope, name_scope,
num_channels, num_channels,
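The Examples block above still shows the static-graph `fluid.layers.conv2d` call. A dygraph-style sketch of the class itself, assuming the `(name_scope, num_channels, num_filters, filter_size, ...)` constructor shown in this diff and the root-level export added by this PR:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        img = fluid.dygraph.to_variable(
            np.random.uniform(-1, 1, [2, 3, 32, 32]).astype('float32'))  # NCHW
        conv2d = fluid.Conv2D("conv2d", num_channels=3, num_filters=2,
                              filter_size=3, act="relu")
        out = conv2d(img)  # (2, 2, 30, 30) with the default stride=1, padding=0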
...@@ -265,7 +368,7 @@ class Conv3D(layers.Layer): ...@@ -265,7 +368,7 @@ class Conv3D(layers.Layer):
self._param_attr = param_attr self._param_attr = param_attr
self._bias_attr = bias_attr self._bias_attr = bias_attr
def _build_once(self, input): def build_once(self, input):
num_channels = input.shape[1] num_channels = input.shape[1]
self._dtype = self._helper.input_dtype(input) self._dtype = self._helper.input_dtype(input)
...@@ -332,6 +435,116 @@ class Conv3D(layers.Layer): ...@@ -332,6 +435,116 @@ class Conv3D(layers.Layer):
class Conv3DTranspose(layers.Layer): class Conv3DTranspose(layers.Layer):
"""
**Convolution3D transpose layer**
The convolution3D transpose layer calculates the output based on the input,
filter, and dilations, strides, paddings. Input(Input) and output(Output)
are in NCDHW format. Where N is batch size, C is the number of channels,
D is the depth of the feature, H is the height of the feature, and W
is the width of the feature. Parameters(dilations, strides, paddings) are
three elements. These three elements represent depth, height and width, respectively.
For the details of the convolution transpose layer, please refer to the following
explanation and the references `therein <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_.
If bias attribution and activation type are provided, bias is added to
the output of the convolution, and the corresponding activation function
is applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
In the above equation:
* :math:`X`: Input value, a tensor with NCDHW format.
* :math:`W`: Filter value, a tensor with MCDHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{in}, C_{out}, D_f, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
Where
.. math::
D_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\\\
H_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\\\
W_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1
Args:
input(Variable): The input image with [N, C, D, H, W] format.
num_filters(int): The number of filters. It is the same as the number of
output image channels.
output_size(int|tuple|None): The output image size. If output size is a
tuple, it must contain three integers, (image_D, image_H, image_W). This
parameter only works when filter_size is None.
filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
it must contain three integers, (filter_size_D, filter_size_H, filter_size_W).
Otherwise, the filter will be a square. None if the output size is used to
calculate filter_size.
padding(int|tuple): The padding size. If padding is a tuple, it must
contain three integers, (padding_D, padding_H, padding_W). Otherwise, the
padding_D = padding_H = padding_W = padding. Default: padding = 0.
stride(int|tuple): The stride size. If stride is a tuple, it must
contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
stride_D = stride_H = stride_W = stride. Default: stride = 1.
dilation(int|tuple): The dilation size. If dilation is a tuple, it must
contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
dilation_D = dilation_H = dilation_W = dilation. Default: dilation = 1.
groups(int): The groups number of the Conv3d transpose layer. Inspired by
grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
when group=2, the first half of the filters is only connected to the
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
Default: groups=1
param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of conv3d_transpose. If it is set to None or one attribute of ParamAttr, conv3d_transpose
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv3d_transpose.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv3d_transpose
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
act (str): Activation type, if it is set to None, activation is not appended.
Default: None.
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: The tensor variable storing the convolution transpose result.
Raises:
ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples:
.. code-block:: python
conv3d_transpose = nn.Conv3DTranspose(
'Conv3DTranspose',
num_filters=12,
filter_size=12,
use_cudnn=False)
transpose_res = conv3d_transpose(base.to_variable(input_array))
"""
def __init__(self, def __init__(self,
name_scope, name_scope,
num_filters, num_filters,
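The docstring example above leaves the imports and `input_array` implicit; a fuller sketch under the same assumptions (NCDHW input, the module aliases `nn` and `base` used in the original snippet):

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph import base, nn

    with fluid.dygraph.guard():
        # NCDHW input: batch=2, channels=3, depth=height=width=6
        input_array = np.random.random([2, 3, 6, 6, 6]).astype('float32')
        conv3d_transpose = nn.Conv3DTranspose(
            'Conv3DTranspose', num_filters=12, filter_size=12, use_cudnn=False)
        transpose_res = conv3d_transpose(base.to_variable(input_array))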
...@@ -362,7 +575,7 @@ class Conv3DTranspose(layers.Layer): ...@@ -362,7 +575,7 @@ class Conv3DTranspose(layers.Layer):
self._bias_attr = bias_attr self._bias_attr = bias_attr
self._act = act self._act = act
def _build_once(self, input): def build_once(self, input):
self._dtype = self._helper.input_dtype(input) self._dtype = self._helper.input_dtype(input)
self._input_channel = input.shape[1] self._input_channel = input.shape[1]
...@@ -436,6 +649,54 @@ class Conv3DTranspose(layers.Layer): ...@@ -436,6 +649,54 @@ class Conv3DTranspose(layers.Layer):
class Pool2D(layers.Layer): class Pool2D(layers.Layer):
"""
${comment}
Args:
input (Variable): The input tensor of pooling operator. The format of
input tensor is NCHW, where N is batch size, C is
the number of channels, H is the height of the
feature, and W is the width of the feature.
pool_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain two integers, (pool_size_Height, pool_size_Width).
Otherwise, the pool kernel size will be a square of an int.
pool_type: ${pooling_type_comment}
pool_stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain two integers, (pool_stride_Height, pool_stride_Width).
Otherwise, the pool stride size will be a square of an int.
pool_padding (int|list|tuple): The pool padding size. If pool padding size is a tuple,
it must contain two integers, (pool_padding_on_Height, pool_padding_on_Width).
Otherwise, the pool padding size will be a square of an int.
global_pooling (bool): ${global_pooling_comment}
use_cudnn (bool): ${use_cudnn_comment}
ceil_mode (bool): ${ceil_mode_comment}
name (str|None): A name for this layer(optional). If set None, the
layer will be named automatically.
exclusive (bool): Whether to exclude padding points in average pooling
mode. Default: True.
Returns:
Variable: The pooling result.
Raises:
ValueError: If 'pool_type' is neither "max" nor "avg"
ValueError: If 'global_pooling' is False and 'pool_size' is -1
ValueError: If 'use_cudnn' is not a bool value.
Examples:
.. code-block:: python
data = fluid.layers.data(
name='data', shape=[3, 32, 32], dtype='float32')
pool2d = fluid.Pool2D("pool2d", pool_size=2,
pool_type='max',
pool_stride=1,
global_pooling=False)
pool2d_res = pool2d(data)
"""
def __init__(self, def __init__(self,
name_scope, name_scope,
pool_size=-1, pool_size=-1,
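The Examples block above feeds a static `fluid.layers.data` placeholder into the dygraph layer; a purely dynamic-graph sketch, assuming the same constructor arguments and an NCHW input:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        data = fluid.dygraph.to_variable(
            np.random.random([1, 3, 32, 32]).astype('float32'))  # NCHW
        pool2d = fluid.Pool2D("pool2d", pool_size=2, pool_type='max',
                              pool_stride=1, global_pooling=False)
        pool2d_res = pool2d(data)  # (1, 3, 31, 31) with stride 1 and no padding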
...@@ -495,6 +756,102 @@ class Pool2D(layers.Layer): ...@@ -495,6 +756,102 @@ class Pool2D(layers.Layer):
class FC(layers.Layer): class FC(layers.Layer):
"""
**Fully Connected Layer**
This function creates a fully connected layer in the network. It can take
one or multiple tensors as its inputs (input can be a list of Variable; see
Args for details). It creates a variable called weights for each input tensor,
which represents a fully connected weight matrix from each input unit to
each output unit. The fully connected layer multiplies each input tensor
with its corresponding weight to produce an output Tensor with shape [M, `size`],
where M is batch size. If multiple input tensors are given, the results of
multiple output tensors with shape [M, `size`] will be summed up. If bias_attr
is not None, a bias variable will be created and added to the output.
Finally, if activation is not None, it will be applied to the output as well.
When the input is single tensor:
.. math::
Out = Act({XW + b})
When the inputs are multiple tensors:
.. math::
Out = Act({\sum_{i=0}^{N-1}X_iW_i + b})
In the above equation:
* :math:`N`: Number of the input. N equals to len(input) if input is list of Variable.
* :math:`X_i`: The i-th input tensor.
* :math:`W_i`: The i-th weight matrix corresponding to the i-th input tensor.
* :math:`b`: The bias parameter created by this layer (if needed).
* :math:`Act`: The activation function.
* :math:`Out`: The output tensor.
See below for an example.
.. code-block:: text
Given:
data_1.data = [[[0.1, 0.2],
[0.3, 0.4]]]
data_1.shape = (1, 2, 2) # 1 is batch_size
data_2 = [[[0.1, 0.2, 0.3]]]
data_2.shape = (1, 1, 3)
out = fluid.layers.fc(input=[data_1, data_2], size=2)
Then:
out.data = [[0.18669507, 0.1893476]]
out.shape = (1, 2)
Args:
input (Variable|list of Variable): The input tensor(s) of this layer, and the dimension of
the input tensor(s) is at least 2.
size(int): The number of output units in this layer.
num_flatten_dims (int, default 1): The fc layer can accept an input tensor with more than
two dimensions. If this happens, the multidimensional tensor will first be flattened
into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input
tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1)
dimensions will be flatten to form the first dimension of the final matrix (height of
the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to
form the second dimension of the final matrix (width of the matrix). For example, suppose
`X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3.
Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30].
param_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for learnable
parameters/weights of this layer.
bias_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for the bias
of this layer. If it is set to False, no bias will be added to the output units.
If it is set to None, the bias is initialized zero. Default: None.
act (str, default None): Activation to be applied to the output of this layer.
is_test(bool): A flag indicating whether execution is in test phase.
name (str, default None): The name of this layer.
Returns:
Variable: The transformation result.
Raises:
ValueError: If rank of the input tensor is less than 2.
Examples:
.. code-block:: python
# when input is single tensor
data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
fc = fluid.FC("fc", size=1000, act="tanh")
fc_res = fc(data)
# when the inputs are multiple tensors
data_1 = fluid.layers.data(name="data_1", shape=[32, 32], dtype="float32")
data_2 = fluid.layers.data(name="data_2", shape=[24, 36], dtype="float32")
fc = fluid.FC("fc", size=1000, act="tanh")
fc_res = fc([data_1, data_2])
"""
def __init__(self, def __init__(self,
name_scope, name_scope,
size, size,
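Like the other Examples blocks, the FC snippet above builds on `fluid.layers.data`; a dygraph-only sketch with a concrete input (the shapes and size are illustrative):

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        data = fluid.dygraph.to_variable(
            np.random.random([4, 32]).astype('float32'))
        fc = fluid.FC("fc", size=64, act="tanh")
        fc_res = fc(data)  # shape (4, 64): each 32-dim row maps to 64 units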
...@@ -522,7 +879,7 @@ class FC(layers.Layer): ...@@ -522,7 +879,7 @@ class FC(layers.Layer):
assert isinstance(value, Parameter) assert isinstance(value, Parameter)
self.__w[i] = value self.__w[i] = value
def _build_once(self, input): def build_once(self, input):
i = 0 i = 0
for inp, param in self._helper.iter_inputs_and_params(input, for inp, param in self._helper.iter_inputs_and_params(input,
self._param_attr): self._param_attr):
...@@ -591,6 +948,91 @@ class FC(layers.Layer): ...@@ -591,6 +948,91 @@ class FC(layers.Layer):
class BatchNorm(layers.Layer): class BatchNorm(layers.Layer):
"""
**Batch Normalization Layer**
Can be used as a normalizer function for conv2d and fully_connected operations.
The required data format for this layer is one of the following:
1. NHWC `[batch, in_height, in_width, in_channels]`
2. NCHW `[batch, in_channels, in_height, in_width]`
Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing
Internal Covariate Shift <https://arxiv.org/pdf/1502.03167.pdf>`_
for more details.
:math:`input` is the input features over a mini-batch.
.. math::
\\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\
\ mini-batch\ mean \\\\
\\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m}(x_i - \\
\\mu_{\\beta})^2 \\qquad &//\ mini-batch\ variance \\\\
\\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\
\\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\
y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift
When use_global_stats = True, the :math:`\\mu_{\\beta}`
and :math:`\\sigma_{\\beta}^{2}` are not the statistics of one mini-batch.
They are global (or running) statistics. (They are usually obtained from a
pre-trained model.)
The training and testing (or inference) have the same behavior:
.. math::
\\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\
\\sigma_{\\beta}^{2} + \\epsilon}} \\\\
y_i &\\gets \\gamma \\hat{x_i} + \\beta
Args:
input(variable): The rank of input variable can be 2, 3, 4, 5.
act(string, Default None): Activation type, linear|relu|prelu|...
is_test (bool, Default False): A flag indicating whether it is in
test phase or not.
momentum(float, Default 0.9): The value used for the moving_mean and
moving_var computation. The updated formula is:
:math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)`
:math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)`
Default is 0.9.
epsilon(float, Default 1e-05): A value added to the denominator for
numerical stability. Default is 1e-5.
param_attr(ParamAttr|None): The parameter attribute for Parameter `scale`
of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr(ParamAttr|None): The parameter attribute for the bias of batch_norm.
If it is set to None or one attribute of ParamAttr, batch_norm
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
data_layout(string, default NCHW): NCHW|NHWC
in_place(bool, Default False): Make the input and output of batch norm reuse memory.
name(string, Default None): A name for this layer(optional). If set None, the layer
will be named automatically.
moving_mean_name(string, Default None): The name of moving_mean which store the global Mean.
moving_variance_name(string, Default None): The name of the moving_variance which store the global Variance.
do_model_average_for_mean_and_var(bool, Default False): Do model average for mean and variance or not.
fuse_with_relu (bool): if True, this OP performs relu after batch norm.
use_global_stats(bool, Default False): Whether to use global mean and
variance. In inference or test mode, setting use_global_stats to True
or setting is_test to True is equivalent.
In train mode, when setting use_global_stats True, the global mean
and variance are also used during train period.
Returns:
Variable: A tensor variable which is the result after applying batch normalization on the input.
Examples:
.. code-block:: python
fc = fluid.FC('fc', size=200, param_attr='fc1.w')
hidden1 = fc(x)
batch_norm = fluid.BatchNorm("batch_norm", 10)
hidden2 = batch_norm(hidden1)
"""
def __init__(self, def __init__(self,
name_scope, name_scope,
num_channels, num_channels,
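The docstring example above declares a BatchNorm with 10 channels while the preceding FC outputs 200; a self-consistent dygraph sketch, assuming the `(name_scope, num_channels, ...)` constructor shown in this diff (shapes are illustrative):

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(
            np.random.random([8, 10, 16, 16]).astype('float32'))  # NCHW, 10 channels
        batch_norm = fluid.BatchNorm("batch_norm", num_channels=10)
        out = batch_norm(x)  # per-channel normalization over the mini-batch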
...@@ -629,7 +1071,7 @@ class BatchNorm(layers.Layer): ...@@ -629,7 +1071,7 @@ class BatchNorm(layers.Layer):
dtype=self._dtype, dtype=self._dtype,
default_initializer=Constant(1.0)) default_initializer=Constant(1.0))
if use_global_stats and self._param_attr.learning_rate == 0.: if use_global_stats and self._param_attr.learning_rate == 0.:
self._scale._stop_gradient = True self._scale.stop_gradient = True
self._bias = self.create_parameter( self._bias = self.create_parameter(
attr=self._param_attr, attr=self._param_attr,
...@@ -637,7 +1079,7 @@ class BatchNorm(layers.Layer): ...@@ -637,7 +1079,7 @@ class BatchNorm(layers.Layer):
dtype=self._dtype, dtype=self._dtype,
is_bias=True) is_bias=True)
if use_global_stats and self._param_attr.learning_rate == 0.: if use_global_stats and self._param_attr.learning_rate == 0.:
self._bias._stop_gradient = True self._bias.stop_gradient = True
self._mean = self.create_parameter( self._mean = self.create_parameter(
attr=ParamAttr( attr=ParamAttr(
...@@ -647,7 +1089,7 @@ class BatchNorm(layers.Layer): ...@@ -647,7 +1089,7 @@ class BatchNorm(layers.Layer):
do_model_average=do_model_average_for_mean_and_var), do_model_average=do_model_average_for_mean_and_var),
shape=param_shape, shape=param_shape,
dtype=self._dtype) dtype=self._dtype)
self._mean._stop_gradient = True self._mean.stop_gradient = True
self._variance = self.create_parameter( self._variance = self.create_parameter(
attr=ParamAttr( attr=ParamAttr(
...@@ -657,7 +1099,7 @@ class BatchNorm(layers.Layer): ...@@ -657,7 +1099,7 @@ class BatchNorm(layers.Layer):
do_model_average=do_model_average_for_mean_and_var), do_model_average=do_model_average_for_mean_and_var),
shape=param_shape, shape=param_shape,
dtype=self._dtype) dtype=self._dtype)
self._variance._stop_gradient = True self._variance.stop_gradient = True
self._in_place = in_place self._in_place = in_place
self._momentum = momentum self._momentum = momentum
...@@ -666,7 +1108,7 @@ class BatchNorm(layers.Layer): ...@@ -666,7 +1108,7 @@ class BatchNorm(layers.Layer):
self._fuse_with_relu = fuse_with_relu self._fuse_with_relu = fuse_with_relu
self._use_global_stats = use_global_stats self._use_global_stats = use_global_stats
def _build_once(self, input): def build_once(self, input):
pass pass
def forward(self, input): def forward(self, input):
...@@ -747,7 +1189,7 @@ class Embedding(layers.Layer): ...@@ -747,7 +1189,7 @@ class Embedding(layers.Layer):
dict_size = len(dataset.ids) dict_size = len(dataset.ids)
input = fluid.layers.data(name='ids', shape=[32, 32], dtype='float32') input = fluid.layers.data(name='ids', shape=[32, 32], dtype='float32')
embedding = fluid.dygraph.Embedding(size=[dict_size, 16]) embedding = fluid.Embedding(size=[dict_size, 16])
fc = embedding(input) fc = embedding(input)
""" """
...@@ -797,70 +1239,70 @@ class Embedding(layers.Layer): ...@@ -797,70 +1239,70 @@ class Embedding(layers.Layer):
class LayerNorm(layers.Layer): class LayerNorm(layers.Layer):
def __init__(self, """
name_scope, ${comment}
scale=True,
shift=True,
begin_norm_axis=1,
epsilon=1e-05,
param_attr=None,
bias_attr=None,
act=None):
"""
${comment}
The formula is as follows: The formula is as follows:
.. math:: .. math::
\\mu & = \\frac{1}{H}\\sum_{i=1}^{H} a_i \\mu & = \\frac{1}{H}\\sum_{i=1}^{H} a_i
\\sigma & = \\sqrt{\\frac{1}{H}\sum_{i=1}^{H}(a_i - \\mu)^2} \\sigma & = \\sqrt{\\frac{1}{H}\sum_{i=1}^{H}(a_i - \\mu)^2}
h & = f(\\frac{g}{\\sigma}(a - \\mu) + b) h & = f(\\frac{g}{\\sigma}(a - \\mu) + b)
* :math:`a`: the vector representation of the summed inputs to the neurons * :math:`a`: the vector representation of the summed inputs to the neurons
in that layer. in that layer.
* :math:`H`: the number of hidden units in a layer * :math:`H`: the number of hidden units in a layer
* :math:`g`: the trainable scale parameter. * :math:`g`: the trainable scale parameter.
* :math:`b`: the trainable bias parameter. * :math:`b`: the trainable bias parameter.
Args: Args:
input(Variable): The input tensor variable. input(Variable): The input tensor variable.
scale(bool): Whether to learn the adaptive gain :math:`g` after scale(bool): Whether to learn the adaptive gain :math:`g` after
normalization. Default True. normalization. Default True.
shift(bool): Whether to learn the adaptive bias :math:`b` after shift(bool): Whether to learn the adaptive bias :math:`b` after
normalization. Default True. normalization. Default True.
begin_norm_axis(int): The normalization will be performed along begin_norm_axis(int): The normalization will be performed along
dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`. dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`.
Default 1. Default 1.
epsilon(float): The small value added to the variance to prevent epsilon(float): The small value added to the variance to prevent
division by zero. Default 1e-05. division by zero. Default 1e-05.
param_attr(ParamAttr|None): The parameter attribute for the learnable param_attr(ParamAttr|None): The parameter attribute for the learnable
gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is
omitted. If :attr:`scale` is True and :attr:`param_attr` is None, omitted. If :attr:`scale` is True and :attr:`param_attr` is None,
a default :code:`ParamAttr` would be added as scale. The a default :code:`ParamAttr` would be added as scale. The
:attr:`param_attr` is initialized as 1 if it is added. Default None. :attr:`param_attr` is initialized as 1 if it is added. Default None.
bias_attr(ParamAttr|None): The parameter attribute for the learnable bias_attr(ParamAttr|None): The parameter attribute for the learnable
bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is
omitted. If :attr:`shift` is True and :attr:`param_attr` is None, omitted. If :attr:`shift` is True and :attr:`param_attr` is None,
a default :code:`ParamAttr` would be added as bias. The a default :code:`ParamAttr` would be added as bias. The
:attr:`bias_attr` is initialized as 0 if it is added. Default None. :attr:`bias_attr` is initialized as 0 if it is added. Default None.
act(str): Activation to be applied to the output of layer normalization. act(str): Activation to be applied to the output of layer normalization.
Default None. Default None.
Returns: Returns:
${y_comment} ${y_comment}
Examples: Examples:
>>> data = fluid.layers.data(name='data', shape=[3, 32, 32], >>> data = fluid.layers.data(name='data', shape=[3, 32, 32],
>>> dtype='float32') >>> dtype='float32')
>>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1) >>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
""" """
def __init__(self,
name_scope,
scale=True,
shift=True,
begin_norm_axis=1,
epsilon=1e-05,
param_attr=None,
bias_attr=None,
act=None):
super(LayerNorm, self).__init__(name_scope) super(LayerNorm, self).__init__(name_scope)
self._scale = scale self._scale = scale
self._shift = shift self._shift = shift
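With the docstring now moved to the class level, a dygraph sketch of the layer itself (the static-graph `fluid.layers.layer_norm` call in the Examples block is unchanged); the input shape and `begin_norm_axis` value are illustrative:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(
            np.random.random([3, 32, 32]).astype('float32'))
        layer_norm = fluid.LayerNorm("layer_norm", begin_norm_axis=1)
        out = layer_norm(x)  # normalizes each sample over dims 1 and onward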
...@@ -870,7 +1312,7 @@ class LayerNorm(layers.Layer): ...@@ -870,7 +1312,7 @@ class LayerNorm(layers.Layer):
self._bias_attr = bias_attr self._bias_attr = bias_attr
self._act = act self._act = act
def _build_once(self, input): def build_once(self, input):
self._dtype = self._helper.input_dtype(input) self._dtype = self._helper.input_dtype(input)
input_shape = input.shape input_shape = input.shape
param_shape = [ param_shape = [
...@@ -1232,7 +1674,7 @@ class NCE(layers.Layer): ...@@ -1232,7 +1674,7 @@ class NCE(layers.Layer):
'remote_prefetch': remote_prefetch 'remote_prefetch': remote_prefetch
} }
def _build_once(self, input, label, sample_weight=None): def build_once(self, input, label, sample_weight=None):
assert isinstance(input, Variable) assert isinstance(input, Variable)
assert isinstance(label, Variable) assert isinstance(label, Variable)
...@@ -1318,7 +1760,7 @@ class PRelu(layers.Layer): ...@@ -1318,7 +1760,7 @@ class PRelu(layers.Layer):
raise ValueError('mode should be one of all, channel, element.') raise ValueError('mode should be one of all, channel, element.')
self._alpha_shape = [1] self._alpha_shape = [1]
def _build_once(self, input): def build_once(self, input):
if self._mode == 'channel': if self._mode == 'channel':
self._alpha_shape = [1, input.shape[1], 1, 1] self._alpha_shape = [1, input.shape[1], 1, 1]
elif self._mode == 'element': elif self._mode == 'element':
...@@ -1396,7 +1838,7 @@ class BilinearTensorProduct(layers.Layer): ...@@ -1396,7 +1838,7 @@ class BilinearTensorProduct(layers.Layer):
self._name = name self._name = name
self._inputs = dict() self._inputs = dict()
def _build_once(self, x, y): def build_once(self, x, y):
self._dtype = self._helper.input_dtype(x) self._dtype = self._helper.input_dtype(x)
param_shape = [self._size, x.shape[1], y.shape[1]] param_shape = [self._size, x.shape[1], y.shape[1]]
...@@ -1572,7 +2014,7 @@ class Conv2DTranspose(layers.Layer): ...@@ -1572,7 +2014,7 @@ class Conv2DTranspose(layers.Layer):
self._output_size = output_size self._output_size = output_size
self._op_type = 'conv2d_transpose' self._op_type = 'conv2d_transpose'
def _build_once(self, input): def build_once(self, input):
input_channel = input.shape[1] input_channel = input.shape[1]
if (input_channel == self._groups and if (input_channel == self._groups and
self._num_filters == input_channel and not self._use_cudnn): self._num_filters == input_channel and not self._use_cudnn):
...@@ -1686,7 +2128,7 @@ class SequenceConv(layers.Layer): ...@@ -1686,7 +2128,7 @@ class SequenceConv(layers.Layer):
bias_attr=None, bias_attr=None,
param_attr=None, param_attr=None,
act=None): act=None):
assert not _in_dygraph_mode( assert not in_dygraph_mode(
), "SequenceConv is not supported by dynamic graph mode yet!" ), "SequenceConv is not supported by dynamic graph mode yet!"
super(SequenceConv, self).__init__(name_scope) super(SequenceConv, self).__init__(name_scope)
self._num_filters = num_filters self._num_filters = num_filters
...@@ -1696,7 +2138,7 @@ class SequenceConv(layers.Layer): ...@@ -1696,7 +2138,7 @@ class SequenceConv(layers.Layer):
self._bias_attr = bias_attr self._bias_attr = bias_attr
self._param_attr = param_attr self._param_attr = param_attr
def _build_once(self, input): def build_once(self, input):
self._dtype = self._helper.input_dtype(input) self._dtype = self._helper.input_dtype(input)
filter_shape = [self._filter_size * input.shape[1], self._num_filters] filter_shape = [self._filter_size * input.shape[1], self._num_filters]
self._filter_param = self.create_parameter( self._filter_param = self.create_parameter(
...@@ -1726,14 +2168,14 @@ class RowConv(layers.Layer): ...@@ -1726,14 +2168,14 @@ class RowConv(layers.Layer):
future_context_size, future_context_size,
param_attr=None, param_attr=None,
act=None): act=None):
assert not _in_dygraph_mode( assert not in_dygraph_mode(
), "RowConv is not supported by dynamic graph mode yet!" ), "RowConv is not supported by dynamic graph mode yet!"
super(RowConv, self).__init__(name_scope) super(RowConv, self).__init__(name_scope)
self._act = act self._act = act
self._param_attr = param_attr self._param_attr = param_attr
self._future_context_size = future_context_size self._future_context_size = future_context_size
def _build_once(self, input): def build_once(self, input):
self._dtype = self._helper.input_dtype(input) self._dtype = self._helper.input_dtype(input)
filter_shape = [self._future_context_size + 1, input.shape[1]] filter_shape = [self._future_context_size + 1, input.shape[1]]
self._filter_param = self.create_parameter( self._filter_param = self.create_parameter(
...@@ -1796,7 +2238,7 @@ class GroupNorm(layers.Layer): ...@@ -1796,7 +2238,7 @@ class GroupNorm(layers.Layer):
if data_layout != 'NCHW': if data_layout != 'NCHW':
raise ValueError("unsupported data layout:" + data_layout) raise ValueError("unsupported data layout:" + data_layout)
def _build_once(self, input): def build_once(self, input):
self._dtype = self._helper.input_dtype(input) self._dtype = self._helper.input_dtype(input)
param_shape = [input.shape[1]] param_shape = [input.shape[1]]
if self._bias_attr: if self._bias_attr:
...@@ -1849,7 +2291,7 @@ class SpectralNorm(layers.Layer): ...@@ -1849,7 +2291,7 @@ class SpectralNorm(layers.Layer):
self._eps = eps self._eps = eps
self._dim = dim self._dim = dim
def _build_once(self, weight): def build_once(self, weight):
self._dtype = self._helper.input_dtype(weight) self._dtype = self._helper.input_dtype(weight)
input_shape = weight.shape input_shape = weight.shape
h = input_shape[self._dim] h = input_shape[self._dim]
...@@ -1904,7 +2346,7 @@ class TreeConv(layers.Layer): ...@@ -1904,7 +2346,7 @@ class TreeConv(layers.Layer):
self._bias_attr = bias_attr self._bias_attr = bias_attr
self._param_attr = param_attr self._param_attr = param_attr
def _build_once(self, nodes_vector, edge_set): def build_once(self, nodes_vector, edge_set):
assert isinstance(nodes_vector, Variable) assert isinstance(nodes_vector, Variable)
assert isinstance(edge_set, Variable) assert isinstance(edge_set, Variable)
self._dtype = self._helper.input_dtype(nodes_vector) self._dtype = self._helper.input_dtype(nodes_vector)
......
...@@ -67,6 +67,7 @@ __all__ = [ ...@@ -67,6 +67,7 @@ __all__ = [
'cuda_places', 'cuda_places',
'cpu_places', 'cpu_places',
'cuda_pinned_places', 'cuda_pinned_places',
'in_dygraph_mode',
] ]
EMPTY_VAR_NAME = core.kEmptyVarName() EMPTY_VAR_NAME = core.kEmptyVarName()
...@@ -79,7 +80,10 @@ _dygraph_tracer_ = None ...@@ -79,7 +80,10 @@ _dygraph_tracer_ = None
_dygraph_current_expected_place_ = None _dygraph_current_expected_place_ = None
def _in_dygraph_mode(): def in_dygraph_mode():
'''
Returns(bool): True if the program is running in dynamic graph mode
'''
return _dygraph_tracer_ is not None return _dygraph_tracer_ is not None
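The helper is now public and exported from `paddle.fluid` (see the API-spec and `__all__` entries above). A minimal sketch of the renamed call, assuming the `fluid.dygraph.guard()` context manager from this release:

.. code-block:: python

    import paddle.fluid as fluid

    print(fluid.in_dygraph_mode())      # False: no dygraph tracer outside a guard
    with fluid.dygraph.guard():
        print(fluid.in_dygraph_mode())  # True: a tracer is active inside the guard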
...@@ -396,7 +400,7 @@ class Variable(object): ...@@ -396,7 +400,7 @@ class Variable(object):
if not isinstance(dtype, core.VarDesc.VarType): if not isinstance(dtype, core.VarDesc.VarType):
dtype = convert_np_dtype_to_dtype_(dtype) dtype = convert_np_dtype_to_dtype_(dtype)
if _in_dygraph_mode(): if in_dygraph_mode():
# record vars in tracer rather than blocks # record vars in tracer rather than blocks
self._ivar = kwargs.get("ivar", None) self._ivar = kwargs.get("ivar", None)
if not self._ivar: if not self._ivar:
...@@ -482,21 +486,21 @@ class Variable(object): ...@@ -482,21 +486,21 @@ class Variable(object):
self.block.vars[name] = self self.block.vars[name] = self
self.op = None self.op = None
self.stop_gradient = stop_gradient self._stop_gradient = stop_gradient
self.is_data = is_data self.is_data = is_data
def _numpy(self): def numpy(self):
new_ivar = self._ivar._copy_to(core.CPUPlace(), True) new_ivar = self._ivar._copy_to(core.CPUPlace(), True)
return np.array(new_ivar.value().get_tensor()) return np.array(new_ivar.value().get_tensor())
def _backward(self): def backward(self):
self._ivar._run_backward() self._ivar._run_backward()
def _gradient(self): def gradient(self):
new_ivar = self._ivar._grad_ivar()._copy_to(core.CPUPlace(), True) new_ivar = self._ivar._grad_ivar()._copy_to(core.CPUPlace(), True)
return np.array(new_ivar.value().get_tensor()) return np.array(new_ivar.value().get_tensor())
def _clear_gradient(self): def clear_gradient(self):
self._ivar._clear_gradient() self._ivar._clear_gradient()
def __str__(self): def __str__(self):
...@@ -516,7 +520,7 @@ class Variable(object): ...@@ -516,7 +520,7 @@ class Variable(object):
Returns: Returns:
str: The debug string. str: The debug string.
""" """
if _in_dygraph_mode(): if in_dygraph_mode():
# TODO(panyx0718): add more dygraph debug info. # TODO(panyx0718): add more dygraph debug info.
return 'name %s, dtype: %s shape: %s' % (self.name, self.dtype, return 'name %s, dtype: %s shape: %s' % (self.name, self.dtype,
self.shape) self.shape)
...@@ -535,7 +539,7 @@ class Variable(object): ...@@ -535,7 +539,7 @@ class Variable(object):
__repr__ = __str__ __repr__ = __str__
def _set_desc(self, input): def set_desc(self, input):
""" """
Set the variable description. Set the variable description.
...@@ -548,43 +552,43 @@ class Variable(object): ...@@ -548,43 +552,43 @@ class Variable(object):
self.desc = input self.desc = input
@property @property
def _stop_gradient(self): def stop_gradient(self):
if _in_dygraph_mode(): if in_dygraph_mode():
return self._ivar.stop_gradient return self._ivar.stop_gradient
else: else:
return self.stop_gradient return self._stop_gradient
@_stop_gradient.setter @stop_gradient.setter
def _stop_gradient(self, s): def stop_gradient(self, s):
if _in_dygraph_mode(): if in_dygraph_mode():
self._ivar.stop_gradient = s self._ivar.stop_gradient = s
else: else:
self.stop_gradient = s self._stop_gradient = s
@property @property
def persistable(self): def persistable(self):
if _in_dygraph_mode(): if in_dygraph_mode():
return self._ivar.persistable return self._ivar.persistable
else: else:
return self.desc.persistable() return self.desc.persistable()
@persistable.setter @persistable.setter
def persistable(self, p): def persistable(self, p):
if _in_dygraph_mode(): if in_dygraph_mode():
return self._ivar.persistable return self._ivar.persistable
else: else:
self.desc.set_persistable(p) self.desc.set_persistable(p)
@property @property
def name(self): def name(self):
if _in_dygraph_mode(): if in_dygraph_mode():
return self._ivar.name return self._ivar.name
else: else:
return cpt.to_text(self.desc.name()) return cpt.to_text(self.desc.name())
@name.setter @name.setter
def name(self, new_name): def name(self, new_name):
if _in_dygraph_mode(): if in_dygraph_mode():
self._ivar.name = new_name self._ivar.name = new_name
else: else:
self.desc.set_name(new_name) self.desc.set_name(new_name)
...@@ -592,14 +596,14 @@ class Variable(object): ...@@ -592,14 +596,14 @@ class Variable(object):
@property @property
def shape(self): def shape(self):
# convert to tuple, make it as same as numpy API. # convert to tuple, make it as same as numpy API.
if _in_dygraph_mode(): if in_dygraph_mode():
return self._ivar.shape return self._ivar.shape
else: else:
return tuple(self.desc.shape()) return tuple(self.desc.shape())
@property @property
def dtype(self): def dtype(self):
if _in_dygraph_mode(): if in_dygraph_mode():
return self._ivar.dtype return self._ivar.dtype
else: else:
return self.desc.dtype() return self.desc.dtype()
...@@ -611,7 +615,7 @@ class Variable(object): ...@@ -611,7 +615,7 @@ class Variable(object):
@property @property
def type(self): def type(self):
if _in_dygraph_mode(): if in_dygraph_mode():
return self._ivar.dtype return self._ivar.dtype
else: else:
return self.desc.type() return self.desc.type()
...@@ -721,7 +725,7 @@ class Variable(object): ...@@ -721,7 +725,7 @@ class Variable(object):
name=unique_name.generate(".".join(self.name)), name=unique_name.generate(".".join(self.name)),
dtype=self.dtype, dtype=self.dtype,
persistable=self.persistable, persistable=self.persistable,
stop_gradient=self._stop_gradient, ) stop_gradient=self.stop_gradient, )
else: else:
return self return self
...@@ -930,7 +934,7 @@ class Operator(object): ...@@ -930,7 +934,7 @@ class Operator(object):
inputs=None, inputs=None,
outputs=None, outputs=None,
attrs=None): attrs=None):
if _in_dygraph_mode(): if in_dygraph_mode():
if type is None: if type is None:
raise ValueError( raise ValueError(
"`type` to initialized an Operator can not be None.") "`type` to initialized an Operator can not be None.")
...@@ -1049,7 +1053,7 @@ class Operator(object): ...@@ -1049,7 +1053,7 @@ class Operator(object):
for arg in out_args: for arg in out_args:
out_arg_names.append(cpt.to_text(arg.name)) out_arg_names.append(cpt.to_text(arg.name))
# TODO(minqiyang): could we remove variable's op in static mode? # TODO(minqiyang): could we remove variable's op in static mode?
if not _in_dygraph_mode(): if not in_dygraph_mode():
arg.op = self arg.op = self
self.desc.set_output(out_proto.name, out_arg_names) self.desc.set_output(out_proto.name, out_arg_names)
...@@ -1095,7 +1099,7 @@ class Operator(object): ...@@ -1095,7 +1099,7 @@ class Operator(object):
@property @property
def type(self): def type(self):
if _in_dygraph_mode(): if in_dygraph_mode():
return self.iop.type return self.iop.type
else: else:
return self.desc.type() return self.desc.type()
...@@ -1638,7 +1642,7 @@ class Block(object): ...@@ -1638,7 +1642,7 @@ class Block(object):
Returns: Returns:
Operator: the appended Operator. Operator: the appended Operator.
""" """
if _in_dygraph_mode(): if in_dygraph_mode():
op = Operator( op = Operator(
block=self, block=self,
desc=None, desc=None,
...@@ -1710,7 +1714,7 @@ class Block(object): ...@@ -1710,7 +1714,7 @@ class Block(object):
return self.ops[start:end] return self.ops[start:end]
def _prepend_op(self, *args, **kwargs): def _prepend_op(self, *args, **kwargs):
if _in_dygraph_mode(): if in_dygraph_mode():
op = Operator( op = Operator(
self, self,
None, None,
......
...@@ -165,7 +165,7 @@ class ConstantInitializer(Initializer): ...@@ -165,7 +165,7 @@ class ConstantInitializer(Initializer):
'force_cpu': self._force_cpu or force_init_on_cpu() 'force_cpu': self._force_cpu or force_init_on_cpu()
}, },
stop_gradient=True) stop_gradient=True)
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -245,7 +245,7 @@ class UniformInitializer(Initializer): ...@@ -245,7 +245,7 @@ class UniformInitializer(Initializer):
attrs={"in_dtype": out_var.dtype, attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype}) "out_dtype": var.dtype})
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -324,7 +324,7 @@ class NormalInitializer(Initializer): ...@@ -324,7 +324,7 @@ class NormalInitializer(Initializer):
outputs={"Out": var}, outputs={"Out": var},
attrs={"in_dtype": out_var.dtype, attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype}) "out_dtype": var.dtype})
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -403,7 +403,7 @@ class TruncatedNormalInitializer(Initializer): ...@@ -403,7 +403,7 @@ class TruncatedNormalInitializer(Initializer):
outputs={"Out": var}, outputs={"Out": var},
attrs={"in_dtype": out_var.dtype, attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype}) "out_dtype": var.dtype})
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -509,7 +509,7 @@ class XavierInitializer(Initializer): ...@@ -509,7 +509,7 @@ class XavierInitializer(Initializer):
"seed": self._seed "seed": self._seed
}, },
stop_gradient=True) stop_gradient=True)
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -610,7 +610,7 @@ class MSRAInitializer(Initializer): ...@@ -610,7 +610,7 @@ class MSRAInitializer(Initializer):
"seed": self._seed "seed": self._seed
}, },
stop_gradient=True) stop_gradient=True)
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -709,7 +709,7 @@ class BilinearInitializer(Initializer): ...@@ -709,7 +709,7 @@ class BilinearInitializer(Initializer):
'shape': list(shape), 'shape': list(shape),
value_name: values value_name: values
}) })
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
...@@ -768,7 +768,7 @@ class NumpyArrayInitializer(Initializer): ...@@ -768,7 +768,7 @@ class NumpyArrayInitializer(Initializer):
value_name: values value_name: values
}, },
stop_gradient=True) stop_gradient=True)
if not framework._in_dygraph_mode(): if not framework.in_dygraph_mode():
var.op = op var.op = op
return op return op
......
...@@ -17,7 +17,7 @@ from __future__ import print_function ...@@ -17,7 +17,7 @@ from __future__ import print_function
import copy import copy
import six import six
from .framework import Parameter, dtype_is_floating, _in_dygraph_mode from .framework import Parameter, dtype_is_floating, in_dygraph_mode
from . import unique_name from . import unique_name
from paddle.fluid.initializer import Constant, Xavier from paddle.fluid.initializer import Constant, Xavier
from .param_attr import ParamAttr from .param_attr import ParamAttr
......
...@@ -17,7 +17,7 @@ from __future__ import print_function ...@@ -17,7 +17,7 @@ from __future__ import print_function
import copy import copy
import numpy as np import numpy as np
from .framework import Variable, default_main_program, default_startup_program, _in_dygraph_mode, _current_expected_place from .framework import Variable, default_main_program, default_startup_program, in_dygraph_mode, _current_expected_place
from . import unique_name from . import unique_name
from .param_attr import ParamAttr, WeightNormParamAttr from .param_attr import ParamAttr, WeightNormParamAttr
from . import core from . import core
...@@ -54,7 +54,7 @@ class LayerHelperBase(object): ...@@ -54,7 +54,7 @@ class LayerHelperBase(object):
Return Variable construct from value Return Variable construct from value
""" """
if isinstance(value, np.ndarray): if isinstance(value, np.ndarray):
assert _in_dygraph_mode( assert in_dygraph_mode(
), "to_variable could only be called in dygraph mode" ), "to_variable could only be called in dygraph mode"
if not block: if not block:
...@@ -302,7 +302,7 @@ class LayerHelperBase(object): ...@@ -302,7 +302,7 @@ class LayerHelperBase(object):
param = self._create_weight_normalize(attr, shape, dtype) param = self._create_weight_normalize(attr, shape, dtype)
WeightNormParamAttr.params_with_weight_norm.append(param) WeightNormParamAttr.params_with_weight_norm.append(param)
return param return param
if _in_dygraph_mode(): if in_dygraph_mode():
# In dygraph mode, we want the returned parameter to be # In dygraph mode, we want the returned parameter to be
# initialized so that it can be used imperatively. # initialized so that it can be used imperatively.
return self.main_program.global_block().create_parameter( return self.main_program.global_block().create_parameter(
...@@ -370,7 +370,7 @@ class LayerHelperBase(object): ...@@ -370,7 +370,7 @@ class LayerHelperBase(object):
initializer: initializer to use initializer: initializer to use
""" """
assert isinstance(var, Variable) assert isinstance(var, Variable)
if _in_dygraph_mode(): if in_dygraph_mode():
initializer(var, var.block) initializer(var, var.block)
else: else:
self.startup_program.global_block().create_var( self.startup_program.global_block().create_var(
......
...@@ -23,7 +23,7 @@ import os ...@@ -23,7 +23,7 @@ import os
import inspect import inspect
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant, NumpyArrayInitializer from ..initializer import Normal, Constant, NumpyArrayInitializer
from ..framework import Variable, OpProtoHolder, _in_dygraph_mode from ..framework import Variable, OpProtoHolder, in_dygraph_mode
from ..dygraph import base from ..dygraph import base
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_ from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_
...@@ -3288,7 +3288,7 @@ def layer_norm(input, ...@@ -3288,7 +3288,7 @@ def layer_norm(input,
>>> dtype='float32') >>> dtype='float32')
>>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1) >>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
""" """
assert _in_dygraph_mode( assert in_dygraph_mode(
) is not True, "please use FC instead of fc in dygraph mode!" ) is not True, "please use FC instead of fc in dygraph mode!"
helper = LayerHelper('layer_norm', **locals()) helper = LayerHelper('layer_norm', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -6454,7 +6454,7 @@ def squeeze(input, axes, name=None): ...@@ -6454,7 +6454,7 @@ def squeeze(input, axes, name=None):
x = layers.data(name='x', shape=[5, 1, 10]) x = layers.data(name='x', shape=[5, 1, 10])
y = layers.squeeze(input=x, axes=[1]) y = layers.squeeze(input=x, axes=[1])
""" """
assert not _in_dygraph_mode(), ( assert not in_dygraph_mode(), (
"squeeze layer is not supported in dygraph mode yet.") "squeeze layer is not supported in dygraph mode yet.")
helper = LayerHelper("squeeze", **locals()) helper = LayerHelper("squeeze", **locals())
out = helper.create_variable_for_type_inference(dtype=input.dtype) out = helper.create_variable_for_type_inference(dtype=input.dtype)
...@@ -9193,7 +9193,7 @@ def _elementwise_op(helper): ...@@ -9193,7 +9193,7 @@ def _elementwise_op(helper):
op_type = helper.layer_type op_type = helper.layer_type
x = helper.kwargs.get('x', None) x = helper.kwargs.get('x', None)
y = helper.kwargs.get('y', None) y = helper.kwargs.get('y', None)
if _in_dygraph_mode(): if in_dygraph_mode():
x = base.to_variable(x) x = base.to_variable(x)
y = base.to_variable(y) y = base.to_variable(y)
......
...@@ -55,7 +55,7 @@ class Optimizer(object): ...@@ -55,7 +55,7 @@ class Optimizer(object):
""" """
def __init__(self, learning_rate, regularization=None, name=None): def __init__(self, learning_rate, regularization=None, name=None):
if framework._in_dygraph_mode(): if framework.in_dygraph_mode():
if not isinstance(learning_rate, float) and \ if not isinstance(learning_rate, float) and \
not isinstance(learning_rate, LearningRateDecay): not isinstance(learning_rate, LearningRateDecay):
raise TypeError( raise TypeError(
...@@ -205,7 +205,7 @@ class Optimizer(object): ...@@ -205,7 +205,7 @@ class Optimizer(object):
name = self._name + "_" + name name = self._name + "_" + name
if (name in self._accumulators and if (name in self._accumulators and
param.name in self._accumulators[name]): param.name in self._accumulators[name]):
if framework._in_dygraph_mode(): if framework.in_dygraph_mode():
return self._accumulators[name][param.name] return self._accumulators[name][param.name]
raise Exception("Accumulator {} already exists for parameter {}". raise Exception("Accumulator {} already exists for parameter {}".
format(name, param.name)) format(name, param.name))
...@@ -275,7 +275,7 @@ class Optimizer(object): ...@@ -275,7 +275,7 @@ class Optimizer(object):
self._create_global_learning_rate() self._create_global_learning_rate()
optimize_ops = [] optimize_ops = []
if framework._in_dygraph_mode(): if framework.in_dygraph_mode():
for param_and_grad in parameters_and_grads: for param_and_grad in parameters_and_grads:
if param_and_grad[1] is None: if param_and_grad[1] is None:
continue continue
...@@ -374,7 +374,7 @@ class Optimizer(object): ...@@ -374,7 +374,7 @@ class Optimizer(object):
See examples in `apply_gradients`. See examples in `apply_gradients`.
""" """
self._dtype = loss.dtype self._dtype = loss.dtype
if framework._in_dygraph_mode(): if framework.in_dygraph_mode():
if parameter_list is not None: if parameter_list is not None:
parameters = parameter_list parameters = parameter_list
else: else:
...@@ -459,7 +459,7 @@ class Optimizer(object): ...@@ -459,7 +459,7 @@ class Optimizer(object):
Returns: Returns:
list: A list of operators appended to the current program. list: A list of operators appended to the current program.
""" """
if framework._in_dygraph_mode(): if framework.in_dygraph_mode():
with program_guard(framework.default_main_program(), with program_guard(framework.default_main_program(),
framework.default_startup_program()): framework.default_startup_program()):
optimize_ops = self._create_optimization_pass(params_grads) optimize_ops = self._create_optimization_pass(params_grads)
......
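The optimizer paths above now key off the same public check. A minimal end-to-end sketch of one optimizer step under a dygraph guard, assuming the `FC` layer re-exported at the `fluid` root by this change (the scope name "fc" and the sizes are illustrative):

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.optimizer import SGDOptimizer

    with fluid.dygraph.guard():
        fc = fluid.FC("fc", 1)            # previously fluid.dygraph.FC
        sgd = SGDOptimizer(learning_rate=1e-3)
        x = fluid.dygraph.to_variable(np.ones([2, 4], dtype='float32'))
        loss = fluid.layers.reduce_mean(fc(x))
        loss.backward()                   # renamed from _backward()
        sgd.minimize(loss)
        fc.clear_gradients()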
...@@ -18,7 +18,7 @@ import numpy as np ...@@ -18,7 +18,7 @@ import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
class L1(fluid.dygraph.Layer): class L1(fluid.Layer):
def __init__(self, prefix): def __init__(self, prefix):
super(L1, self).__init__(prefix) super(L1, self).__init__(prefix)
self._param_attr = fluid.ParamAttr( self._param_attr = fluid.ParamAttr(
...@@ -32,7 +32,7 @@ class L1(fluid.dygraph.Layer): ...@@ -32,7 +32,7 @@ class L1(fluid.dygraph.Layer):
return self.w1 + self.w2 return self.w1 + self.w2
class L2(fluid.dygraph.Layer): class L2(fluid.Layer):
def __init__(self, prefix): def __init__(self, prefix):
super(L2, self).__init__(prefix) super(L2, self).__init__(prefix)
self.layer1 = L1(self.full_name()) self.layer1 = L1(self.full_name())
...@@ -42,7 +42,7 @@ class L2(fluid.dygraph.Layer): ...@@ -42,7 +42,7 @@ class L2(fluid.dygraph.Layer):
return self.layer1() + self.layer2() return self.layer1() + self.layer2()
class L3(fluid.dygraph.Layer): class L3(fluid.Layer):
def __init__(self, prefix): def __init__(self, prefix):
super(L3, self).__init__(prefix) super(L3, self).__init__(prefix)
self.layer1 = L2(self.full_name()) self.layer1 = L2(self.full_name())
...@@ -59,7 +59,7 @@ class TestBaseLayer(unittest.TestCase): ...@@ -59,7 +59,7 @@ class TestBaseLayer(unittest.TestCase):
ret = l() ret = l()
self.assertEqual(l.w1.name, "test_one_level/L1_0.w_0") self.assertEqual(l.w1.name, "test_one_level/L1_0.w_0")
self.assertEqual(l.w2.name, "test_one_level/L1_0.w_1") self.assertEqual(l.w2.name, "test_one_level/L1_0.w_1")
self.assertTrue(np.allclose(ret._numpy(), 0.2 * np.ones([2, 2]))) self.assertTrue(np.allclose(ret.numpy(), 0.2 * np.ones([2, 2])))
def test_three_level(self): def test_three_level(self):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
...@@ -72,7 +72,7 @@ class TestBaseLayer(unittest.TestCase): ...@@ -72,7 +72,7 @@ class TestBaseLayer(unittest.TestCase):
self.assertEqual(names[3], "test_three_level/L3_0/L2_0/L1_1.w_1") self.assertEqual(names[3], "test_three_level/L3_0/L2_0/L1_1.w_1")
self.assertEqual(names[4], "test_three_level/L3_0/L2_1/L1_0.w_0") self.assertEqual(names[4], "test_three_level/L3_0/L2_1/L1_0.w_0")
self.assertEqual(names[5], "test_three_level/L3_0/L2_1/L1_0.w_1") self.assertEqual(names[5], "test_three_level/L3_0/L2_1/L1_0.w_1")
self.assertTrue(np.allclose(ret._numpy(), 0.8 * np.ones([2, 2]))) self.assertTrue(np.allclose(ret.numpy(), 0.8 * np.ones([2, 2])))
if __name__ == '__main__': if __name__ == '__main__':
......
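The test above also shows the other half of the move: user layers now subclass `fluid.Layer` directly instead of `fluid.dygraph.Layer`. A minimal sketch of the new spelling (the class and scope names are illustrative):

    import numpy as np
    import paddle.fluid as fluid

    class TinyNet(fluid.Layer):                        # previously fluid.dygraph.Layer
        def __init__(self, name_scope):
            super(TinyNet, self).__init__(name_scope)
            self._fc = fluid.FC(self.full_name(), 3)   # previously fluid.dygraph.FC

        def forward(self, x):
            return self._fc(x)

    with fluid.dygraph.guard():
        net = TinyNet("tiny")
        out = net(fluid.dygraph.to_variable(np.ones([1, 4], dtype='float32')))
        print(out.numpy())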
...@@ -18,11 +18,11 @@ import numpy as np ...@@ -18,11 +18,11 @@ import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.dygraph.nn import FC from paddle.fluid import FC
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
class MyLayer(fluid.dygraph.Layer): class MyLayer(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(MyLayer, self).__init__(name_scope) super(MyLayer, self).__init__(name_scope)
...@@ -34,7 +34,7 @@ class MyLayer(fluid.dygraph.Layer): ...@@ -34,7 +34,7 @@ class MyLayer(fluid.dygraph.Layer):
return [x] return [x]
class MyPyLayer(fluid.dygraph.PyLayer): class MyPyLayer(fluid.PyLayer):
def __init__(self): def __init__(self):
super(MyPyLayer, self).__init__() super(MyPyLayer, self).__init__()
...@@ -48,7 +48,7 @@ class MyPyLayer(fluid.dygraph.PyLayer): ...@@ -48,7 +48,7 @@ class MyPyLayer(fluid.dygraph.PyLayer):
return np.array(dout) * (1 - np.square(np.array(out))) return np.array(dout) * (1 - np.square(np.array(out)))
class MLP(fluid.dygraph.Layer): class MLP(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(MLP, self).__init__(name_scope) super(MLP, self).__init__(name_scope)
self._fc1 = FC(self.full_name(), self._fc1 = FC(self.full_name(),
...@@ -71,7 +71,7 @@ class MLP(fluid.dygraph.Layer): ...@@ -71,7 +71,7 @@ class MLP(fluid.dygraph.Layer):
return x return x
class SimpleRNNCell(fluid.dygraph.Layer): class SimpleRNNCell(fluid.Layer):
def __init__(self, name_scope, step_input_size, hidden_size, output_size, def __init__(self, name_scope, step_input_size, hidden_size, output_size,
param_attr): param_attr):
super(SimpleRNNCell, self).__init__(name_scope) super(SimpleRNNCell, self).__init__(name_scope)
...@@ -81,7 +81,7 @@ class SimpleRNNCell(fluid.dygraph.Layer): ...@@ -81,7 +81,7 @@ class SimpleRNNCell(fluid.dygraph.Layer):
self._dtype = core.VarDesc.VarType.FP32 self._dtype = core.VarDesc.VarType.FP32
self.param_attr = param_attr self.param_attr = param_attr
def _build_once(self, inputs, pre_hidden): def build_once(self, inputs, pre_hidden):
i2h_param_shape = [self.step_input_size, self.hidden_size] i2h_param_shape = [self.step_input_size, self.hidden_size]
h2h_param_shape = [self.hidden_size, self.hidden_size] h2h_param_shape = [self.hidden_size, self.hidden_size]
h2o_param_shape = [self.output_size, self.hidden_size] h2o_param_shape = [self.output_size, self.hidden_size]
...@@ -159,7 +159,7 @@ class SimpleRNNCell(fluid.dygraph.Layer): ...@@ -159,7 +159,7 @@ class SimpleRNNCell(fluid.dygraph.Layer):
return reduce_out, hidden return reduce_out, hidden
class SimpleRNN(fluid.dygraph.Layer): class SimpleRNN(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(SimpleRNN, self).__init__(name_scope) super(SimpleRNN, self).__init__(name_scope)
self.seq_len = 4 self.seq_len = 4
...@@ -200,22 +200,22 @@ class TestImperative(unittest.TestCase): ...@@ -200,22 +200,22 @@ class TestImperative(unittest.TestCase):
inputs.append(fluid.dygraph.base.to_variable(x)) inputs.append(fluid.dygraph.base.to_variable(x))
ret = fluid.layers.sums(inputs) ret = fluid.layers.sums(inputs)
loss = fluid.layers.reduce_sum(ret) loss = fluid.layers.reduce_sum(ret)
loss._backward() loss.backward()
self.assertTrue(np.allclose(ret._numpy(), x * 10)) self.assertTrue(np.allclose(ret.numpy(), x * 10))
self.assertTrue(np.allclose(inputs[0]._gradient(), x)) self.assertTrue(np.allclose(inputs[0].gradient(), x))
def test_layer(self): def test_layer(self):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
cl = core.Layer() cl = core.Layer()
cl.forward([]) cl.forward([])
l = fluid.dygraph.Layer("l") l = fluid.Layer("l")
self.assertRaises(NotImplementedError, l.forward, []) self.assertRaises(NotImplementedError, l.forward, [])
def test_pylayer_func_id(self): def test_pylayer_func_id(self):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
class PyLayer1(fluid.dygraph.PyLayer): class PyLayer1(fluid.PyLayer):
def __init__(self): def __init__(self):
super(PyLayer1, self).__init__() super(PyLayer1, self).__init__()
...@@ -257,9 +257,9 @@ class TestImperative(unittest.TestCase): ...@@ -257,9 +257,9 @@ class TestImperative(unittest.TestCase):
my_py_layer = MyPyLayer() my_py_layer = MyPyLayer()
var_inp = fluid.dygraph.base.to_variable(np_inp) var_inp = fluid.dygraph.base.to_variable(np_inp)
outs = my_py_layer(var_inp) outs = my_py_layer(var_inp)
dy_out = np.sum(outs[0]._numpy()) dy_out = np.sum(outs[0].numpy())
outs[0]._backward() outs[0].backward()
dy_grad = var_inp._gradient() dy_grad = var_inp.gradient()
with new_program_scope(): with new_program_scope():
inp = fluid.layers.data( inp = fluid.layers.data(
...@@ -287,9 +287,9 @@ class TestImperative(unittest.TestCase): ...@@ -287,9 +287,9 @@ class TestImperative(unittest.TestCase):
l = MyLayer("my_layer") l = MyLayer("my_layer")
x = l(var_inp)[0] x = l(var_inp)[0]
self.assertIsNotNone(x) self.assertIsNotNone(x)
dy_out = x._numpy() dy_out = x.numpy()
x._backward() x.backward()
dy_grad = l._x_for_debug._gradient() dy_grad = l._x_for_debug.gradient()
with new_program_scope(): with new_program_scope():
inp = fluid.layers.data( inp = fluid.layers.data(
...@@ -314,9 +314,9 @@ class TestImperative(unittest.TestCase): ...@@ -314,9 +314,9 @@ class TestImperative(unittest.TestCase):
var_inp = fluid.dygraph.base.to_variable(np_inp) var_inp = fluid.dygraph.base.to_variable(np_inp)
mlp = MLP("mlp") mlp = MLP("mlp")
out = mlp(var_inp) out = mlp(var_inp)
dy_out = out._numpy() dy_out = out.numpy()
out._backward() out.backward()
dy_grad = mlp._fc1._w._gradient() dy_grad = mlp._fc1._w.gradient()
with new_program_scope(): with new_program_scope():
inp = fluid.layers.data( inp = fluid.layers.data(
...@@ -358,11 +358,11 @@ class TestImperative(unittest.TestCase): ...@@ -358,11 +358,11 @@ class TestImperative(unittest.TestCase):
var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3]) var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3])
simple_rnn = SimpleRNN("simple_rnn") simple_rnn = SimpleRNN("simple_rnn")
outs, pre_hiddens = simple_rnn.forward(var_inp) outs, pre_hiddens = simple_rnn.forward(var_inp)
dy_out = outs[3]._numpy() dy_out = outs[3].numpy()
outs[3]._backward() outs[3].backward()
dy_grad_h2o = simple_rnn._cell._h2o_w._gradient() dy_grad_h2o = simple_rnn._cell._h2o_w.gradient()
dy_grad_h2h = simple_rnn._cell._h2h_w._gradient() dy_grad_h2h = simple_rnn._cell._h2h_w.gradient()
dy_grad_i2h = simple_rnn._cell._i2h_w._gradient() dy_grad_i2h = simple_rnn._cell._i2h_w.gradient()
with new_program_scope(): with new_program_scope():
inp = fluid.layers.data( inp = fluid.layers.data(
......
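The bulk of the changes in this test are the Variable method renames. The new public spellings, in a minimal sketch mirroring the calls above:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(np.ones([2, 2], dtype='float32'))
        y = fluid.layers.reduce_sum(x)
        y.backward()            # was y._backward()
        print(y.numpy())        # was y._numpy()
        print(x.gradient())     # was x._gradient()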
...@@ -18,11 +18,11 @@ import numpy as np ...@@ -18,11 +18,11 @@ import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC from paddle.fluid import Conv2D, Pool2D, FC
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
class SimpleImgConvPool(fluid.dygraph.Layer): class SimpleImgConvPool(fluid.Layer):
def __init__(self, def __init__(self,
name_scope, name_scope,
num_channels, num_channels,
...@@ -71,7 +71,7 @@ class SimpleImgConvPool(fluid.dygraph.Layer): ...@@ -71,7 +71,7 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
return x return x
class MNIST(fluid.dygraph.Layer): class MNIST(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(MNIST, self).__init__(name_scope) super(MNIST, self).__init__(name_scope)
...@@ -125,21 +125,21 @@ class TestDygraphCheckpoint(unittest.TestCase): ...@@ -125,21 +125,21 @@ class TestDygraphCheckpoint(unittest.TestCase):
img = to_variable(dy_x_data) img = to_variable(dy_x_data)
label = to_variable(y_data) label = to_variable(y_data)
label._stop_gradient = True label.stop_gradient = True
cost = mnist(img) cost = mnist(img)
loss = fluid.layers.cross_entropy(cost, label) loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss) avg_loss = fluid.layers.mean(loss)
dy_out = avg_loss._numpy() dy_out = avg_loss.numpy()
avg_loss._backward() avg_loss.backward()
sgd.minimize(avg_loss) sgd.minimize(avg_loss)
fluid.dygraph.save_persistables(mnist, "save_dir") fluid.dygraph.save_persistables(mnist, "save_dir")
mnist.clear_gradients() mnist.clear_gradients()
for param in mnist.parameters(): for param in mnist.parameters():
dy_param_init_value[param.name] = param._numpy() dy_param_init_value[param.name] = param.numpy()
mnist.load_dict( mnist.load_dict(
fluid.dygraph.load_persistables(mnist, "save_dir")) fluid.dygraph.load_persistables(mnist, "save_dir"))
......
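The checkpoint test follows the usual save/load round trip with the renamed attributes. A minimal sketch mirroring it, assuming a layer whose parameters have been built by a first forward pass (the directory name "save_dir" is taken from the test above):

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        model = fluid.FC("fc", 10)
        # One forward pass so the lazily built parameters exist before saving.
        model(fluid.dygraph.to_variable(np.ones([1, 4], dtype='float32')))
        fluid.dygraph.save_persistables(model, "save_dir")
        model.load_dict(fluid.dygraph.load_persistables(model, "save_dir"))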
...@@ -32,11 +32,11 @@ NUM_BATCHES = int(os.environ.get('NUM_BATCHES', 5)) ...@@ -32,11 +32,11 @@ NUM_BATCHES = int(os.environ.get('NUM_BATCHES', 5))
NUM_EPOCHES = int(os.environ.get('NUM_EPOCHES', 1)) NUM_EPOCHES = int(os.environ.get('NUM_EPOCHES', 1))
class DMF(fluid.dygraph.Layer): class DMF(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(DMF, self).__init__(name_scope) super(DMF, self).__init__(name_scope)
self._user_latent = fluid.dygraph.FC(self.full_name(), 256) self._user_latent = fluid.FC(self.full_name(), 256)
self._item_latent = fluid.dygraph.FC(self.full_name(), 256) self._item_latent = fluid.FC(self.full_name(), 256)
self._user_layers = [] self._user_layers = []
self._item_layers = [] self._item_layers = []
...@@ -45,13 +45,11 @@ class DMF(fluid.dygraph.Layer): ...@@ -45,13 +45,11 @@ class DMF(fluid.dygraph.Layer):
self._user_layers.append( self._user_layers.append(
self.add_sublayer( self.add_sublayer(
'user_layer_%d' % i, 'user_layer_%d' % i,
fluid.dygraph.FC( fluid.FC(self.full_name(), self._hid_sizes[i], act='relu')))
self.full_name(), self._hid_sizes[i], act='relu')))
self._item_layers.append( self._item_layers.append(
self.add_sublayer( self.add_sublayer(
'item_layer_%d' % i, 'item_layer_%d' % i,
fluid.dygraph.FC( fluid.FC(self.full_name(), self._hid_sizes[i], act='relu')))
self.full_name(), self._hid_sizes[i], act='relu')))
def forward(self, users, items): def forward(self, users, items):
users = self._user_latent(users) users = self._user_latent(users)
...@@ -63,19 +61,18 @@ class DMF(fluid.dygraph.Layer): ...@@ -63,19 +61,18 @@ class DMF(fluid.dygraph.Layer):
return fluid.layers.elementwise_mul(users, items) return fluid.layers.elementwise_mul(users, items)
class MLP(fluid.dygraph.Layer): class MLP(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(MLP, self).__init__(name_scope) super(MLP, self).__init__(name_scope)
self._user_latent = fluid.dygraph.FC(self.full_name(), 256) self._user_latent = fluid.FC(self.full_name(), 256)
self._item_latent = fluid.dygraph.FC(self.full_name(), 256) self._item_latent = fluid.FC(self.full_name(), 256)
self._match_layers = [] self._match_layers = []
self._hid_sizes = [128, 64] self._hid_sizes = [128, 64]
for i in range(len(self._hid_sizes)): for i in range(len(self._hid_sizes)):
self._match_layers.append( self._match_layers.append(
self.add_sublayer( self.add_sublayer(
'match_layer_%d' % i, 'match_layer_%d' % i,
fluid.dygraph.FC( fluid.FC(self.full_name(), self._hid_sizes[i], act='relu')))
self.full_name(), self._hid_sizes[i], act='relu')))
self._mat self._mat
def forward(self, users, items): def forward(self, users, items):
...@@ -88,7 +85,7 @@ class MLP(fluid.dygraph.Layer): ...@@ -88,7 +85,7 @@ class MLP(fluid.dygraph.Layer):
return match_vec return match_vec
class DeepCF(fluid.dygraph.Layer): class DeepCF(fluid.Layer):
def __init__(self, name_scope, num_users, num_items, matrix): def __init__(self, name_scope, num_users, num_items, matrix):
super(DeepCF, self).__init__(name_scope) super(DeepCF, self).__init__(name_scope)
self._num_users = num_users self._num_users = num_users
...@@ -99,11 +96,11 @@ class DeepCF(fluid.dygraph.Layer): ...@@ -99,11 +96,11 @@ class DeepCF(fluid.dygraph.Layer):
matrix.dtype, matrix.dtype,
is_bias=False, is_bias=False,
default_initializer=fluid.initializer.NumpyArrayInitializer(matrix)) default_initializer=fluid.initializer.NumpyArrayInitializer(matrix))
self._rating_matrix._stop_gradient = True self._rating_matrix.stop_gradient = True
self._mlp = MLP(self.full_name()) self._mlp = MLP(self.full_name())
self._dmf = DMF(self.full_name()) self._dmf = DMF(self.full_name())
self._match_fc = fluid.dygraph.FC(self.full_name(), 1, act='sigmoid') self._match_fc = fluid.FC(self.full_name(), 1, act='sigmoid')
def forward(self, users, items): def forward(self, users, items):
# users_emb = self._user_emb(users) # users_emb = self._user_emb(users)
...@@ -255,10 +252,10 @@ class TestDygraphDeepCF(unittest.TestCase): ...@@ -255,10 +252,10 @@ class TestDygraphDeepCF(unittest.TestCase):
fluid.layers.log_loss(prediction, fluid.layers.log_loss(prediction,
to_variable(labels_np[ to_variable(labels_np[
slice:slice + BATCH_SIZE]))) slice:slice + BATCH_SIZE])))
loss._backward() loss.backward()
adam.minimize(loss) adam.minimize(loss)
deepcf.clear_gradients() deepcf.clear_gradients()
dy_loss = loss._numpy() dy_loss = loss.numpy()
sys.stderr.write('dynamic loss: %s %s\n' % (slice, dy_loss)) sys.stderr.write('dynamic loss: %s %s\n' % (slice, dy_loss))
self.assertEqual(static_loss, dy_loss) self.assertEqual(static_loss, dy_loss)
......
...@@ -22,12 +22,12 @@ import paddle ...@@ -22,12 +22,12 @@ import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC from paddle.fluid import Conv2D, Pool2D, FC
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
class Discriminator(fluid.dygraph.Layer): class Discriminator(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(Discriminator, self).__init__(name_scope) super(Discriminator, self).__init__(name_scope)
self._fc1 = FC(self.full_name(), size=32, act='elu') self._fc1 = FC(self.full_name(), size=32, act='elu')
...@@ -38,7 +38,7 @@ class Discriminator(fluid.dygraph.Layer): ...@@ -38,7 +38,7 @@ class Discriminator(fluid.dygraph.Layer):
return self._fc2(x) return self._fc2(x)
class Generator(fluid.dygraph.Layer): class Generator(fluid.Layer):
def __init__(self, name_scope): def __init__(self, name_scope):
super(Generator, self).__init__(name_scope) super(Generator, self).__init__(name_scope)
self._fc1 = FC(self.full_name(), size=64, act='elu') self._fc1 = FC(self.full_name(), size=64, act='elu')
...@@ -150,7 +150,7 @@ class TestDygraphGAN(unittest.TestCase): ...@@ -150,7 +150,7 @@ class TestDygraphGAN(unittest.TestCase):
x=d_fake, label=to_variable(np.zeros([2, 1], np.float32)))) x=d_fake, label=to_variable(np.zeros([2, 1], np.float32))))
d_loss = d_loss_real + d_loss_fake d_loss = d_loss_real + d_loss_fake
d_loss._backward() d_loss.backward()
sgd.minimize(d_loss) sgd.minimize(d_loss)
discriminator.clear_gradients() discriminator.clear_gradients()
generator.clear_gradients() generator.clear_gradients()
...@@ -160,15 +160,15 @@ class TestDygraphGAN(unittest.TestCase): ...@@ -160,15 +160,15 @@ class TestDygraphGAN(unittest.TestCase):
g_loss = fluid.layers.reduce_mean( g_loss = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits( fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_fake, label=to_variable(np.ones([2, 1], np.float32)))) x=d_fake, label=to_variable(np.ones([2, 1], np.float32))))
g_loss._backward() g_loss.backward()
sgd.minimize(g_loss) sgd.minimize(g_loss)
for p in discriminator.parameters(): for p in discriminator.parameters():
dy_params[p.name] = p._numpy() dy_params[p.name] = p.numpy()
for p in generator.parameters(): for p in generator.parameters():
dy_params[p.name] = p._numpy() dy_params[p.name] = p.numpy()
dy_g_loss = g_loss._numpy() dy_g_loss = g_loss.numpy()
dy_d_loss = d_loss._numpy() dy_d_loss = d_loss.numpy()
self.assertEqual(dy_g_loss, static_g_loss) self.assertEqual(dy_g_loss, static_g_loss)
self.assertEqual(dy_d_loss, static_d_loss) self.assertEqual(dy_d_loss, static_d_loss)
......
...@@ -15,14 +15,12 @@ ...@@ -15,14 +15,12 @@
import contextlib import contextlib
import unittest import unittest
import numpy as np import numpy as np
import six
import sys import sys
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.optimizer import AdamOptimizer from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
...@@ -31,7 +29,7 @@ def gen_data(): ...@@ -31,7 +29,7 @@ def gen_data():
pass pass
class GraphConv(fluid.dygraph.Layer): class GraphConv(fluid.Layer):
def __init__(self, name_scope, in_features, out_features): def __init__(self, name_scope, in_features, out_features):
super(GraphConv, self).__init__(name_scope) super(GraphConv, self).__init__(name_scope)
...@@ -50,7 +48,7 @@ class GraphConv(fluid.dygraph.Layer): ...@@ -50,7 +48,7 @@ class GraphConv(fluid.dygraph.Layer):
return fluid.layers.matmul(adj, support) + self.bias return fluid.layers.matmul(adj, support) + self.bias
class GCN(fluid.dygraph.Layer): class GCN(fluid.Layer):
def __init__(self, name_scope, num_hidden): def __init__(self, name_scope, num_hidden):
super(GCN, self).__init__(name_scope) super(GCN, self).__init__(name_scope)
self.gc = GraphConv(self.full_name(), num_hidden, 32) self.gc = GraphConv(self.full_name(), num_hidden, 32)
...@@ -134,10 +132,9 @@ class TestDygraphGNN(unittest.TestCase): ...@@ -134,10 +132,9 @@ class TestDygraphGNN(unittest.TestCase):
loss = fluid.layers.reduce_sum(loss) loss = fluid.layers.reduce_sum(loss)
adam = AdamOptimizer(learning_rate=1e-3) adam = AdamOptimizer(learning_rate=1e-3)
adam.minimize(loss) adam.minimize(loss)
self.assertEqual(static_loss, loss._numpy()) self.assertEqual(static_loss, loss.numpy())
self.assertTrue( self.assertTrue(np.allclose(static_weight, model.gc.weight.numpy()))
np.allclose(static_weight, model.gc.weight._numpy())) sys.stderr.write('%s %s\n' % (static_loss, loss.numpy()))
sys.stderr.write('%s %s\n' % (static_loss, loss._numpy()))
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -128,25 +128,25 @@ class TestImperativeMnist(unittest.TestCase): ...@@ -128,25 +128,25 @@ class TestImperativeMnist(unittest.TestCase):
img = to_variable(dy_x_data) img = to_variable(dy_x_data)
label = to_variable(y_data) label = to_variable(y_data)
label._stop_gradient = True label.stop_gradient = True
cost = mnist(img) cost = mnist(img)
loss = fluid.layers.cross_entropy(cost, label) loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss) avg_loss = fluid.layers.mean(loss)
dy_out = avg_loss._numpy() dy_out = avg_loss.numpy()
if epoch == 0 and batch_id == 0: if epoch == 0 and batch_id == 0:
for param in mnist.parameters(): for param in mnist.parameters():
dy_param_init_value[param.name] = param._numpy() dy_param_init_value[param.name] = param.numpy()
avg_loss._backward() avg_loss.backward()
sgd.minimize(avg_loss) sgd.minimize(avg_loss)
mnist.clear_gradients() mnist.clear_gradients()
dy_param_value = {} dy_param_value = {}
for param in mnist.parameters(): for param in mnist.parameters():
dy_param_value[param.name] = param._numpy() dy_param_value[param.name] = param.numpy()
with new_program_scope(): with new_program_scope():
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
......
...@@ -28,7 +28,7 @@ from paddle.fluid.dygraph.base import to_variable ...@@ -28,7 +28,7 @@ from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
class MLP(fluid.dygraph.Layer): class MLP(fluid.Layer):
def __init__(self, name_scope, param_attr=None, bias_attr=None): def __init__(self, name_scope, param_attr=None, bias_attr=None):
super(MLP, self).__init__(name_scope) super(MLP, self).__init__(name_scope)
...@@ -75,18 +75,18 @@ class TestImperativeOptimizerBase(unittest.TestCase): ...@@ -75,18 +75,18 @@ class TestImperativeOptimizerBase(unittest.TestCase):
cost = mlp(img) cost = mlp(img)
avg_loss = fluid.layers.reduce_mean(cost) avg_loss = fluid.layers.reduce_mean(cost)
dy_out = avg_loss._numpy() dy_out = avg_loss.numpy()
if batch_id == 0: if batch_id == 0:
for param in mlp.parameters(): for param in mlp.parameters():
dy_param_init_value[param.name] = param._numpy() dy_param_init_value[param.name] = param.numpy()
avg_loss._backward() avg_loss.backward()
optimizer.minimize(avg_loss) optimizer.minimize(avg_loss)
mlp.clear_gradients() mlp.clear_gradients()
dy_param_value = {} dy_param_value = {}
for param in mlp.parameters(): for param in mlp.parameters():
dy_param_value[param.name] = param._numpy() dy_param_value[param.name] = param.numpy()
with new_program_scope(): with new_program_scope():
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
......
...@@ -24,10 +24,9 @@ from paddle.fluid.dygraph.base import to_variable ...@@ -24,10 +24,9 @@ from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
import numpy as np import numpy as np
import six import six
from paddle.fluid.backward import append_backward
class SimpleLSTMRNN(fluid.dygraph.Layer): class SimpleLSTMRNN(fluid.Layer):
def __init__(self, def __init__(self,
name_scope, name_scope,
hidden_size, hidden_size,
...@@ -45,7 +44,7 @@ class SimpleLSTMRNN(fluid.dygraph.Layer): ...@@ -45,7 +44,7 @@ class SimpleLSTMRNN(fluid.dygraph.Layer):
self.cell_array = [] self.cell_array = []
self.hidden_array = [] self.hidden_array = []
def _build_once(self, input_embedding, init_hidden=None, init_cell=None): def build_once(self, input_embedding, init_hidden=None, init_cell=None):
self.weight_1_arr = [] self.weight_1_arr = []
self.weight_2_arr = [] self.weight_2_arr = []
self.bias_arr = [] self.bias_arr = []
...@@ -132,7 +131,7 @@ class SimpleLSTMRNN(fluid.dygraph.Layer): ...@@ -132,7 +131,7 @@ class SimpleLSTMRNN(fluid.dygraph.Layer):
return real_res, last_hidden, last_cell return real_res, last_hidden, last_cell
class PtbModel(fluid.dygraph.Layer): class PtbModel(fluid.Layer):
def __init__(self, def __init__(self,
name_scope, name_scope,
hidden_size, hidden_size,
...@@ -177,7 +176,7 @@ class PtbModel(fluid.dygraph.Layer): ...@@ -177,7 +176,7 @@ class PtbModel(fluid.dygraph.Layer):
default_initializer=fluid.initializer.UniformInitializer( default_initializer=fluid.initializer.UniformInitializer(
low=-self.init_scale, high=self.init_scale)) low=-self.init_scale, high=self.init_scale))
def _build_once(self, input, label, init_hidden, init_cell): def build_once(self, input, label, init_hidden, init_cell):
pass pass
def forward(self, input, label, init_hidden, init_cell): def forward(self, input, label, init_hidden, init_cell):
...@@ -260,13 +259,13 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -260,13 +259,13 @@ class TestDygraphPtbRnn(unittest.TestCase):
init_cell) init_cell)
if i == 0: if i == 0:
for param in ptb_model.parameters(): for param in ptb_model.parameters():
dy_param_init[param.name] = param._numpy() dy_param_init[param.name] = param.numpy()
dy_loss._backward() dy_loss.backward()
sgd.minimize(dy_loss) sgd.minimize(dy_loss)
ptb_model.clear_gradients() ptb_model.clear_gradients()
if i == batch_num - 1: if i == batch_num - 1:
for param in ptb_model.parameters(): for param in ptb_model.parameters():
dy_param_updated[param.name] = param._numpy() dy_param_updated[param.name] = param.numpy()
with new_program_scope(): with new_program_scope():
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
...@@ -333,10 +332,10 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -333,10 +332,10 @@ class TestDygraphPtbRnn(unittest.TestCase):
for k in range(3, len(out)): for k in range(3, len(out)):
static_param_updated[static_param_name_list[k - static_param_updated[static_param_name_list[k -
3]] = out[k] 3]] = out[k]
self.assertTrue(np.allclose(static_loss_value, dy_loss._numpy())) self.assertTrue(np.allclose(static_loss_value, dy_loss.numpy()))
self.assertTrue(np.allclose(static_last_cell_value, last_cell._numpy())) self.assertTrue(np.allclose(static_last_cell_value, last_cell.numpy()))
self.assertTrue( self.assertTrue(
np.allclose(static_last_hidden_value, last_hidden._numpy())) np.allclose(static_last_hidden_value, last_hidden.numpy()))
for key, value in six.iteritems(static_param_init): for key, value in six.iteritems(static_param_init):
# print("static_init name: {}, value {}".format(key, value)) # print("static_init name: {}, value {}".format(key, value))
# print("dy_init name: {}, value {}".format(key, dy_param_init[key])) # print("dy_init name: {}, value {}".format(key, dy_param_init[key]))
......
...@@ -21,7 +21,7 @@ import paddle ...@@ -21,7 +21,7 @@ import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC from paddle.fluid import Conv2D, Pool2D, BatchNorm, FC
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
...@@ -68,7 +68,7 @@ def optimizer_setting(params): ...@@ -68,7 +68,7 @@ def optimizer_setting(params):
return optimizer return optimizer
class ConvBNLayer(fluid.dygraph.Layer): class ConvBNLayer(fluid.Layer):
def __init__(self, def __init__(self,
name_scope, name_scope,
num_channels, num_channels,
...@@ -99,7 +99,7 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -99,7 +99,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
return y return y
class BottleneckBlock(fluid.dygraph.Layer): class BottleneckBlock(fluid.Layer):
def __init__(self, def __init__(self,
name_scope, name_scope,
num_channels, num_channels,
...@@ -156,7 +156,7 @@ class BottleneckBlock(fluid.dygraph.Layer): ...@@ -156,7 +156,7 @@ class BottleneckBlock(fluid.dygraph.Layer):
return layer_helper.append_activation(y) return layer_helper.append_activation(y)
class ResNet(fluid.dygraph.Layer): class ResNet(fluid.Layer):
def __init__(self, name_scope, layers=50, class_dim=102): def __init__(self, name_scope, layers=50, class_dim=102):
super(ResNet, self).__init__(name_scope) super(ResNet, self).__init__(name_scope)
...@@ -247,7 +247,7 @@ class TestDygraphResnet(unittest.TestCase): ...@@ -247,7 +247,7 @@ class TestDygraphResnet(unittest.TestCase):
dy_param_init_value = {} dy_param_init_value = {}
for param in resnet.parameters(): for param in resnet.parameters():
dy_param_init_value[param.name] = param._numpy() dy_param_init_value[param.name] = param.numpy()
for batch_id, data in enumerate(train_reader()): for batch_id, data in enumerate(train_reader()):
if batch_id >= batch_num: if batch_id >= batch_num:
...@@ -260,20 +260,20 @@ class TestDygraphResnet(unittest.TestCase): ...@@ -260,20 +260,20 @@ class TestDygraphResnet(unittest.TestCase):
img = to_variable(dy_x_data) img = to_variable(dy_x_data)
label = to_variable(y_data) label = to_variable(y_data)
label._stop_gradient = True label.stop_gradient = True
out = resnet(img) out = resnet(img)
loss = fluid.layers.cross_entropy(input=out, label=label) loss = fluid.layers.cross_entropy(input=out, label=label)
avg_loss = fluid.layers.mean(x=loss) avg_loss = fluid.layers.mean(x=loss)
dy_out = avg_loss._numpy() dy_out = avg_loss.numpy()
if batch_id == 0: if batch_id == 0:
for param in resnet.parameters(): for param in resnet.parameters():
if param.name not in dy_param_init_value: if param.name not in dy_param_init_value:
dy_param_init_value[param.name] = param._numpy() dy_param_init_value[param.name] = param.numpy()
avg_loss._backward() avg_loss.backward()
dy_grad_value = {} dy_grad_value = {}
for param in resnet.parameters(): for param in resnet.parameters():
...@@ -288,7 +288,7 @@ class TestDygraphResnet(unittest.TestCase): ...@@ -288,7 +288,7 @@ class TestDygraphResnet(unittest.TestCase):
dy_param_value = {} dy_param_value = {}
for param in resnet.parameters(): for param in resnet.parameters():
dy_param_value[param.name] = param._numpy() dy_param_value[param.name] = param.numpy()
with new_program_scope(): with new_program_scope():
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
......
...@@ -16,7 +16,8 @@ from __future__ import print_function ...@@ -16,7 +16,8 @@ from __future__ import print_function
import unittest import unittest
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph import Embedding, LayerNorm, FC, to_variable, Layer, guard from paddle.fluid import Embedding, LayerNorm, FC, Layer
from paddle.fluid.dygraph import to_variable, guard
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
from paddle.fluid import core from paddle.fluid import core
import numpy as np import numpy as np
...@@ -985,15 +986,15 @@ class TestDygraphTransformer(unittest.TestCase): ...@@ -985,15 +986,15 @@ class TestDygraphTransformer(unittest.TestCase):
if i == 0: if i == 0:
for param in transformer.parameters(): for param in transformer.parameters():
dy_param_init[param.name] = param._numpy() dy_param_init[param.name] = param.numpy()
dy_avg_cost._backward() dy_avg_cost.backward()
optimizer.minimize(dy_avg_cost) optimizer.minimize(dy_avg_cost)
transformer.clear_gradients() transformer.clear_gradients()
if i == batch_num - 1: if i == batch_num - 1:
for param in transformer.parameters(): for param in transformer.parameters():
dy_param_updated[param.name] = param._numpy() dy_param_updated[param.name] = param.numpy()
with new_program_scope(): with new_program_scope():
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
...@@ -1069,13 +1070,13 @@ class TestDygraphTransformer(unittest.TestCase): ...@@ -1069,13 +1070,13 @@ class TestDygraphTransformer(unittest.TestCase):
4]] = out[k] 4]] = out[k]
self.assertTrue( self.assertTrue(
np.array_equal(static_avg_cost_value, dy_avg_cost._numpy())) np.array_equal(static_avg_cost_value, dy_avg_cost.numpy()))
self.assertTrue( self.assertTrue(
np.array_equal(static_sum_cost_value, dy_sum_cost._numpy())) np.array_equal(static_sum_cost_value, dy_sum_cost.numpy()))
self.assertTrue( self.assertTrue(
np.array_equal(static_predict_value, dy_predict._numpy())) np.array_equal(static_predict_value, dy_predict.numpy()))
self.assertTrue( self.assertTrue(
np.array_equal(static_token_num_value, dy_token_num._numpy())) np.array_equal(static_token_num_value, dy_token_num.numpy()))
for key, value in six.iteritems(static_param_init): for key, value in six.iteritems(static_param_init):
self.assertTrue(np.array_equal(value, dy_param_init[key])) self.assertTrue(np.array_equal(value, dy_param_init[key]))
for key, value in six.iteritems(static_param_updated): for key, value in six.iteritems(static_param_updated):
......
...@@ -102,7 +102,7 @@ class TestLayer(LayerTest): ...@@ -102,7 +102,7 @@ class TestLayer(LayerTest):
dy_ret = lm(base.to_variable(inp)) dy_ret = lm(base.to_variable(inp))
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
self.assertTrue(np.allclose(dy_ret._numpy(), static_ret2)) self.assertTrue(np.allclose(dy_ret.numpy(), static_ret2))
def test_relu(self): def test_relu(self):
with self.static_graph(): with self.static_graph():
...@@ -116,7 +116,7 @@ class TestLayer(LayerTest): ...@@ -116,7 +116,7 @@ class TestLayer(LayerTest):
t = np.ones([3, 3], dtype='float32') t = np.ones([3, 3], dtype='float32')
dy_ret = layers.relu(base.to_variable(t)) dy_ret = layers.relu(base.to_variable(t))
self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
def test_matmul(self): def test_matmul(self):
with self.static_graph(): with self.static_graph():
...@@ -137,7 +137,7 @@ class TestLayer(LayerTest): ...@@ -137,7 +137,7 @@ class TestLayer(LayerTest):
t2 = np.ones([3, 3], dtype='float32') t2 = np.ones([3, 3], dtype='float32')
dy_ret = layers.matmul(base.to_variable(t), base.to_variable(t2)) dy_ret = layers.matmul(base.to_variable(t), base.to_variable(t2))
self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
def test_conv2d(self): def test_conv2d(self):
with self.static_graph(): with self.static_graph():
...@@ -164,7 +164,7 @@ class TestLayer(LayerTest): ...@@ -164,7 +164,7 @@ class TestLayer(LayerTest):
'conv2d', num_channels=3, num_filters=3, filter_size=[2, 2]) 'conv2d', num_channels=3, num_filters=3, filter_size=[2, 2])
dy_ret = conv2d(base.to_variable(images)) dy_ret = conv2d(base.to_variable(images))
self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
def test_gru_unit(self): def test_gru_unit(self):
...@@ -206,7 +206,7 @@ class TestLayer(LayerTest): ...@@ -206,7 +206,7 @@ class TestLayer(LayerTest):
for i in range(len(static_ret)): for i in range(len(static_ret)):
self.assertTrue(np.allclose(static_ret[i], static_ret2[i])) self.assertTrue(np.allclose(static_ret[i], static_ret2[i]))
self.assertTrue(np.allclose(static_ret[i], dy_ret[i]._numpy())) self.assertTrue(np.allclose(static_ret[i], dy_ret[i].numpy()))
def test_elementwise_math(self): def test_elementwise_math(self):
n = np.ones([3, 3], dtype='float32') n = np.ones([3, 3], dtype='float32')
...@@ -248,8 +248,8 @@ class TestLayer(LayerTest): ...@@ -248,8 +248,8 @@ class TestLayer(LayerTest):
ret = layers.elementwise_sub(ret, n5) ret = layers.elementwise_sub(ret, n5)
dy_ret = layers.elementwise_mul(ret, n6) dy_ret = layers.elementwise_mul(ret, n6)
self.assertTrue( self.assertTrue(
np.allclose(static_ret, dy_ret._numpy()), np.allclose(static_ret, dy_ret.numpy()),
'%s vs %s' % (static_ret, dy_ret._numpy())) '%s vs %s' % (static_ret, dy_ret.numpy()))
def test_elementwise_minmax(self): def test_elementwise_minmax(self):
n = np.ones([3, 3], dtype='float32') n = np.ones([3, 3], dtype='float32')
...@@ -259,8 +259,8 @@ class TestLayer(LayerTest): ...@@ -259,8 +259,8 @@ class TestLayer(LayerTest):
min_ret = layers.elementwise_min(n, n2) min_ret = layers.elementwise_min(n, n2)
max_ret = layers.elementwise_max(n, n2) max_ret = layers.elementwise_max(n, n2)
self.assertTrue(np.allclose(n, min_ret._numpy())) self.assertTrue(np.allclose(n, min_ret.numpy()))
self.assertTrue(np.allclose(n2, max_ret._numpy())) self.assertTrue(np.allclose(n2, max_ret.numpy()))
def test_sequence_conv(self): def test_sequence_conv(self):
inp_np = np.arange(12).reshape([3, 4]).astype('float32') inp_np = np.arange(12).reshape([3, 4]).astype('float32')
...@@ -327,7 +327,7 @@ class TestLayer(LayerTest): ...@@ -327,7 +327,7 @@ class TestLayer(LayerTest):
'conv2d_transpose', num_filters=10, output_size=28) 'conv2d_transpose', num_filters=10, output_size=28)
dy_rlt = conv2d_transpose(base.to_variable(inp_np)) dy_rlt = conv2d_transpose(base.to_variable(inp_np))
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt))
def test_bilinear_tensor_product(self): def test_bilinear_tensor_product(self):
inp_np_x = np.array([[1, 2, 3]]).astype('float32') inp_np_x = np.array([[1, 2, 3]]).astype('float32')
...@@ -370,7 +370,7 @@ class TestLayer(LayerTest): ...@@ -370,7 +370,7 @@ class TestLayer(LayerTest):
dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y)) dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y))
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt))
def test_prelu(self): def test_prelu(self):
inp_np = np.ones([5, 200, 100, 100]).astype('float32') inp_np = np.ones([5, 200, 100, 100]).astype('float32')
...@@ -411,7 +411,7 @@ class TestLayer(LayerTest): ...@@ -411,7 +411,7 @@ class TestLayer(LayerTest):
dy_rlt = prelu(base.to_variable(inp_np)) dy_rlt = prelu(base.to_variable(inp_np))
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt))
def test_embeding(self): def test_embeding(self):
inp_word = np.array([[[1]]]).astype('int64') inp_word = np.array([[[1]]]).astype('int64')
...@@ -444,7 +444,7 @@ class TestLayer(LayerTest): ...@@ -444,7 +444,7 @@ class TestLayer(LayerTest):
static_rlt3 = emb2(base.to_variable(inp_word)) static_rlt3 = emb2(base.to_variable(inp_word))
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(static_rlt3._numpy(), static_rlt)) self.assertTrue(np.allclose(static_rlt3.numpy(), static_rlt))
def test_nce(self): def test_nce(self):
window_size = 5 window_size = 5
...@@ -558,7 +558,7 @@ class TestLayer(LayerTest): ...@@ -558,7 +558,7 @@ class TestLayer(LayerTest):
nce_loss3 = nce(embs3, words[label_word]) nce_loss3 = nce(embs3, words[label_word])
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(nce_loss3._numpy(), static_rlt)) self.assertTrue(np.allclose(nce_loss3.numpy(), static_rlt))
def test_conv3d(self): def test_conv3d(self):
with self.static_graph(): with self.static_graph():
...@@ -585,7 +585,7 @@ class TestLayer(LayerTest): ...@@ -585,7 +585,7 @@ class TestLayer(LayerTest):
conv3d = nn.Conv3D('conv3d', num_filters=3, filter_size=2) conv3d = nn.Conv3D('conv3d', num_filters=3, filter_size=2)
dy_ret = conv3d(base.to_variable(images)) dy_ret = conv3d(base.to_variable(images))
self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
def test_row_conv(self): def test_row_conv(self):
...@@ -679,7 +679,7 @@ class TestLayer(LayerTest): ...@@ -679,7 +679,7 @@ class TestLayer(LayerTest):
groupNorm = nn.GroupNorm('GroupNorm', groups=2) groupNorm = nn.GroupNorm('GroupNorm', groups=2)
dy_ret = groupNorm(base.to_variable(input)) dy_ret = groupNorm(base.to_variable(input))
self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
def test_spectral_norm(self): def test_spectral_norm(self):
...@@ -729,7 +729,7 @@ class TestLayer(LayerTest): ...@@ -729,7 +729,7 @@ class TestLayer(LayerTest):
spectralNorm = nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2) spectralNorm = nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2)
dy_ret = spectralNorm(base.to_variable(input)) dy_ret = spectralNorm(base.to_variable(input))
self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
def test_tree_conv(self): def test_tree_conv(self):
...@@ -802,7 +802,7 @@ class TestLayer(LayerTest): ...@@ -802,7 +802,7 @@ class TestLayer(LayerTest):
dy_ret = treeConv(base.to_variable(vectors), base.to_variable(adj)) dy_ret = treeConv(base.to_variable(vectors), base.to_variable(adj))
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) self.assertTrue(np.allclose(static_ret, dy_ret.numpy()))
def test_conv3d_transpose(self): def test_conv3d_transpose(self):
input_array = np.arange(0, 48).reshape( input_array = np.arange(0, 48).reshape(
...@@ -832,7 +832,7 @@ class TestLayer(LayerTest): ...@@ -832,7 +832,7 @@ class TestLayer(LayerTest):
use_cudnn=False) use_cudnn=False)
dy_rlt = conv3d_transpose(base.to_variable(input_array)) dy_rlt = conv3d_transpose(base.to_variable(input_array))
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt))
class TestBook(unittest.TestCase): class TestBook(unittest.TestCase):
......