From 494d8ca65009b8cc2682fde7bd86d2130209ac7e Mon Sep 17 00:00:00 2001 From: lujun Date: Thu, 11 Apr 2019 11:47:58 +0800 Subject: [PATCH] cherry pick move api, test=release/1.4 --- paddle/fluid/API.spec | 1 + python/paddle/fluid/__init__.py | 2 + python/paddle/fluid/dygraph/base.py | 2 +- python/paddle/fluid/dygraph/checkpoint.py | 10 +- .../fluid/dygraph/layer_object_helper.py | 2 +- python/paddle/fluid/dygraph/layers.py | 6 +- python/paddle/fluid/dygraph/nn.py | 588 +++++++++++++++--- python/paddle/fluid/framework.py | 62 +- python/paddle/fluid/initializer.py | 16 +- python/paddle/fluid/layer_helper.py | 2 +- python/paddle/fluid/layer_helper_base.py | 8 +- python/paddle/fluid/layers/nn.py | 8 +- python/paddle/fluid/optimizer.py | 10 +- .../fluid/tests/unittests/test_base_layer.py | 10 +- .../tests/unittests/test_imperative_basic.py | 52 +- .../unittests/test_imperative_checkpoint.py | 14 +- .../tests/unittests/test_imperative_deepcf.py | 31 +- .../tests/unittests/test_imperative_gan.py | 18 +- .../tests/unittests/test_imperative_gnn.py | 13 +- .../tests/unittests/test_imperative_mnist.py | 10 +- .../unittests/test_imperative_optimizer.py | 10 +- .../unittests/test_imperative_ptb_rnn.py | 21 +- .../tests/unittests/test_imperative_resnet.py | 20 +- .../unittests/test_imperative_transformer.py | 17 +- .../fluid/tests/unittests/test_layers.py | 38 +- 25 files changed, 703 insertions(+), 268 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index b19d50a6ad..8143bde302 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -13,6 +13,7 @@ paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, d paddle.fluid.cuda_places (ArgSpec(args=['device_ids'], varargs=None, keywords=None, defaults=(None,)), ('document', '7d9a51fc9cf3c5245b5227080a8064c3')) paddle.fluid.cpu_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', '4c0cd83f0b401fc2ff84c70974e5d210')) paddle.fluid.cuda_pinned_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd0c3ebd813c39958c92b78e3eef7e912')) +paddle.fluid.in_dygraph_mode (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'f06314a1cb30c96b5808dde2219c2dae')) paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f5369953dd0c443961cf79f7a00e1a03')) paddle.fluid.Executor.infer_from_dataset (ArgSpec(args=['self', 'program', 'dataset', 'scope', 'thread', 'debug', 'fetch_list', 'fetch_info', 'print_period'], varargs=None, keywords=None, defaults=(None, None, None, 0, False, None, None, 100)), ('document', '9c7decb955b9c4f718114179c8985581')) diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index eb6895f2a6..811eec9072 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -66,6 +66,8 @@ from . import compiler from .compiler import * from paddle.fluid.layers.math_op_patch import monkey_patch_variable from . import install_check +from .dygraph.nn import * +from .dygraph.layers import * Tensor = LoDTensor diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py index d55dbbb9c7..bf484b35c7 100644 --- a/python/paddle/fluid/dygraph/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -22,7 +22,7 @@ __all__ = ['enabled', 'guard', 'to_variable'] def enabled(): - return framework._in_dygraph_mode() + return framework.in_dygraph_mode() @signature_safe_contextmanager diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py index f992ae0576..f2b01aece7 100644 --- a/python/paddle/fluid/dygraph/checkpoint.py +++ b/python/paddle/fluid/dygraph/checkpoint.py @@ -97,20 +97,12 @@ def load_persistables(vardict, dirname, filename=None): Examples: .. code-block:: python - my_layer = layer(fluid.dygraph.Layer) + my_layer = layer(fluid.Layer) param_path = "./my_paddle_model" param_dict = fluid.dygraph.load_persistables(my_layer.parameters(), param_path) param_1 = param_dict['PtbModel_0.w_1'] - or: - my_layer = layer(fluid.dygraph.Layer) - param_path = "./my_paddle_model" - filename = "model.file" - param_dict = fluid.dygraph.load_persistables(my_layer.state_dict(), param_path, - filename=filename) - param_1 = param_dict['PtbModel_0.w_1'] - """ if isinstance(vardict, collections.OrderedDict): return _load_var_from_file(vardict, dirname, filename) diff --git a/python/paddle/fluid/dygraph/layer_object_helper.py b/python/paddle/fluid/dygraph/layer_object_helper.py index f8e607aab8..b757f8fff2 100644 --- a/python/paddle/fluid/dygraph/layer_object_helper.py +++ b/python/paddle/fluid/dygraph/layer_object_helper.py @@ -16,7 +16,7 @@ from __future__ import print_function import copy import six -from ..framework import Parameter, _in_dygraph_mode +from ..framework import Parameter, in_dygraph_mode from ..param_attr import ParamAttr from .. import core from six.moves import zip diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index 014ee41f4c..39e06e3486 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -139,14 +139,14 @@ class Layer(core.Layer): def clear_gradients(self): for p in self.parameters(): - p._clear_gradient() + p.clear_gradient() - def _build_once(self, *args): + def build_once(self, *args): pass def __call__(self, *inputs): if not self._built: - self._build_once(*inputs) + self.build_once(*inputs) outputs = self.forward(*inputs) self._built = True diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 527c37cb2c..6384e56788 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -19,7 +19,7 @@ from six.moves import reduce from .. import core from ..layers import utils from . import layers -from ..framework import Variable, _in_dygraph_mode, OpProtoHolder, Parameter +from ..framework import Variable, in_dygraph_mode, OpProtoHolder, Parameter from ..param_attr import ParamAttr from ..initializer import Normal, Constant, NumpyArrayInitializer import numpy as np @@ -33,6 +33,109 @@ __all__ = [ class Conv2D(layers.Layer): + """ + The convolution2D layer calculates the output based on the input, filter + and strides, paddings, dilations, groups parameters. Input and + Output are in NCHW format, where N is batch size, C is the number of + channels, H is the height of the feature, and W is the width of the feature. + Filter is in MCHW format, where M is the number of output image channels, + C is the number of input image channels, H is the height of the filter, + and W is the width of the filter. If the groups is greater than 1, + C will equal the number of input image channels divided by the groups. + Please refer to UFLDL's `convolution + `_ + for more detials. + If bias attribution and activation type are provided, bias is added to the + output of the convolution, and the corresponding activation function is + applied to the final result. + + For each input :math:`X`, the equation is: + + .. math:: + + Out = \sigma (W \\ast X + b) + + Where: + + * :math:`X`: Input value, a tensor with NCHW format. + * :math:`W`: Filter value, a tensor with MCHW format. + * :math:`\\ast`: Convolution operation. + * :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. + * :math:`\\sigma`: Activation function. + * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. + + Example: + + - Input: + + Input shape: :math:`(N, C_{in}, H_{in}, W_{in})` + + Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)` + + - Output: + + Output shape: :math:`(N, C_{out}, H_{out}, W_{out})` + + Where + + .. math:: + + H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\ + W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1 + + Args: + input (Variable): The input image with [N, C, H, W] format. + num_filters(int): The number of filter. It is as same as the output + image channel. + filter_size (int|tuple|None): The filter size. If filter_size is a tuple, + it must contain two integers, (filter_size_H, filter_size_W). + Otherwise, the filter will be a square. + stride (int|tuple): The stride size. If stride is a tuple, it must + contain two integers, (stride_H, stride_W). Otherwise, the + stride_H = stride_W = stride. Default: stride = 1. + padding (int|tuple): The padding size. If padding is a tuple, it must + contain two integers, (padding_H, padding_W). Otherwise, the + padding_H = padding_W = padding. Default: padding = 0. + dilation (int|tuple): The dilation size. If dilation is a tuple, it must + contain two integers, (dilation_H, dilation_W). Otherwise, the + dilation_H = dilation_W = dilation. Default: dilation = 1. + groups (int): The groups number of the Conv2d Layer. According to grouped + convolution in Alex Krizhevsky's Deep CNN paper: when group=2, + the first half of the filters is only connected to the first half + of the input channels, while the second half of the filters is only + connected to the second half of the input channels. Default: groups=1. + param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights + of conv2d. If it is set to None or one attribute of ParamAttr, conv2d + will create ParamAttr as param_attr. If the Initializer of the param_attr + is not set, the parameter is initialized with :math:`Normal(0.0, std)`, + and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None. + bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv2d. + If it is set to False, no bias will be added to the output units. + If it is set to None or one attribute of ParamAttr, conv2d + will create ParamAttr as bias_attr. If the Initializer of the bias_attr + is not set, the bias is initialized zero. Default: None. + use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. Default: True + act (str): Activation type, if it is set to None, activation is not appended. + Default: None + name (str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Default: None + + Returns: + Variable: The tensor variable storing the convolution and \ + non-linearity activation result. + + Raises: + ValueError: If the shapes of input, filter_size, stride, padding and + groups mismatch. + + Examples: + .. code-block:: python + + data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32') + conv2d = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act="relu") + """ + def __init__(self, name_scope, num_channels, @@ -265,7 +368,7 @@ class Conv3D(layers.Layer): self._param_attr = param_attr self._bias_attr = bias_attr - def _build_once(self, input): + def build_once(self, input): num_channels = input.shape[1] self._dtype = self._helper.input_dtype(input) @@ -332,6 +435,116 @@ class Conv3D(layers.Layer): class Conv3DTranspose(layers.Layer): + """ + **Convlution3D transpose layer** + + The convolution3D transpose layer calculates the output based on the input, + filter, and dilations, strides, paddings. Input(Input) and output(Output) + are in NCDHW format. Where N is batch size, C is the number of channels, + D is the depth of the feature, H is the height of the feature, and W + is the width of the feature. Parameters(dilations, strides, paddings) are + two elements. These two elements represent height and width, respectively. + The details of convolution transpose layer, please refer to the following + explanation and references `therein `_. + If bias attribution and activation type are provided, bias is added to + the output of the convolution, and the corresponding activation function + is applied to the final result. + + For each input :math:`X`, the equation is: + + .. math:: + + Out = \sigma (W \\ast X + b) + + In the above equation: + + * :math:`X`: Input value, a tensor with NCDHW format. + * :math:`W`: Filter value, a tensor with MCDHW format. + * :math:`\\ast`: Convolution operation. + * :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. + * :math:`\\sigma`: Activation function. + * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. + + Example: + + - Input: + + Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` + + Filter shape: :math:`(C_{in}, C_{out}, D_f, H_f, W_f)` + + - Output: + + Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` + + Where + + .. math:: + + D_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\\\ + H_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\\\ + W_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1 + + Args: + input(Variable): The input image with [N, C, D, H, W] format. + num_filters(int): The number of the filter. It is as same as the output + image channel. + output_size(int|tuple|None): The output image size. If output size is a + tuple, it must contain three integers, (image_D, image_H, image_W). This + parameter only works when filter_size is None. + filter_size(int|tuple|None): The filter size. If filter_size is a tuple, + it must contain three integers, (filter_size_D, filter_size_H, filter_size_W). + Otherwise, the filter will be a square. None if use output size to + calculate filter_size. + padding(int|tuple): The padding size. If padding is a tuple, it must + contain three integers, (padding_D, padding_H, padding_W). Otherwise, the + padding_D = padding_H = padding_W = padding. Default: padding = 0. + stride(int|tuple): The stride size. If stride is a tuple, it must + contain three integers, (stride_D, stride_H, stride_W). Otherwise, the + stride_D = stride_H = stride_W = stride. Default: stride = 1. + dilation(int|tuple): The dilation size. If dilation is a tuple, it must + contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the + dilation_D = dilation_H = dilation_W = dilation. Default: dilation = 1. + groups(int): The groups number of the Conv3d transpose layer. Inspired by + grouped convolution in Alex Krizhevsky's Deep CNN paper, in which + when group=2, the first half of the filters is only connected to the + first half of the input channels, while the second half of the + filters is only connected to the second half of the input channels. + Default: groups=1 + param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights + of conv3d_transpose. If it is set to None or one attribute of ParamAttr, conv3d_transpose + will create ParamAttr as param_attr. If the Initializer of the param_attr + is not set, the parameter is initialized with Xavier. Default: None. + bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv3d_transpose. + If it is set to False, no bias will be added to the output units. + If it is set to None or one attribute of ParamAttr, conv3d_transpose + will create ParamAttr as bias_attr. If the Initializer of the bias_attr + is not set, the bias is initialized zero. Default: None. + use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. Default: True + act (str): Activation type, if it is set to None, activation is not appended. + Default: None. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + Variable: The tensor variable storing the convolution transpose result. + + Raises: + ValueError: If the shapes of input, filter_size, stride, padding and + groups mismatch. + + Examples: + .. code-block:: python + + conv3d_transpose = nn.Conv3DTranspose( + 'Conv3DTranspose', + num_filters=12, + filter_size=12, + use_cudnn=False) + transpose_res = conv3d_transpose(base.to_variable(input_array)) + """ + def __init__(self, name_scope, num_filters, @@ -362,7 +575,7 @@ class Conv3DTranspose(layers.Layer): self._bias_attr = bias_attr self._act = act - def _build_once(self, input): + def build_once(self, input): self._dtype = self._helper.input_dtype(input) self._input_channel = input.shape[1] @@ -436,6 +649,54 @@ class Conv3DTranspose(layers.Layer): class Pool2D(layers.Layer): + """ + ${comment} + + Args: + input (Variable): The input tensor of pooling operator. The format of + input tensor is NCHW, where N is batch size, C is + the number of channels, H is the height of the + feature, and W is the width of the feature. + pool_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain two integers, (pool_size_Height, pool_size_Width). + Otherwise, the pool kernel size will be a square of an int. + pool_type: ${pooling_type_comment} + pool_stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain two integers, (pool_stride_Height, pool_stride_Width). + Otherwise, the pool stride size will be a square of an int. + pool_padding (int|list|tuple): The pool padding size. If pool padding size is a tuple, + it must contain two integers, (pool_padding_on_Height, pool_padding_on_Width). + Otherwise, the pool padding size will be a square of an int. + global_pooling (bool): ${global_pooling_comment} + use_cudnn (bool): ${use_cudnn_comment} + ceil_mode (bool): ${ceil_mode_comment} + name (str|None): A name for this layer(optional). If set None, the + layer will be named automatically. + exclusive (bool): Whether to exclude padding points in average pooling + mode, default is true + + Returns: + Variable: The pooling result. + + Raises: + ValueError: If 'pool_type' is not "max" nor "avg" + ValueError: If 'global_pooling' is False and 'pool_size' is -1 + ValueError: If 'use_cudnn' is not a bool value. + + Examples: + + .. code-block:: python + + data = fluid.layers.data( + name='data', shape=[3, 32, 32], dtype='float32') + pool2d = fluid.Pool2D("pool2d",pool_size=2, + pool_type='max', + pool_stride=1, + global_pooling=False) + + pool2d_res = pool2d(data) + """ + def __init__(self, name_scope, pool_size=-1, @@ -495,6 +756,102 @@ class Pool2D(layers.Layer): class FC(layers.Layer): + """ + **Fully Connected Layer** + + This function creates a fully connected layer in the network. It can take + one or multiple tensors as its inputs(input can be a list of Variable, see + Args in detail). It creates a variable called weights for each input tensor, + which represents a fully connected weight matrix from each input unit to + each output unit. The fully connected layer multiplies each input tensor + with its corresponding weight to produce an output Tensor with shape [M, `size`], + where M is batch size. If multiple input tensors are given, the results of + multiple output tensors with shape [M, `size`] will be summed up. If bias_attr + is not None, a bias variable will be created and added to the output. + Finally, if activation is not None, it will be applied to the output as well. + + When the input is single tensor: + + .. math:: + + Out = Act({XW + b}) + + When the input are multiple tensors: + + .. math:: + + Out = Act({\sum_{i=0}^{N-1}X_iW_i + b}) + + In the above equation: + + * :math:`N`: Number of the input. N equals to len(input) if input is list of Variable. + * :math:`X_i`: The i-th input tensor. + * :math:`W_i`: The i-th weights matrix corresponding i-th input tensor. + * :math:`b`: The bias parameter created by this layer (if needed). + * :math:`Act`: The activation function. + * :math:`Out`: The output tensor. + + See below for an example. + + .. code-block:: text + + Given: + data_1.data = [[[0.1, 0.2], + [0.3, 0.4]]] + data_1.shape = (1, 2, 2) # 1 is batch_size + + data_2 = [[[0.1, 0.2, 0.3]]] + data_2.shape = (1, 1, 3) + + out = fluid.layers.fc(input=[data_1, data_2], size=2) + + Then: + out.data = [[0.18669507, 0.1893476]] + out.shape = (1, 2) + + Args: + input (Variable|list of Variable): The input tensor(s) of this layer, and the dimension of + the input tensor(s) is at least 2. + size(int): The number of output units in this layer. + num_flatten_dims (int, default 1): The fc layer can accept an input tensor with more than + two dimensions. If this happens, the multidimensional tensor will first be flattened + into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input + tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1) + dimensions will be flatten to form the first dimension of the final matrix (height of + the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to + form the second dimension of the final matrix (width of the matrix). For example, suppose + `X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3. + Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. + param_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for learnable + parameters/weights of this layer. + bias_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for the bias + of this layer. If it is set to False, no bias will be added to the output units. + If it is set to None, the bias is initialized zero. Default: None. + act (str, default None): Activation to be applied to the output of this layer. + is_test(bool): A flag indicating whether execution is in test phase. + name (str, default None): The name of this layer. + + Returns: + Variable: The transformation result. + + Raises: + ValueError: If rank of the input tensor is less than 2. + + Examples: + .. code-block:: python + + # when input is single tensor + data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32") + fc = fluid.FC("fc", size=1000, act="tanh") + fc_res = fc(data) + + # when input are multiple tensors + data_1 = fluid.layers.data(name="data_1", shape=[32, 32], dtype="float32") + data_2 = fluid.layers.data(name="data_2", shape=[24, 36], dtype="float32") + fc = fluid.FC("fc", size=1000, act="tanh") + fc_res = fc([data_1, data_2]) + """ + def __init__(self, name_scope, size, @@ -522,7 +879,7 @@ class FC(layers.Layer): assert isinstance(value, Parameter) self.__w[i] = value - def _build_once(self, input): + def build_once(self, input): i = 0 for inp, param in self._helper.iter_inputs_and_params(input, self._param_attr): @@ -591,6 +948,91 @@ class FC(layers.Layer): class BatchNorm(layers.Layer): + """ + **Batch Normalization Layer** + + Can be used as a normalizer function for conv2d and fully_connected operations. + The required data format for this layer is one of the following: + + 1. NHWC `[batch, in_height, in_width, in_channels]` + + 2. NCHW `[batch, in_channels, in_height, in_width]` + + Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing + Internal Covariate Shift `_ + for more details. + + :math:`input` is the input features over a mini-batch. + + .. math:: + + \\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\ + \ mini-batch\ mean \\\\ + \\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m}(x_i - \\ + \\mu_{\\beta})^2 \\qquad &//\ mini-batch\ variance \\\\ + \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\ + \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\ + y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift + + + When use_global_stats = True, the :math:`\\mu_{\\beta}` + and :math:`\\sigma_{\\beta}^{2}` are not the statistics of one mini-batch. + They are global (or running) statistics. (It usually got from the + pre-trained model.) + The training and testing (or inference) have the same behavior: + + .. math:: + + \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\ + \\sigma_{\\beta}^{2} + \\epsilon}} \\\\ + y_i &\\gets \\gamma \\hat{x_i} + \\beta + + Args: + input(variable): The rank of input variable can be 2, 3, 4, 5. + act(string, Default None): Activation type, linear|relu|prelu|... + is_test (bool, Default False): A flag indicating whether it is in + test phrase or not. + momentum(float, Default 0.9): The value used for the moving_mean and + moving_var computation. The updated formula is: + :math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)` + :math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)` + Default is 0.9. + epsilon(float, Default 1e-05): A value added to the denominator for + numerical stability. Default is 1e-5. + param_attr(ParamAttr|None): The parameter attribute for Parameter `scale` + of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm + will create ParamAttr as param_attr. If the Initializer of the param_attr + is not set, the parameter is initialized with Xavier. Default: None. + bias_attr(ParamAttr|None): The parameter attribute for the bias of batch_norm. + If it is set to None or one attribute of ParamAttr, batch_norm + will create ParamAttr as bias_attr. If the Initializer of the bias_attr + is not set, the bias is initialized zero. Default: None. + data_layout(string, default NCHW): NCHW|NHWC + in_place(bool, Default False): Make the input and output of batch norm reuse memory. + name(string, Default None): A name for this layer(optional). If set None, the layer + will be named automatically. + moving_mean_name(string, Default None): The name of moving_mean which store the global Mean. + moving_variance_name(string, Default None): The name of the moving_variance which store the global Variance. + do_model_average_for_mean_and_var(bool, Default False): Do model average for mean and variance or not. + fuse_with_relu (bool): if True, this OP performs relu after batch norm. + use_global_stats(bool, Default False): Whether to use global mean and + variance. In inference or test mode, set use_global_stats to true + or is_test to true, and the behavior is equivalent. + In train mode, when setting use_global_stats True, the global mean + and variance are also used during train period. + + Returns: + Variable: A tensor variable which is the result after applying batch normalization on the input. + + Examples: + + .. code-block:: python + fc = fluid.FC('fc', size=200, param_attr='fc1.w') + hidden1 = fc(x) + batch_norm = fluid.BatchNorm("batch_norm", 10) + hidden2 = batch_norm(hidden1) + """ + def __init__(self, name_scope, num_channels, @@ -629,7 +1071,7 @@ class BatchNorm(layers.Layer): dtype=self._dtype, default_initializer=Constant(1.0)) if use_global_stats and self._param_attr.learning_rate == 0.: - self._scale._stop_gradient = True + self._scale.stop_gradient = True self._bias = self.create_parameter( attr=self._param_attr, @@ -637,7 +1079,7 @@ class BatchNorm(layers.Layer): dtype=self._dtype, is_bias=True) if use_global_stats and self._param_attr.learning_rate == 0.: - self._bias._stop_gradient = True + self._bias.stop_gradient = True self._mean = self.create_parameter( attr=ParamAttr( @@ -647,7 +1089,7 @@ class BatchNorm(layers.Layer): do_model_average=do_model_average_for_mean_and_var), shape=param_shape, dtype=self._dtype) - self._mean._stop_gradient = True + self._mean.stop_gradient = True self._variance = self.create_parameter( attr=ParamAttr( @@ -657,7 +1099,7 @@ class BatchNorm(layers.Layer): do_model_average=do_model_average_for_mean_and_var), shape=param_shape, dtype=self._dtype) - self._variance._stop_gradient = True + self._variance.stop_gradient = True self._in_place = in_place self._momentum = momentum @@ -666,7 +1108,7 @@ class BatchNorm(layers.Layer): self._fuse_with_relu = fuse_with_relu self._use_global_stats = use_global_stats - def _build_once(self, input): + def build_once(self, input): pass def forward(self, input): @@ -747,7 +1189,7 @@ class Embedding(layers.Layer): dict_size = len(dataset.ids) input = fluid.layers.data(name='ids', shape=[32, 32], dtype='float32') - embedding = fluid.dygraph.Embedding(size=[dict_size, 16]) + embedding = fluid.Embedding(size=[dict_size, 16]) fc = embedding(input) """ @@ -797,70 +1239,70 @@ class Embedding(layers.Layer): class LayerNorm(layers.Layer): - def __init__(self, - name_scope, - scale=True, - shift=True, - begin_norm_axis=1, - epsilon=1e-05, - param_attr=None, - bias_attr=None, - act=None): - """ - ${comment} + """ + ${comment} - The formula is as follows: + The formula is as follows: - .. math:: + .. math:: - \\mu & = \\frac{1}{H}\\sum_{i=1}^{H} a_i + \\mu & = \\frac{1}{H}\\sum_{i=1}^{H} a_i - \\sigma & = \\sqrt{\\frac{1}{H}\sum_{i=1}^{H}(a_i - \\mu)^2} + \\sigma & = \\sqrt{\\frac{1}{H}\sum_{i=1}^{H}(a_i - \\mu)^2} - h & = f(\\frac{g}{\\sigma}(a - \\mu) + b) + h & = f(\\frac{g}{\\sigma}(a - \\mu) + b) - * :math:`a`: the vector representation of the summed inputs to the neurons - in that layer. + * :math:`a`: the vector representation of the summed inputs to the neurons + in that layer. - * :math:`H`: the number of hidden units in a layers + * :math:`H`: the number of hidden units in a layers - * :math:`g`: the trainable scale parameter. + * :math:`g`: the trainable scale parameter. - * :math:`b`: the trainable bias parameter. + * :math:`b`: the trainable bias parameter. - Args: - input(Variable): The input tensor variable. - scale(bool): Whether to learn the adaptive gain :math:`g` after - normalization. Default True. - shift(bool): Whether to learn the adaptive bias :math:`b` after - normalization. Default True. - begin_norm_axis(int): The normalization will be performed along - dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`. - Default 1. - epsilon(float): The small value added to the variance to prevent - division by zero. Default 1e-05. - param_attr(ParamAttr|None): The parameter attribute for the learnable - gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is - omitted. If :attr:`scale` is True and :attr:`param_attr` is None, - a default :code:`ParamAttr` would be added as scale. The - :attr:`param_attr` is initialized as 1 if it is added. Default None. - bias_attr(ParamAttr|None): The parameter attribute for the learnable - bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is - omitted. If :attr:`shift` is True and :attr:`param_attr` is None, - a default :code:`ParamAttr` would be added as bias. The - :attr:`bias_attr` is initialized as 0 if it is added. Default None. - act(str): Activation to be applied to the output of layer normalizaiton. - Default None. - Returns: - ${y_comment} + Args: + input(Variable): The input tensor variable. + scale(bool): Whether to learn the adaptive gain :math:`g` after + normalization. Default True. + shift(bool): Whether to learn the adaptive bias :math:`b` after + normalization. Default True. + begin_norm_axis(int): The normalization will be performed along + dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`. + Default 1. + epsilon(float): The small value added to the variance to prevent + division by zero. Default 1e-05. + param_attr(ParamAttr|None): The parameter attribute for the learnable + gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is + omitted. If :attr:`scale` is True and :attr:`param_attr` is None, + a default :code:`ParamAttr` would be added as scale. The + :attr:`param_attr` is initialized as 1 if it is added. Default None. + bias_attr(ParamAttr|None): The parameter attribute for the learnable + bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is + omitted. If :attr:`shift` is True and :attr:`param_attr` is None, + a default :code:`ParamAttr` would be added as bias. The + :attr:`bias_attr` is initialized as 0 if it is added. Default None. + act(str): Activation to be applied to the output of layer normalizaiton. + Default None. + Returns: + ${y_comment} - Examples: + Examples: - >>> data = fluid.layers.data(name='data', shape=[3, 32, 32], - >>> dtype='float32') - >>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1) - """ + >>> data = fluid.layers.data(name='data', shape=[3, 32, 32], + >>> dtype='float32') + >>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1) + """ + def __init__(self, + name_scope, + scale=True, + shift=True, + begin_norm_axis=1, + epsilon=1e-05, + param_attr=None, + bias_attr=None, + act=None): super(LayerNorm, self).__init__(name_scope) self._scale = scale self._shift = shift @@ -870,7 +1312,7 @@ class LayerNorm(layers.Layer): self._bias_attr = bias_attr self._act = act - def _build_once(self, input): + def build_once(self, input): self._dtype = self._helper.input_dtype(input) input_shape = input.shape param_shape = [ @@ -1232,7 +1674,7 @@ class NCE(layers.Layer): 'remote_prefetch': remote_prefetch } - def _build_once(self, input, label, sample_weight=None): + def build_once(self, input, label, sample_weight=None): assert isinstance(input, Variable) assert isinstance(label, Variable) @@ -1318,7 +1760,7 @@ class PRelu(layers.Layer): raise ValueError('mode should be one of all, channel, element.') self._alpha_shape = [1] - def _build_once(self, input): + def build_once(self, input): if self._mode == 'channel': self._alpha_shape = [1, input.shape[1], 1, 1] elif self._mode == 'element': @@ -1396,7 +1838,7 @@ class BilinearTensorProduct(layers.Layer): self._name = name self._inputs = dict() - def _build_once(self, x, y): + def build_once(self, x, y): self._dtype = self._helper.input_dtype(x) param_shape = [self._size, x.shape[1], y.shape[1]] @@ -1572,7 +2014,7 @@ class Conv2DTranspose(layers.Layer): self._output_size = output_size self._op_type = 'conv2d_transpose' - def _build_once(self, input): + def build_once(self, input): input_channel = input.shape[1] if (input_channel == self._groups and self._num_filters == input_channel and not self._use_cudnn): @@ -1686,7 +2128,7 @@ class SequenceConv(layers.Layer): bias_attr=None, param_attr=None, act=None): - assert not _in_dygraph_mode( + assert not in_dygraph_mode( ), "SequenceConv is not supported by dynamic graph mode yet!" super(SequenceConv, self).__init__(name_scope) self._num_filters = num_filters @@ -1696,7 +2138,7 @@ class SequenceConv(layers.Layer): self._bias_attr = bias_attr self._param_attr = param_attr - def _build_once(self, input): + def build_once(self, input): self._dtype = self._helper.input_dtype(input) filter_shape = [self._filter_size * input.shape[1], self._num_filters] self._filter_param = self.create_parameter( @@ -1726,14 +2168,14 @@ class RowConv(layers.Layer): future_context_size, param_attr=None, act=None): - assert not _in_dygraph_mode( + assert not in_dygraph_mode( ), "RowConv is not supported by dynamic graph mode yet!" super(RowConv, self).__init__(name_scope) self._act = act self._param_attr = param_attr self._future_context_size = future_context_size - def _build_once(self, input): + def build_once(self, input): self._dtype = self._helper.input_dtype(input) filter_shape = [self._future_context_size + 1, input.shape[1]] self._filter_param = self.create_parameter( @@ -1796,7 +2238,7 @@ class GroupNorm(layers.Layer): if data_layout != 'NCHW': raise ValueError("unsupported data layout:" + data_layout) - def _build_once(self, input): + def build_once(self, input): self._dtype = self._helper.input_dtype(input) param_shape = [input.shape[1]] if self._bias_attr: @@ -1849,7 +2291,7 @@ class SpectralNorm(layers.Layer): self._eps = eps self._dim = dim - def _build_once(self, weight): + def build_once(self, weight): self._dtype = self._helper.input_dtype(weight) input_shape = weight.shape h = input_shape[self._dim] @@ -1904,7 +2346,7 @@ class TreeConv(layers.Layer): self._bias_attr = bias_attr self._param_attr = param_attr - def _build_once(self, nodes_vector, edge_set): + def build_once(self, nodes_vector, edge_set): assert isinstance(nodes_vector, Variable) assert isinstance(edge_set, Variable) self._dtype = self._helper.input_dtype(nodes_vector) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 7953d98bcb..c05e5fb9e3 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -67,6 +67,7 @@ __all__ = [ 'cuda_places', 'cpu_places', 'cuda_pinned_places', + 'in_dygraph_mode', ] EMPTY_VAR_NAME = core.kEmptyVarName() @@ -79,7 +80,10 @@ _dygraph_tracer_ = None _dygraph_current_expected_place_ = None -def _in_dygraph_mode(): +def in_dygraph_mode(): + ''' + Returns(bool): True if the program is running in dynamic graph mode + ''' return _dygraph_tracer_ is not None @@ -396,7 +400,7 @@ class Variable(object): if not isinstance(dtype, core.VarDesc.VarType): dtype = convert_np_dtype_to_dtype_(dtype) - if _in_dygraph_mode(): + if in_dygraph_mode(): # record vars in tracer rather than blocks self._ivar = kwargs.get("ivar", None) if not self._ivar: @@ -482,21 +486,21 @@ class Variable(object): self.block.vars[name] = self self.op = None - self.stop_gradient = stop_gradient + self._stop_gradient = stop_gradient self.is_data = is_data - def _numpy(self): + def numpy(self): new_ivar = self._ivar._copy_to(core.CPUPlace(), True) return np.array(new_ivar.value().get_tensor()) - def _backward(self): + def backward(self): self._ivar._run_backward() - def _gradient(self): + def gradient(self): new_ivar = self._ivar._grad_ivar()._copy_to(core.CPUPlace(), True) return np.array(new_ivar.value().get_tensor()) - def _clear_gradient(self): + def clear_gradient(self): self._ivar._clear_gradient() def __str__(self): @@ -516,7 +520,7 @@ class Variable(object): Returns: str: The debug string. """ - if _in_dygraph_mode(): + if in_dygraph_mode(): # TODO(panyx0718): add more dygraph debug info. return 'name %s, dtype: %s shape: %s' % (self.name, self.dtype, self.shape) @@ -535,7 +539,7 @@ class Variable(object): __repr__ = __str__ - def _set_desc(self, input): + def set_desc(self, input): """ Set the variable description. @@ -548,43 +552,43 @@ class Variable(object): self.desc = input @property - def _stop_gradient(self): - if _in_dygraph_mode(): + def stop_gradient(self): + if in_dygraph_mode(): return self._ivar.stop_gradient else: - return self.stop_gradient + return self._stop_gradient - @_stop_gradient.setter - def _stop_gradient(self, s): - if _in_dygraph_mode(): + @stop_gradient.setter + def stop_gradient(self, s): + if in_dygraph_mode(): self._ivar.stop_gradient = s else: - self.stop_gradient = s + self._stop_gradient = s @property def persistable(self): - if _in_dygraph_mode(): + if in_dygraph_mode(): return self._ivar.persistable else: return self.desc.persistable() @persistable.setter def persistable(self, p): - if _in_dygraph_mode(): + if in_dygraph_mode(): return self._ivar.persistable else: self.desc.set_persistable(p) @property def name(self): - if _in_dygraph_mode(): + if in_dygraph_mode(): return self._ivar.name else: return cpt.to_text(self.desc.name()) @name.setter def name(self, new_name): - if _in_dygraph_mode(): + if in_dygraph_mode(): self._ivar.name = new_name else: self.desc.set_name(new_name) @@ -592,14 +596,14 @@ class Variable(object): @property def shape(self): # convert to tuple, make it as same as numpy API. - if _in_dygraph_mode(): + if in_dygraph_mode(): return self._ivar.shape else: return tuple(self.desc.shape()) @property def dtype(self): - if _in_dygraph_mode(): + if in_dygraph_mode(): return self._ivar.dtype else: return self.desc.dtype() @@ -611,7 +615,7 @@ class Variable(object): @property def type(self): - if _in_dygraph_mode(): + if in_dygraph_mode(): return self._ivar.dtype else: return self.desc.type() @@ -721,7 +725,7 @@ class Variable(object): name=unique_name.generate(".".join(self.name)), dtype=self.dtype, persistable=self.persistable, - stop_gradient=self._stop_gradient, ) + stop_gradient=self.stop_gradient, ) else: return self @@ -930,7 +934,7 @@ class Operator(object): inputs=None, outputs=None, attrs=None): - if _in_dygraph_mode(): + if in_dygraph_mode(): if type is None: raise ValueError( "`type` to initialized an Operator can not be None.") @@ -1049,7 +1053,7 @@ class Operator(object): for arg in out_args: out_arg_names.append(cpt.to_text(arg.name)) # TODO(minqiyang): could we remove variable's op in static mode? - if not _in_dygraph_mode(): + if not in_dygraph_mode(): arg.op = self self.desc.set_output(out_proto.name, out_arg_names) @@ -1095,7 +1099,7 @@ class Operator(object): @property def type(self): - if _in_dygraph_mode(): + if in_dygraph_mode(): return self.iop.type else: return self.desc.type() @@ -1638,7 +1642,7 @@ class Block(object): Returns: Operator: the append Operator. """ - if _in_dygraph_mode(): + if in_dygraph_mode(): op = Operator( block=self, desc=None, @@ -1710,7 +1714,7 @@ class Block(object): return self.ops[start:end] def _prepend_op(self, *args, **kwargs): - if _in_dygraph_mode(): + if in_dygraph_mode(): op = Operator( self, None, diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index 6aff93dcea..da2591b980 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -165,7 +165,7 @@ class ConstantInitializer(Initializer): 'force_cpu': self._force_cpu or force_init_on_cpu() }, stop_gradient=True) - if not framework._in_dygraph_mode(): + if not framework.in_dygraph_mode(): var.op = op return op @@ -245,7 +245,7 @@ class UniformInitializer(Initializer): attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}) - if not framework._in_dygraph_mode(): + if not framework.in_dygraph_mode(): var.op = op return op @@ -324,7 +324,7 @@ class NormalInitializer(Initializer): outputs={"Out": var}, attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}) - if not framework._in_dygraph_mode(): + if not framework.in_dygraph_mode(): var.op = op return op @@ -403,7 +403,7 @@ class TruncatedNormalInitializer(Initializer): outputs={"Out": var}, attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}) - if not framework._in_dygraph_mode(): + if not framework.in_dygraph_mode(): var.op = op return op @@ -509,7 +509,7 @@ class XavierInitializer(Initializer): "seed": self._seed }, stop_gradient=True) - if not framework._in_dygraph_mode(): + if not framework.in_dygraph_mode(): var.op = op return op @@ -610,7 +610,7 @@ class MSRAInitializer(Initializer): "seed": self._seed }, stop_gradient=True) - if not framework._in_dygraph_mode(): + if not framework.in_dygraph_mode(): var.op = op return op @@ -709,7 +709,7 @@ class BilinearInitializer(Initializer): 'shape': list(shape), value_name: values }) - if not framework._in_dygraph_mode(): + if not framework.in_dygraph_mode(): var.op = op return op @@ -768,7 +768,7 @@ class NumpyArrayInitializer(Initializer): value_name: values }, stop_gradient=True) - if not framework._in_dygraph_mode(): + if not framework.in_dygraph_mode(): var.op = op return op diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py index 7eb912645e..11e3c4938b 100644 --- a/python/paddle/fluid/layer_helper.py +++ b/python/paddle/fluid/layer_helper.py @@ -17,7 +17,7 @@ from __future__ import print_function import copy import six -from .framework import Parameter, dtype_is_floating, _in_dygraph_mode +from .framework import Parameter, dtype_is_floating, in_dygraph_mode from . import unique_name from paddle.fluid.initializer import Constant, Xavier from .param_attr import ParamAttr diff --git a/python/paddle/fluid/layer_helper_base.py b/python/paddle/fluid/layer_helper_base.py index 869a5f54e9..9eed00b161 100644 --- a/python/paddle/fluid/layer_helper_base.py +++ b/python/paddle/fluid/layer_helper_base.py @@ -17,7 +17,7 @@ from __future__ import print_function import copy import numpy as np -from .framework import Variable, default_main_program, default_startup_program, _in_dygraph_mode, _current_expected_place +from .framework import Variable, default_main_program, default_startup_program, in_dygraph_mode, _current_expected_place from . import unique_name from .param_attr import ParamAttr, WeightNormParamAttr from . import core @@ -54,7 +54,7 @@ class LayerHelperBase(object): Return Variable construct from value """ if isinstance(value, np.ndarray): - assert _in_dygraph_mode( + assert in_dygraph_mode( ), "to_variable could only be called in dygraph mode" if not block: @@ -302,7 +302,7 @@ class LayerHelperBase(object): param = self._create_weight_normalize(attr, shape, dtype) WeightNormParamAttr.params_with_weight_norm.append(param) return param - if _in_dygraph_mode(): + if in_dygraph_mode(): # In dygraph mode, we want the returned parameter to be # initialized so that it can be used imperatively. return self.main_program.global_block().create_parameter( @@ -370,7 +370,7 @@ class LayerHelperBase(object): initializer: initializer to use """ assert isinstance(var, Variable) - if _in_dygraph_mode(): + if in_dygraph_mode(): initializer(var, var.block) else: self.startup_program.global_block().create_var( diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 91414fdeb2..e69298d52b 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -23,7 +23,7 @@ import os import inspect from ..layer_helper import LayerHelper from ..initializer import Normal, Constant, NumpyArrayInitializer -from ..framework import Variable, OpProtoHolder, _in_dygraph_mode +from ..framework import Variable, OpProtoHolder, in_dygraph_mode from ..dygraph import base from ..param_attr import ParamAttr from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_ @@ -3288,7 +3288,7 @@ def layer_norm(input, >>> dtype='float32') >>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1) """ - assert _in_dygraph_mode( + assert in_dygraph_mode( ) is not True, "please use FC instead of fc in dygraph mode!" helper = LayerHelper('layer_norm', **locals()) dtype = helper.input_dtype() @@ -6454,7 +6454,7 @@ def squeeze(input, axes, name=None): x = layers.data(name='x', shape=[5, 1, 10]) y = layers.sequeeze(input=x, axes=[1]) """ - assert not _in_dygraph_mode(), ( + assert not in_dygraph_mode(), ( "squeeze layer is not supported in dygraph mode yet.") helper = LayerHelper("squeeze", **locals()) out = helper.create_variable_for_type_inference(dtype=input.dtype) @@ -9193,7 +9193,7 @@ def _elementwise_op(helper): op_type = helper.layer_type x = helper.kwargs.get('x', None) y = helper.kwargs.get('y', None) - if _in_dygraph_mode(): + if in_dygraph_mode(): x = base.to_variable(x) y = base.to_variable(y) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 02ace724c8..c3b7aee2b4 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -55,7 +55,7 @@ class Optimizer(object): """ def __init__(self, learning_rate, regularization=None, name=None): - if framework._in_dygraph_mode(): + if framework.in_dygraph_mode(): if not isinstance(learning_rate, float) and \ not isinstance(learning_rate, LearningRateDecay): raise TypeError( @@ -205,7 +205,7 @@ class Optimizer(object): name = self._name + "_" + name if (name in self._accumulators and param.name in self._accumulators[name]): - if framework._in_dygraph_mode(): + if framework.in_dygraph_mode(): return self._accumulators[name][param.name] raise Exception("Accumulator {} already exists for parameter {}". format(name, param.name)) @@ -275,7 +275,7 @@ class Optimizer(object): self._create_global_learning_rate() optimize_ops = [] - if framework._in_dygraph_mode(): + if framework.in_dygraph_mode(): for param_and_grad in parameters_and_grads: if param_and_grad[1] is None: continue @@ -374,7 +374,7 @@ class Optimizer(object): See examples in `apply_gradients`. """ self._dtype = loss.dtype - if framework._in_dygraph_mode(): + if framework.in_dygraph_mode(): if parameter_list is not None: parameters = parameter_list else: @@ -459,7 +459,7 @@ class Optimizer(object): Returns: list: A list of operators appended to the current program. """ - if framework._in_dygraph_mode(): + if framework.in_dygraph_mode(): with program_guard(framework.default_main_program(), framework.default_startup_program()): optimize_ops = self._create_optimization_pass(params_grads) diff --git a/python/paddle/fluid/tests/unittests/test_base_layer.py b/python/paddle/fluid/tests/unittests/test_base_layer.py index 9cb88d4a85..04a36f7caf 100644 --- a/python/paddle/fluid/tests/unittests/test_base_layer.py +++ b/python/paddle/fluid/tests/unittests/test_base_layer.py @@ -18,7 +18,7 @@ import numpy as np import paddle.fluid as fluid -class L1(fluid.dygraph.Layer): +class L1(fluid.Layer): def __init__(self, prefix): super(L1, self).__init__(prefix) self._param_attr = fluid.ParamAttr( @@ -32,7 +32,7 @@ class L1(fluid.dygraph.Layer): return self.w1 + self.w2 -class L2(fluid.dygraph.Layer): +class L2(fluid.Layer): def __init__(self, prefix): super(L2, self).__init__(prefix) self.layer1 = L1(self.full_name()) @@ -42,7 +42,7 @@ class L2(fluid.dygraph.Layer): return self.layer1() + self.layer2() -class L3(fluid.dygraph.Layer): +class L3(fluid.Layer): def __init__(self, prefix): super(L3, self).__init__(prefix) self.layer1 = L2(self.full_name()) @@ -59,7 +59,7 @@ class TestBaseLayer(unittest.TestCase): ret = l() self.assertEqual(l.w1.name, "test_one_level/L1_0.w_0") self.assertEqual(l.w2.name, "test_one_level/L1_0.w_1") - self.assertTrue(np.allclose(ret._numpy(), 0.2 * np.ones([2, 2]))) + self.assertTrue(np.allclose(ret.numpy(), 0.2 * np.ones([2, 2]))) def test_three_level(self): with fluid.dygraph.guard(): @@ -72,7 +72,7 @@ class TestBaseLayer(unittest.TestCase): self.assertEqual(names[3], "test_three_level/L3_0/L2_0/L1_1.w_1") self.assertEqual(names[4], "test_three_level/L3_0/L2_1/L1_0.w_0") self.assertEqual(names[5], "test_three_level/L3_0/L2_1/L1_0.w_1") - self.assertTrue(np.allclose(ret._numpy(), 0.8 * np.ones([2, 2]))) + self.assertTrue(np.allclose(ret.numpy(), 0.8 * np.ones([2, 2]))) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index 13f2d66217..bc95b90ce4 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -18,11 +18,11 @@ import numpy as np import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid.dygraph.nn import FC +from paddle.fluid import FC from test_imperative_base import new_program_scope -class MyLayer(fluid.dygraph.Layer): +class MyLayer(fluid.Layer): def __init__(self, name_scope): super(MyLayer, self).__init__(name_scope) @@ -34,7 +34,7 @@ class MyLayer(fluid.dygraph.Layer): return [x] -class MyPyLayer(fluid.dygraph.PyLayer): +class MyPyLayer(fluid.PyLayer): def __init__(self): super(MyPyLayer, self).__init__() @@ -48,7 +48,7 @@ class MyPyLayer(fluid.dygraph.PyLayer): return np.array(dout) * (1 - np.square(np.array(out))) -class MLP(fluid.dygraph.Layer): +class MLP(fluid.Layer): def __init__(self, name_scope): super(MLP, self).__init__(name_scope) self._fc1 = FC(self.full_name(), @@ -71,7 +71,7 @@ class MLP(fluid.dygraph.Layer): return x -class SimpleRNNCell(fluid.dygraph.Layer): +class SimpleRNNCell(fluid.Layer): def __init__(self, name_scope, step_input_size, hidden_size, output_size, param_attr): super(SimpleRNNCell, self).__init__(name_scope) @@ -81,7 +81,7 @@ class SimpleRNNCell(fluid.dygraph.Layer): self._dtype = core.VarDesc.VarType.FP32 self.param_attr = param_attr - def _build_once(self, inputs, pre_hidden): + def build_once(self, inputs, pre_hidden): i2h_param_shape = [self.step_input_size, self.hidden_size] h2h_param_shape = [self.hidden_size, self.hidden_size] h2o_param_shape = [self.output_size, self.hidden_size] @@ -159,7 +159,7 @@ class SimpleRNNCell(fluid.dygraph.Layer): return reduce_out, hidden -class SimpleRNN(fluid.dygraph.Layer): +class SimpleRNN(fluid.Layer): def __init__(self, name_scope): super(SimpleRNN, self).__init__(name_scope) self.seq_len = 4 @@ -200,22 +200,22 @@ class TestImperative(unittest.TestCase): inputs.append(fluid.dygraph.base.to_variable(x)) ret = fluid.layers.sums(inputs) loss = fluid.layers.reduce_sum(ret) - loss._backward() - self.assertTrue(np.allclose(ret._numpy(), x * 10)) - self.assertTrue(np.allclose(inputs[0]._gradient(), x)) + loss.backward() + self.assertTrue(np.allclose(ret.numpy(), x * 10)) + self.assertTrue(np.allclose(inputs[0].gradient(), x)) def test_layer(self): with fluid.dygraph.guard(): cl = core.Layer() cl.forward([]) - l = fluid.dygraph.Layer("l") + l = fluid.Layer("l") self.assertRaises(NotImplementedError, l.forward, []) def test_pylayer_func_id(self): with fluid.dygraph.guard(): - class PyLayer1(fluid.dygraph.PyLayer): + class PyLayer1(fluid.PyLayer): def __init__(self): super(PyLayer1, self).__init__() @@ -257,9 +257,9 @@ class TestImperative(unittest.TestCase): my_py_layer = MyPyLayer() var_inp = fluid.dygraph.base.to_variable(np_inp) outs = my_py_layer(var_inp) - dy_out = np.sum(outs[0]._numpy()) - outs[0]._backward() - dy_grad = var_inp._gradient() + dy_out = np.sum(outs[0].numpy()) + outs[0].backward() + dy_grad = var_inp.gradient() with new_program_scope(): inp = fluid.layers.data( @@ -287,9 +287,9 @@ class TestImperative(unittest.TestCase): l = MyLayer("my_layer") x = l(var_inp)[0] self.assertIsNotNone(x) - dy_out = x._numpy() - x._backward() - dy_grad = l._x_for_debug._gradient() + dy_out = x.numpy() + x.backward() + dy_grad = l._x_for_debug.gradient() with new_program_scope(): inp = fluid.layers.data( @@ -314,9 +314,9 @@ class TestImperative(unittest.TestCase): var_inp = fluid.dygraph.base.to_variable(np_inp) mlp = MLP("mlp") out = mlp(var_inp) - dy_out = out._numpy() - out._backward() - dy_grad = mlp._fc1._w._gradient() + dy_out = out.numpy() + out.backward() + dy_grad = mlp._fc1._w.gradient() with new_program_scope(): inp = fluid.layers.data( @@ -358,11 +358,11 @@ class TestImperative(unittest.TestCase): var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3]) simple_rnn = SimpleRNN("simple_rnn") outs, pre_hiddens = simple_rnn.forward(var_inp) - dy_out = outs[3]._numpy() - outs[3]._backward() - dy_grad_h2o = simple_rnn._cell._h2o_w._gradient() - dy_grad_h2h = simple_rnn._cell._h2h_w._gradient() - dy_grad_i2h = simple_rnn._cell._i2h_w._gradient() + dy_out = outs[3].numpy() + outs[3].backward() + dy_grad_h2o = simple_rnn._cell._h2o_w.gradient() + dy_grad_h2h = simple_rnn._cell._h2h_w.gradient() + dy_grad_i2h = simple_rnn._cell._i2h_w.gradient() with new_program_scope(): inp = fluid.layers.data( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py b/python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py index a92b7d62fa..c28058100a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py @@ -18,11 +18,11 @@ import numpy as np import paddle import paddle.fluid as fluid from paddle.fluid.optimizer import SGDOptimizer -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC +from paddle.fluid import Conv2D, Pool2D, FC from paddle.fluid.dygraph.base import to_variable -class SimpleImgConvPool(fluid.dygraph.Layer): +class SimpleImgConvPool(fluid.Layer): def __init__(self, name_scope, num_channels, @@ -71,7 +71,7 @@ class SimpleImgConvPool(fluid.dygraph.Layer): return x -class MNIST(fluid.dygraph.Layer): +class MNIST(fluid.Layer): def __init__(self, name_scope): super(MNIST, self).__init__(name_scope) @@ -125,21 +125,21 @@ class TestDygraphCheckpoint(unittest.TestCase): img = to_variable(dy_x_data) label = to_variable(y_data) - label._stop_gradient = True + label.stop_gradient = True cost = mnist(img) loss = fluid.layers.cross_entropy(cost, label) avg_loss = fluid.layers.mean(loss) - dy_out = avg_loss._numpy() + dy_out = avg_loss.numpy() - avg_loss._backward() + avg_loss.backward() sgd.minimize(avg_loss) fluid.dygraph.save_persistables(mnist, "save_dir") mnist.clear_gradients() for param in mnist.parameters(): - dy_param_init_value[param.name] = param._numpy() + dy_param_init_value[param.name] = param.numpy() mnist.load_dict( fluid.dygraph.load_persistables(mnist, "save_dir")) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py index ccebd4a547..ca2cffa9c7 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py @@ -32,11 +32,11 @@ NUM_BATCHES = int(os.environ.get('NUM_BATCHES', 5)) NUM_EPOCHES = int(os.environ.get('NUM_EPOCHES', 1)) -class DMF(fluid.dygraph.Layer): +class DMF(fluid.Layer): def __init__(self, name_scope): super(DMF, self).__init__(name_scope) - self._user_latent = fluid.dygraph.FC(self.full_name(), 256) - self._item_latent = fluid.dygraph.FC(self.full_name(), 256) + self._user_latent = fluid.FC(self.full_name(), 256) + self._item_latent = fluid.FC(self.full_name(), 256) self._user_layers = [] self._item_layers = [] @@ -45,13 +45,11 @@ class DMF(fluid.dygraph.Layer): self._user_layers.append( self.add_sublayer( 'user_layer_%d' % i, - fluid.dygraph.FC( - self.full_name(), self._hid_sizes[i], act='relu'))) + fluid.FC(self.full_name(), self._hid_sizes[i], act='relu'))) self._item_layers.append( self.add_sublayer( 'item_layer_%d' % i, - fluid.dygraph.FC( - self.full_name(), self._hid_sizes[i], act='relu'))) + fluid.FC(self.full_name(), self._hid_sizes[i], act='relu'))) def forward(self, users, items): users = self._user_latent(users) @@ -63,19 +61,18 @@ class DMF(fluid.dygraph.Layer): return fluid.layers.elementwise_mul(users, items) -class MLP(fluid.dygraph.Layer): +class MLP(fluid.Layer): def __init__(self, name_scope): super(MLP, self).__init__(name_scope) - self._user_latent = fluid.dygraph.FC(self.full_name(), 256) - self._item_latent = fluid.dygraph.FC(self.full_name(), 256) + self._user_latent = fluid.FC(self.full_name(), 256) + self._item_latent = fluid.FC(self.full_name(), 256) self._match_layers = [] self._hid_sizes = [128, 64] for i in range(len(self._hid_sizes)): self._match_layers.append( self.add_sublayer( 'match_layer_%d' % i, - fluid.dygraph.FC( - self.full_name(), self._hid_sizes[i], act='relu'))) + fluid.FC(self.full_name(), self._hid_sizes[i], act='relu'))) self._mat def forward(self, users, items): @@ -88,7 +85,7 @@ class MLP(fluid.dygraph.Layer): return match_vec -class DeepCF(fluid.dygraph.Layer): +class DeepCF(fluid.Layer): def __init__(self, name_scope, num_users, num_items, matrix): super(DeepCF, self).__init__(name_scope) self._num_users = num_users @@ -99,11 +96,11 @@ class DeepCF(fluid.dygraph.Layer): matrix.dtype, is_bias=False, default_initializer=fluid.initializer.NumpyArrayInitializer(matrix)) - self._rating_matrix._stop_gradient = True + self._rating_matrix.stop_gradient = True self._mlp = MLP(self.full_name()) self._dmf = DMF(self.full_name()) - self._match_fc = fluid.dygraph.FC(self.full_name(), 1, act='sigmoid') + self._match_fc = fluid.FC(self.full_name(), 1, act='sigmoid') def forward(self, users, items): # users_emb = self._user_emb(users) @@ -255,10 +252,10 @@ class TestDygraphDeepCF(unittest.TestCase): fluid.layers.log_loss(prediction, to_variable(labels_np[ slice:slice + BATCH_SIZE]))) - loss._backward() + loss.backward() adam.minimize(loss) deepcf.clear_gradients() - dy_loss = loss._numpy() + dy_loss = loss.numpy() sys.stderr.write('dynamic loss: %s %s\n' % (slice, dy_loss)) self.assertEqual(static_loss, dy_loss) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gan.py b/python/paddle/fluid/tests/unittests/test_imperative_gan.py index 58faa1cb85..5d773ec1c9 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gan.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gan.py @@ -22,12 +22,12 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.optimizer import SGDOptimizer -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC +from paddle.fluid import Conv2D, Pool2D, FC from test_imperative_base import new_program_scope from paddle.fluid.dygraph.base import to_variable -class Discriminator(fluid.dygraph.Layer): +class Discriminator(fluid.Layer): def __init__(self, name_scope): super(Discriminator, self).__init__(name_scope) self._fc1 = FC(self.full_name(), size=32, act='elu') @@ -38,7 +38,7 @@ class Discriminator(fluid.dygraph.Layer): return self._fc2(x) -class Generator(fluid.dygraph.Layer): +class Generator(fluid.Layer): def __init__(self, name_scope): super(Generator, self).__init__(name_scope) self._fc1 = FC(self.full_name(), size=64, act='elu') @@ -150,7 +150,7 @@ class TestDygraphGAN(unittest.TestCase): x=d_fake, label=to_variable(np.zeros([2, 1], np.float32)))) d_loss = d_loss_real + d_loss_fake - d_loss._backward() + d_loss.backward() sgd.minimize(d_loss) discriminator.clear_gradients() generator.clear_gradients() @@ -160,15 +160,15 @@ class TestDygraphGAN(unittest.TestCase): g_loss = fluid.layers.reduce_mean( fluid.layers.sigmoid_cross_entropy_with_logits( x=d_fake, label=to_variable(np.ones([2, 1], np.float32)))) - g_loss._backward() + g_loss.backward() sgd.minimize(g_loss) for p in discriminator.parameters(): - dy_params[p.name] = p._numpy() + dy_params[p.name] = p.numpy() for p in generator.parameters(): - dy_params[p.name] = p._numpy() + dy_params[p.name] = p.numpy() - dy_g_loss = g_loss._numpy() - dy_d_loss = d_loss._numpy() + dy_g_loss = g_loss.numpy() + dy_d_loss = d_loss.numpy() self.assertEqual(dy_g_loss, static_g_loss) self.assertEqual(dy_d_loss, static_d_loss) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py index a8fb9ecfe4..234fcd6040 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py @@ -15,14 +15,12 @@ import contextlib import unittest import numpy as np -import six import sys import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.optimizer import AdamOptimizer -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC from test_imperative_base import new_program_scope from paddle.fluid.dygraph.base import to_variable @@ -31,7 +29,7 @@ def gen_data(): pass -class GraphConv(fluid.dygraph.Layer): +class GraphConv(fluid.Layer): def __init__(self, name_scope, in_features, out_features): super(GraphConv, self).__init__(name_scope) @@ -50,7 +48,7 @@ class GraphConv(fluid.dygraph.Layer): return fluid.layers.matmul(adj, support) + self.bias -class GCN(fluid.dygraph.Layer): +class GCN(fluid.Layer): def __init__(self, name_scope, num_hidden): super(GCN, self).__init__(name_scope) self.gc = GraphConv(self.full_name(), num_hidden, 32) @@ -134,10 +132,9 @@ class TestDygraphGNN(unittest.TestCase): loss = fluid.layers.reduce_sum(loss) adam = AdamOptimizer(learning_rate=1e-3) adam.minimize(loss) - self.assertEqual(static_loss, loss._numpy()) - self.assertTrue( - np.allclose(static_weight, model.gc.weight._numpy())) - sys.stderr.write('%s %s\n' % (static_loss, loss._numpy())) + self.assertEqual(static_loss, loss.numpy()) + self.assertTrue(np.allclose(static_weight, model.gc.weight.numpy())) + sys.stderr.write('%s %s\n' % (static_loss, loss.numpy())) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py index 5ab01839fb..76b8d3aa39 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py @@ -128,25 +128,25 @@ class TestImperativeMnist(unittest.TestCase): img = to_variable(dy_x_data) label = to_variable(y_data) - label._stop_gradient = True + label.stop_gradient = True cost = mnist(img) loss = fluid.layers.cross_entropy(cost, label) avg_loss = fluid.layers.mean(loss) - dy_out = avg_loss._numpy() + dy_out = avg_loss.numpy() if epoch == 0 and batch_id == 0: for param in mnist.parameters(): - dy_param_init_value[param.name] = param._numpy() + dy_param_init_value[param.name] = param.numpy() - avg_loss._backward() + avg_loss.backward() sgd.minimize(avg_loss) mnist.clear_gradients() dy_param_value = {} for param in mnist.parameters(): - dy_param_value[param.name] = param._numpy() + dy_param_value[param.name] = param.numpy() with new_program_scope(): fluid.default_startup_program().random_seed = seed diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py index 8b659a3e08..b9f93119e8 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py @@ -28,7 +28,7 @@ from paddle.fluid.dygraph.base import to_variable from test_imperative_base import new_program_scope -class MLP(fluid.dygraph.Layer): +class MLP(fluid.Layer): def __init__(self, name_scope, param_attr=None, bias_attr=None): super(MLP, self).__init__(name_scope) @@ -75,18 +75,18 @@ class TestImperativeOptimizerBase(unittest.TestCase): cost = mlp(img) avg_loss = fluid.layers.reduce_mean(cost) - dy_out = avg_loss._numpy() + dy_out = avg_loss.numpy() if batch_id == 0: for param in mlp.parameters(): - dy_param_init_value[param.name] = param._numpy() + dy_param_init_value[param.name] = param.numpy() - avg_loss._backward() + avg_loss.backward() optimizer.minimize(avg_loss) mlp.clear_gradients() dy_param_value = {} for param in mlp.parameters(): - dy_param_value[param.name] = param._numpy() + dy_param_value[param.name] = param.numpy() with new_program_scope(): fluid.default_startup_program().random_seed = seed diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index eb8a82430f..06ee5f7514 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -24,10 +24,9 @@ from paddle.fluid.dygraph.base import to_variable from test_imperative_base import new_program_scope import numpy as np import six -from paddle.fluid.backward import append_backward -class SimpleLSTMRNN(fluid.dygraph.Layer): +class SimpleLSTMRNN(fluid.Layer): def __init__(self, name_scope, hidden_size, @@ -45,7 +44,7 @@ class SimpleLSTMRNN(fluid.dygraph.Layer): self.cell_array = [] self.hidden_array = [] - def _build_once(self, input_embedding, init_hidden=None, init_cell=None): + def build_once(self, input_embedding, init_hidden=None, init_cell=None): self.weight_1_arr = [] self.weight_2_arr = [] self.bias_arr = [] @@ -132,7 +131,7 @@ class SimpleLSTMRNN(fluid.dygraph.Layer): return real_res, last_hidden, last_cell -class PtbModel(fluid.dygraph.Layer): +class PtbModel(fluid.Layer): def __init__(self, name_scope, hidden_size, @@ -177,7 +176,7 @@ class PtbModel(fluid.dygraph.Layer): default_initializer=fluid.initializer.UniformInitializer( low=-self.init_scale, high=self.init_scale)) - def _build_once(self, input, label, init_hidden, init_cell): + def build_once(self, input, label, init_hidden, init_cell): pass def forward(self, input, label, init_hidden, init_cell): @@ -260,13 +259,13 @@ class TestDygraphPtbRnn(unittest.TestCase): init_cell) if i == 0: for param in ptb_model.parameters(): - dy_param_init[param.name] = param._numpy() - dy_loss._backward() + dy_param_init[param.name] = param.numpy() + dy_loss.backward() sgd.minimize(dy_loss) ptb_model.clear_gradients() if i == batch_num - 1: for param in ptb_model.parameters(): - dy_param_updated[param.name] = param._numpy() + dy_param_updated[param.name] = param.numpy() with new_program_scope(): fluid.default_startup_program().random_seed = seed @@ -333,10 +332,10 @@ class TestDygraphPtbRnn(unittest.TestCase): for k in range(3, len(out)): static_param_updated[static_param_name_list[k - 3]] = out[k] - self.assertTrue(np.allclose(static_loss_value, dy_loss._numpy())) - self.assertTrue(np.allclose(static_last_cell_value, last_cell._numpy())) + self.assertTrue(np.allclose(static_loss_value, dy_loss.numpy())) + self.assertTrue(np.allclose(static_last_cell_value, last_cell.numpy())) self.assertTrue( - np.allclose(static_last_hidden_value, last_hidden._numpy())) + np.allclose(static_last_hidden_value, last_hidden.numpy())) for key, value in six.iteritems(static_param_init): # print("static_init name: {}, value {}".format(key, value)) # print("dy_init name: {}, value {}".format(key, dy_param_init[key])) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index 1d786d5846..d9ef08b3c4 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -21,7 +21,7 @@ import paddle import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC +from paddle.fluid import Conv2D, Pool2D, BatchNorm, FC from paddle.fluid.dygraph.base import to_variable from test_imperative_base import new_program_scope @@ -68,7 +68,7 @@ def optimizer_setting(params): return optimizer -class ConvBNLayer(fluid.dygraph.Layer): +class ConvBNLayer(fluid.Layer): def __init__(self, name_scope, num_channels, @@ -99,7 +99,7 @@ class ConvBNLayer(fluid.dygraph.Layer): return y -class BottleneckBlock(fluid.dygraph.Layer): +class BottleneckBlock(fluid.Layer): def __init__(self, name_scope, num_channels, @@ -156,7 +156,7 @@ class BottleneckBlock(fluid.dygraph.Layer): return layer_helper.append_activation(y) -class ResNet(fluid.dygraph.Layer): +class ResNet(fluid.Layer): def __init__(self, name_scope, layers=50, class_dim=102): super(ResNet, self).__init__(name_scope) @@ -247,7 +247,7 @@ class TestDygraphResnet(unittest.TestCase): dy_param_init_value = {} for param in resnet.parameters(): - dy_param_init_value[param.name] = param._numpy() + dy_param_init_value[param.name] = param.numpy() for batch_id, data in enumerate(train_reader()): if batch_id >= batch_num: @@ -260,20 +260,20 @@ class TestDygraphResnet(unittest.TestCase): img = to_variable(dy_x_data) label = to_variable(y_data) - label._stop_gradient = True + label.stop_gradient = True out = resnet(img) loss = fluid.layers.cross_entropy(input=out, label=label) avg_loss = fluid.layers.mean(x=loss) - dy_out = avg_loss._numpy() + dy_out = avg_loss.numpy() if batch_id == 0: for param in resnet.parameters(): if param.name not in dy_param_init_value: - dy_param_init_value[param.name] = param._numpy() + dy_param_init_value[param.name] = param.numpy() - avg_loss._backward() + avg_loss.backward() dy_grad_value = {} for param in resnet.parameters(): @@ -288,7 +288,7 @@ class TestDygraphResnet(unittest.TestCase): dy_param_value = {} for param in resnet.parameters(): - dy_param_value[param.name] = param._numpy() + dy_param_value[param.name] = param.numpy() with new_program_scope(): fluid.default_startup_program().random_seed = seed diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer.py index 6f87051dc4..90457cc664 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer.py @@ -16,7 +16,8 @@ from __future__ import print_function import unittest import paddle.fluid as fluid -from paddle.fluid.dygraph import Embedding, LayerNorm, FC, to_variable, Layer, guard +from paddle.fluid import Embedding, LayerNorm, FC, Layer +from paddle.fluid.dygraph import to_variable, guard from test_imperative_base import new_program_scope from paddle.fluid import core import numpy as np @@ -985,15 +986,15 @@ class TestDygraphTransformer(unittest.TestCase): if i == 0: for param in transformer.parameters(): - dy_param_init[param.name] = param._numpy() + dy_param_init[param.name] = param.numpy() - dy_avg_cost._backward() + dy_avg_cost.backward() optimizer.minimize(dy_avg_cost) transformer.clear_gradients() if i == batch_num - 1: for param in transformer.parameters(): - dy_param_updated[param.name] = param._numpy() + dy_param_updated[param.name] = param.numpy() with new_program_scope(): fluid.default_startup_program().random_seed = seed @@ -1069,13 +1070,13 @@ class TestDygraphTransformer(unittest.TestCase): 4]] = out[k] self.assertTrue( - np.array_equal(static_avg_cost_value, dy_avg_cost._numpy())) + np.array_equal(static_avg_cost_value, dy_avg_cost.numpy())) self.assertTrue( - np.array_equal(static_sum_cost_value, dy_sum_cost._numpy())) + np.array_equal(static_sum_cost_value, dy_sum_cost.numpy())) self.assertTrue( - np.array_equal(static_predict_value, dy_predict._numpy())) + np.array_equal(static_predict_value, dy_predict.numpy())) self.assertTrue( - np.array_equal(static_token_num_value, dy_token_num._numpy())) + np.array_equal(static_token_num_value, dy_token_num.numpy())) for key, value in six.iteritems(static_param_init): self.assertTrue(np.array_equal(value, dy_param_init[key])) for key, value in six.iteritems(static_param_updated): diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 6cc3c6d90b..5b56644700 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -102,7 +102,7 @@ class TestLayer(LayerTest): dy_ret = lm(base.to_variable(inp)) self.assertTrue(np.allclose(static_ret, static_ret2)) - self.assertTrue(np.allclose(dy_ret._numpy(), static_ret2)) + self.assertTrue(np.allclose(dy_ret.numpy(), static_ret2)) def test_relu(self): with self.static_graph(): @@ -116,7 +116,7 @@ class TestLayer(LayerTest): t = np.ones([3, 3], dtype='float32') dy_ret = layers.relu(base.to_variable(t)) - self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) def test_matmul(self): with self.static_graph(): @@ -137,7 +137,7 @@ class TestLayer(LayerTest): t2 = np.ones([3, 3], dtype='float32') dy_ret = layers.matmul(base.to_variable(t), base.to_variable(t2)) - self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) def test_conv2d(self): with self.static_graph(): @@ -164,7 +164,7 @@ class TestLayer(LayerTest): 'conv2d', num_channels=3, num_filters=3, filter_size=[2, 2]) dy_ret = conv2d(base.to_variable(images)) - self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) self.assertTrue(np.allclose(static_ret, static_ret2)) def test_gru_unit(self): @@ -206,7 +206,7 @@ class TestLayer(LayerTest): for i in range(len(static_ret)): self.assertTrue(np.allclose(static_ret[i], static_ret2[i])) - self.assertTrue(np.allclose(static_ret[i], dy_ret[i]._numpy())) + self.assertTrue(np.allclose(static_ret[i], dy_ret[i].numpy())) def test_elementwise_math(self): n = np.ones([3, 3], dtype='float32') @@ -248,8 +248,8 @@ class TestLayer(LayerTest): ret = layers.elementwise_sub(ret, n5) dy_ret = layers.elementwise_mul(ret, n6) self.assertTrue( - np.allclose(static_ret, dy_ret._numpy()), - '%s vs %s' % (static_ret, dy_ret._numpy())) + np.allclose(static_ret, dy_ret.numpy()), + '%s vs %s' % (static_ret, dy_ret.numpy())) def test_elementwise_minmax(self): n = np.ones([3, 3], dtype='float32') @@ -259,8 +259,8 @@ class TestLayer(LayerTest): min_ret = layers.elementwise_min(n, n2) max_ret = layers.elementwise_max(n, n2) - self.assertTrue(np.allclose(n, min_ret._numpy())) - self.assertTrue(np.allclose(n2, max_ret._numpy())) + self.assertTrue(np.allclose(n, min_ret.numpy())) + self.assertTrue(np.allclose(n2, max_ret.numpy())) def test_sequence_conv(self): inp_np = np.arange(12).reshape([3, 4]).astype('float32') @@ -327,7 +327,7 @@ class TestLayer(LayerTest): 'conv2d_transpose', num_filters=10, output_size=28) dy_rlt = conv2d_transpose(base.to_variable(inp_np)) self.assertTrue(np.allclose(static_rlt2, static_rlt)) - self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) + self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt)) def test_bilinear_tensor_product(self): inp_np_x = np.array([[1, 2, 3]]).astype('float32') @@ -370,7 +370,7 @@ class TestLayer(LayerTest): dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y)) self.assertTrue(np.allclose(static_rlt2, static_rlt)) - self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) + self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt)) def test_prelu(self): inp_np = np.ones([5, 200, 100, 100]).astype('float32') @@ -411,7 +411,7 @@ class TestLayer(LayerTest): dy_rlt = prelu(base.to_variable(inp_np)) self.assertTrue(np.allclose(static_rlt2, static_rlt)) - self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) + self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt)) def test_embeding(self): inp_word = np.array([[[1]]]).astype('int64') @@ -444,7 +444,7 @@ class TestLayer(LayerTest): static_rlt3 = emb2(base.to_variable(inp_word)) self.assertTrue(np.allclose(static_rlt2, static_rlt)) - self.assertTrue(np.allclose(static_rlt3._numpy(), static_rlt)) + self.assertTrue(np.allclose(static_rlt3.numpy(), static_rlt)) def test_nce(self): window_size = 5 @@ -558,7 +558,7 @@ class TestLayer(LayerTest): nce_loss3 = nce(embs3, words[label_word]) self.assertTrue(np.allclose(static_rlt2, static_rlt)) - self.assertTrue(np.allclose(nce_loss3._numpy(), static_rlt)) + self.assertTrue(np.allclose(nce_loss3.numpy(), static_rlt)) def test_conv3d(self): with self.static_graph(): @@ -585,7 +585,7 @@ class TestLayer(LayerTest): conv3d = nn.Conv3D('conv3d', num_filters=3, filter_size=2) dy_ret = conv3d(base.to_variable(images)) - self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) self.assertTrue(np.allclose(static_ret, static_ret2)) def test_row_conv(self): @@ -679,7 +679,7 @@ class TestLayer(LayerTest): groupNorm = nn.GroupNorm('GroupNorm', groups=2) dy_ret = groupNorm(base.to_variable(input)) - self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) self.assertTrue(np.allclose(static_ret, static_ret2)) def test_spectral_norm(self): @@ -729,7 +729,7 @@ class TestLayer(LayerTest): spectralNorm = nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2) dy_ret = spectralNorm(base.to_variable(input)) - self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) self.assertTrue(np.allclose(static_ret, static_ret2)) def test_tree_conv(self): @@ -802,7 +802,7 @@ class TestLayer(LayerTest): dy_ret = treeConv(base.to_variable(vectors), base.to_variable(adj)) self.assertTrue(np.allclose(static_ret, static_ret2)) - self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) def test_conv3d_transpose(self): input_array = np.arange(0, 48).reshape( @@ -832,7 +832,7 @@ class TestLayer(LayerTest): use_cudnn=False) dy_rlt = conv3d_transpose(base.to_variable(input_array)) self.assertTrue(np.allclose(static_rlt2, static_rlt)) - self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) + self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt)) class TestBook(unittest.TestCase): -- GitLab