diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index 53d50a8b4a3ed378aa203f9458a4dc440e080716..8045c8cb5a62c49ff32c2e758c3985cecb568a51 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -99,7 +99,6 @@ from . import compiler
 from .compiler import *
 from paddle.fluid.layers.math_op_patch import monkey_patch_variable
 from . import install_check
-from .dygraph.nn import *
 from .dygraph.layers import *
 from .dygraph.base import enable_dygraph, disable_dygraph
 from .io import save, load, load_program_state, set_program_state
diff --git a/python/paddle/fluid/dygraph/__init__.py b/python/paddle/fluid/dygraph/__init__.py
index aebcc09eaa14ba8242b7ea4041f5816a5259cc7c..b98c188ae4f6ab3ecd191940431bb86d84ddccc7 100644
--- a/python/paddle/fluid/dygraph/__init__.py
+++ b/python/paddle/fluid/dygraph/__init__.py
@@ -21,9 +21,6 @@ from .layers import *
 from . import container
 from .container import *
 
-from . import nn
-from .nn import *
-
 from . import tracer
 from .tracer import *
 
@@ -45,7 +42,6 @@ __all__ = []
 __all__ += layers.__all__
 __all__ += base.__all__
 __all__ += container.__all__
-__all__ += nn.__all__
 __all__ += parallel.__all__
 __all__ += checkpoint.__all__
 __all__ += learning_rate_scheduler.__all__
diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py
deleted file mode 100644
index f6009912bee9062a4a8478237a41c9168af50782..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/dygraph/nn.py
+++ /dev/null
@@ -1,322 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle
-from .. import core
-from ..layers import utils
-from ..layers import nn as F
-from .. import dygraph_utils
-from . import layers
-from ..framework import (
-    Variable,
-    OpProtoHolder,
-    Parameter,
-    _dygraph_tracer,
-    _varbase_creator,
-    default_main_program,
-    _global_flags,
-    in_dygraph_mode,
-)
-
-from ..data_feeder import (
-    convert_dtype,
-    check_variable_and_dtype,
-    check_type,
-    check_dtype,
-)
-
-from ..param_attr import ParamAttr
-from ..initializer import Normal, Constant, NumpyArrayInitializer
-from .. import unique_name
-from .layer_object_helper import LayerObjectHelper
-from ..data_feeder import check_variable_and_dtype, check_type
-import numpy as np
-import numbers
-import logging
-import os
-import paddle.utils.deprecated as deprecated
-from paddle import _C_ops, _legacy_C_ops
-
-__all__ = []
-
-
-class BatchNorm(layers.Layer):
-    r"""
-
-    This interface is used to construct a callable object of the ``BatchNorm`` class.
-    For more details, refer to code examples.
-    It implements the function of the Batch Normalization Layer and can be used
-    as a normalizer function for conv2d and fully connected operations.
-    The data is normalized by the mean and variance of the channel based on the current batch data.
-    Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing
-    Internal Covariate Shift <https://arxiv.org/pdf/1502.03167.pdf>`_
-    for more details.
-
-    When use_global_stats = False, the :math:`\mu_{\beta}`
-    and :math:`\sigma_{\beta}^{2}` are the statistics of one mini-batch.
-    Calculated as follows:
-
-    .. math::
-
-        \mu_{\beta} &\gets \frac{1}{m} \sum_{i=1}^{m} x_i \qquad &
-        //\ mini-batch\ mean \\
-        \sigma_{\beta}^{2} &\gets \frac{1}{m} \sum_{i=1}^{m}(x_i - \mu_{\beta})^2 \qquad &
-        //\ mini-batch\ variance \\
-
-    - :math:`x` : mini-batch data
-    - :math:`m` : the size of the mini-batch data
-
-    When use_global_stats = True, the :math:`\\mu_{\\beta}`
-    and :math:`\\sigma_{\\beta}^{2}` are not the statistics of one mini-batch.
-    They are global or running statistics (moving_mean and moving_variance). It usually got from the
-    pre-trained model. Calculated as follows:
-
-    .. math::
-        moving\_mean = moving\_mean * momentum + \mu_{\beta} * (1. - momentum) \quad &// global mean \\
-        moving\_variance = moving\_variance * momentum + \sigma_{\beta}^{2} * (1. - momentum) \quad &// global variance \\
-
-    The normalization function formula is as follows:
-
-    .. math::
-
-        \hat{x_i} &\gets \frac{x_i - \mu_\beta} {\sqrt{\
-        \sigma_{\beta}^{2} + \epsilon}} \qquad &//\ normalize \\
-        y_i &\gets \gamma \hat{x_i} + \beta \qquad &//\ scale\ and\ shift
-
-
-    - :math:`\epsilon` : add a smaller value to the variance to prevent division by zero
-    - :math:`\gamma` : trainable proportional parameter
-    - :math:`\beta` : trainable deviation parameter
-
-    Parameters:
-        num_channels(int): Indicate the number of channels of the input ``Tensor``.
-        act(str, optional): Activation to be applied to the output of batch normalization. Default: None.
-        is_test (bool, optional): A flag indicating whether it is in test phrase or not.
-            This flag only has effect on static graph mode. For dygraph mode, please use ``eval()``.
-            Default: False.
-        momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9.
-        epsilon(float, optional): The small value added to the variance to prevent division by zero. Default: 1e-5.
-        param_attr(ParamAttr, optional): The parameter attribute for Parameter `scale`
-            of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm
-            will create ParamAttr as param_attr. If the Initializer of the param_attr
-            is not set, the parameter is initialized with Xavier. Default: None.
-        bias_attr(ParamAttr, optional): The parameter attribute for the bias of batch_norm.
-            If it is set to None or one attribute of ParamAttr, batch_norm
-            will create ParamAttr as bias_attr. If the Initializer of the bias_attr
-            is not set, the bias is initialized zero. Default: None.
-        dtype(str, optional): Indicate the data type of the input ``Tensor``,
-            which can be float32 or float64. Default: float32.
-        data_layout(str, optional): Specify the input data format, the data format can be "NCHW" or "NHWC", where `N` is batch size, `C` is the number of the feature map, `H` is the height of the feature map, `W` is the width of the feature map. Default: NCHW.
-        in_place(bool, optional): Make the input and output of batch norm reuse memory. Default: False.
-        moving_mean_name(str, optional): The name of moving_mean which store the global Mean. Default: None.
-        moving_variance_name(str, optional): The name of the moving_variance which store the global Variance. Default: None.
-        do_model_average_for_mean_and_var(bool, optional): Whether parameter mean and variance should do model
-            average when model average is enabled. Default: True.
-        use_global_stats(bool, optional): Whether to use global mean and
-            variance. In inference or test mode, set use_global_stats to true
-            or is_test to true, and the behavior is equivalent.
-            In train mode, when setting use_global_stats True, the global mean
-            and variance are also used during train period. Default: False.
-        trainable_statistics(bool, optional): Whether to calculate mean and var in eval mode. In eval mode, when
-            setting trainable_statistics True, mean and variance will be calculated by current batch statistics.
-            Default: False.
-
-    Returns:
-        None
-
-    Examples:
-        .. code-block:: python
-
-            import paddle
-            import paddle.fluid as fluid
-            from paddle.fluid.dygraph.base import to_variable
-
-            x = paddle.rand([3, 10, 3, 7], 'float32')
-            with fluid.dygraph.guard():
-                x = to_variable(x)
-                batch_norm = fluid.BatchNorm(10)
-                hidden1 = batch_norm(x)
-    """
-
-    def __init__(
-        self,
-        num_channels,
-        act=None,
-        is_test=False,
-        momentum=0.9,
-        epsilon=1e-05,
-        param_attr=None,
-        bias_attr=None,
-        dtype='float32',
-        data_layout='NCHW',
-        in_place=False,
-        moving_mean_name=None,
-        moving_variance_name=None,
-        do_model_average_for_mean_and_var=True,
-        use_global_stats=False,
-        trainable_statistics=False,
-    ):
-        super().__init__()
-        self._param_attr = param_attr
-        self._bias_attr = bias_attr
-        self._act = act
-        self._use_mkldnn = _global_flags()["FLAGS_use_mkldnn"]
-
-        assert (
-            bias_attr is not False
-        ), "bias_attr should not be False in batch_norm."
-
-        if dtype == "float16":
-            self._dtype = "float32"
-        else:
-            self._dtype = dtype
-
-        param_shape = [num_channels]
-
-        # create parameter
-        self.weight = self.create_parameter(
-            attr=self._param_attr,
-            shape=param_shape,
-            dtype=self._dtype,
-            default_initializer=Constant(1.0),
-        )
-        self.weight.stop_gradient = (
-            use_global_stats and self._param_attr.learning_rate == 0.0
-        )
-
-        self.bias = self.create_parameter(
-            attr=self._bias_attr,
-            shape=param_shape,
-            dtype=self._dtype,
-            is_bias=True,
-        )
-        self.bias.stop_gradient = (
-            use_global_stats and self._param_attr.learning_rate == 0.0
-        )
-
-        self._mean = self.create_parameter(
-            attr=ParamAttr(
-                name=moving_mean_name,
-                initializer=Constant(0.0),
-                trainable=False,
-                do_model_average=do_model_average_for_mean_and_var,
-            ),
-            shape=param_shape,
-            dtype=self._dtype,
-        )
-        self._mean.stop_gradient = True
-
-        self._variance = self.create_parameter(
-            attr=ParamAttr(
-                name=moving_variance_name,
-                initializer=Constant(1.0),
-                trainable=False,
-                do_model_average=do_model_average_for_mean_and_var,
-            ),
-            shape=param_shape,
-            dtype=self._dtype,
-        )
-        self._variance.stop_gradient = True
-
-        self._in_place = in_place
-        self._data_layout = data_layout
-        self._momentum = momentum
-        self._epsilon = epsilon
-        self._is_test = is_test
-        self._fuse_with_relu = False
-        self._use_global_stats = use_global_stats
-        self._trainable_statistics = trainable_statistics
-
-    def forward(self, input):
-        # create output
-        # mean and mean_out share the same memory
-        mean_out = self._mean
-        # variance and variance out share the same memory
-        variance_out = self._variance
-
-        if in_dygraph_mode():
-            batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm(
-                input,
-                self._mean,
-                self._variance,
-                self.weight,
-                self.bias,
-                not self.training,
-                self._momentum,
-                self._epsilon,
-                self._data_layout,
-                self._use_global_stats,
-                self._trainable_statistics,
-            )
-            return dygraph_utils._append_activation_in_dygraph(
-                batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn
-            )
-        else:
-            check_variable_and_dtype(
-                input, 'input', ['float16', 'float32', 'float64'], 'BatchNorm'
-            )
-
-            attrs = {
-                "momentum": self._momentum,
-                "epsilon": self._epsilon,
-                "is_test": self._is_test,
-                "data_layout": self._data_layout,
-                "use_mkldnn": False,
-                "fuse_with_relu": self._fuse_with_relu,
-                "use_global_stats": self._use_global_stats,
-                "trainable_statistics": self._trainable_statistics,
-            }
-
-            inputs = {
-                "X": [input],
-                "Scale": [self.weight],
-                "Bias": [self.bias],
-                "Mean": [self._mean],
-                "Variance": [self._variance],
-            }
-
-            saved_mean = self._helper.create_variable_for_type_inference(
-                dtype=self._dtype, stop_gradient=True
-            )
-            saved_variance = self._helper.create_variable_for_type_inference(
-                dtype=self._dtype, stop_gradient=True
-            )
-            reserve_space = self._helper.create_variable_for_type_inference(
-                dtype=self._helper.input_dtype(input), stop_gradient=True
-            )
-
-            batch_norm_out = (
-                input
-                if self._in_place
-                else self._helper.create_variable_for_type_inference(
-                    self._dtype
-                )
-            )
-
-            outputs = {
-                "Y": [batch_norm_out],
-                "MeanOut": [mean_out],
-                "VarianceOut": [variance_out],
-                "SavedMean": [saved_mean],
-                "SavedVariance": [saved_variance],
-            }
-            if reserve_space is not None:
-                outputs["ReserveSpace"] = [reserve_space]
-
-            self._helper.append_op(
-                type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs
-            )
-
-            # Currently, we don't support inplace in dygraph mode
-            return self._helper.append_activation(batch_norm_out, self._act)
diff --git a/python/paddle/fluid/install_check.py b/python/paddle/fluid/install_check.py
index 5756361f89e46f005072d2136b2e13de4762525b..bf1ad9b107f74694c80472f583287d617fdf0616 100644
--- a/python/paddle/fluid/install_check.py
+++ b/python/paddle/fluid/install_check.py
@@ -25,7 +25,7 @@ from .param_attr import ParamAttr
 from .initializer import Constant
 from . import layers
 from . import backward
-from .dygraph import Layer, nn
+from .dygraph import Layer
 from . import executor
 from . import optimizer
 from . import core
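
Note (not part of the patch above): with fluid.dygraph.nn removed, code that still builds fluid.BatchNorm / fluid.dygraph.BatchNorm has to move to the paddle.nn namespace. Below is a minimal migration sketch, assuming the standard Paddle 2.x layers paddle.nn.BatchNorm (which keeps the old num_channels-style constructor) and paddle.nn.BatchNorm2D; the diff itself does not prescribe a replacement, so treat this as illustrative only.

    # Migration sketch (assumption), mirroring the example from the deleted docstring.
    import paddle

    x = paddle.rand([3, 10, 3, 7], 'float32')  # NCHW input with 10 channels

    # Dygraph is the default mode in Paddle 2.x, so no fluid.dygraph.guard() is needed.
    batch_norm = paddle.nn.BatchNorm(10)  # replacement in the style of fluid.BatchNorm(10)
    hidden1 = batch_norm(x)

    # Alternatively, the shape-specific layer that takes num_features:
    batch_norm2d = paddle.nn.BatchNorm2D(10)
    hidden2 = batch_norm2d(x)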