diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/gserver/layers/CudnnBatchNormLayer.cpp index c25960d681a62af1069b23f66f8ca5608808cd6f..8390b55026c895b661cb514714ba92c05a7bf02e 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp +++ b/paddle/gserver/layers/CudnnBatchNormLayer.cpp @@ -21,8 +21,6 @@ namespace paddle { REGISTER_LAYER(cudnn_batch_norm, CudnnBatchNormLayer); -const double CudnnBatchNormLayer::MIN_EPS = 1E-5; - bool CudnnBatchNormLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ @@ -61,14 +59,8 @@ void CudnnBatchNormLayer::forward(PassType passType) { real* movingMean = movingMean_->getW()->getData(); real* movingVar = movingVar_->getW()->getData(); - /** - * If epsilon_ equals to 1e-5 and eps_ is assigned the value of - * static_cast<double>(epsilon_), The CUDNN_STATUS_BAD_PARAM error - * will occur due to eps_ value is less than - * CUDNN_BN_MIN_EPSILON. - * The following code is to ensure that the eps_ meets requirement. - */ - eps_ = std::max(MIN_EPS, static_cast<double>(epsilon_)); + // cuDNN does not allow an epsilon value less than CUDNN_BN_MIN_EPSILON. + eps_ = std::max(CUDNN_BN_MIN_EPSILON, static_cast<double>(epsilon_)); if (!useGlobalStats_) { REGISTER_TIMER_INFO("CudnnBatchFwTimer", getName().c_str()); @@ -137,14 +129,8 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) { real* savedMean = savedMean_->getData(); real* savedInvVar = savedInvVar_->getData(); - /** - * If epsilon_ equals to 1e-5 and eps_ is assigned the value of - * static_cast<double>(epsilon_), The CUDNN_STATUS_BAD_PARAM error - * will occur due to eps_ value is less than - * CUDNN_BN_MIN_EPSILON. - * The following code is to ensure that the eps_ meets requirement. - */ - eps_ = std::max(MIN_EPS, static_cast<double>(epsilon_)); + // cuDNN does not allow an epsilon value less than CUDNN_BN_MIN_EPSILON. 
+ eps_ = std::max(CUDNN_BN_MIN_EPSILON, static_cast<double>(epsilon_)); auto create = [](MatrixPtr& m, size_t h, size_t w, real** p) { Matrix::resizeOrCreate(m, h, w, false, true); diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.h b/paddle/gserver/layers/CudnnBatchNormLayer.h index fb7dbc01d178192441c6c19edddf4b9d4e8fc134..1a3f0c0cbf8a1540e77cef70c753c91298728484 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.h +++ b/paddle/gserver/layers/CudnnBatchNormLayer.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include <cudnn.h> #include "BatchNormBaseLayer.h" #include "Layer.h" #include "paddle/utils/Stat.h" @@ -46,9 +47,6 @@ public: void backward(const UpdateCallback& callback = nullptr) override; protected: - /// Minimum allowed value is CUDNN_BN_MIN_EPSILON defined in cudnn.h. - static const double MIN_EPS; - /// Epsilon value used in the batch normalization formula. /// Same epsilon value should be used in forward and backward functions. double eps_; diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index ad1251e3192d9c3b2bd46fce39322ec00b40ef05..e2f5592248fd0b6166c2d11af02cef7815673def 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -542,7 +542,7 @@ message LayerConfig { optional ReshapeConfig reshape_conf = 59; // for batch normalization layer - // small constant added to the variance to avoid numerical problems. + // The small constant added to the variance to improve numeric stability. 
optional double epsilon = 60 [ default = 0.00001 ]; } diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index fd232f94159318dd42a84fdfc560bd61973fbd91..064933802f21e56e31d5d0691fee3a25ea2072f3 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2483,8 +2483,9 @@ class BatchNormLayer(LayerBase): self.config.use_global_stats = use_global_stats if moving_average_fraction is not None: self.config.moving_average_fraction = moving_average_fraction - - self.config.epsilon = epsilon + if epsilon is not None: + assert epsilon >= 1e-5, "epsilon must be no less than 1e-5." + self.config.epsilon = epsilon input_layer = self.get_input_layer(0) image_conf = self.config.inputs[0].image_conf diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index fa5e851390462f745b8467e49f0ebb1edbdb4826..4964c1245d8020d4c7c8875c92463acc5860fd02 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -3107,7 +3107,7 @@ def batch_norm_layer(input, will use the mean and variance of the current batch of test data. :type use_global_stats: bool | None. - :param epsilon: Small constant added to the variance to avoid numerical problems. + :param epsilon: The small constant added to the variance to improve numeric stability. :type epsilon: float. :param moving_average_fraction: Factor used in the moving average computation. :math:`runningMean = newMean*(1-factor) + runningMean*factor` @@ -3127,8 +3127,6 @@ def batch_norm_layer(input, (batch_norm_type == "mkldnn_batch_norm") or \ (batch_norm_type == "cudnn_batch_norm") - assert epsilon >= 1e-5, "epsilon must be no less than 1e-5." - l = Layer( name=name, img3D=img3D,