Commit 8a49f7f1 authored by peterzhang2029

add epsilon in bn

Parent 08bc08d6
......@@ -41,6 +41,7 @@ bool BatchNormBaseLayer::init(const LayerMap& layerMap,
useGlobalStats_ = config_.use_global_stats();
}
movingAvgFraction_ = config_.moving_average_fraction();
EPS = config_.epsilon();
weight_.reset(new Weight(1, channels_, parameters_[0]));
movingMean_.reset(new Weight(1, channels_, parameters_[1]));
......
......@@ -94,6 +94,8 @@ protected:
bool useGlobalStats_;
// Used to compute the moving mean and variance.
real movingAvgFraction_;
// Epsilon value used in the batch normalization formula.
real EPS;
};
} // namespace paddle
......@@ -22,8 +22,6 @@ namespace paddle {
REGISTER_LAYER(batch_norm, BatchNormalizationLayer);
const real BatchNormalizationLayer::EPS = 1E-5;
bool BatchNormalizationLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
......
......@@ -39,9 +39,6 @@ public:
void backward(const UpdateCallback& callback = nullptr) override;
protected:
/// Epsilon value used in the batch normalization formula.
static const real EPS;
/// Load pre-calculated mean and std.
void setMeanAndStd();
......
......@@ -21,7 +21,7 @@ namespace paddle {
REGISTER_LAYER(cudnn_batch_norm, CudnnBatchNormLayer);
const double CudnnBatchNormLayer::EPS = 1E-5;
const double CudnnBatchNormLayer::MIN_EPS = 1E-5;
bool CudnnBatchNormLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
......@@ -60,6 +60,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
real* beta = biases_->getW()->getData();
real* movingMean = movingMean_->getW()->getData();
real* movingVar = movingVar_->getW()->getData();
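// Clamp to MIN_EPS so the value passed to cuDNN never falls below
// CUDNN_BN_MIN_EPSILON.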
EPS_ = std::max(MIN_EPS, static_cast<double>(EPS));
if (!useGlobalStats_) {
REGISTER_TIMER_INFO("CudnnBatchFwTimer", getName().c_str());
......@@ -75,7 +76,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
1.0 - movingAvgFraction_,
movingMean,
movingVar,
EPS,
EPS_,
savedMean,
savedInvVar);
} else {
......@@ -90,7 +91,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
beta,
movingMean,
movingVar,
EPS);
EPS_);
} else {
// There is a limitation in cudnn library.
// When the batch size is larger than 1024 in cuDNN v5.1,
......@@ -101,7 +102,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
beta,
movingMean,
movingVar,
EPS,
EPS_,
batchSize,
channels_,
imageH_ * imageD_,
......@@ -127,6 +128,7 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
real* gamma = weight_->getW()->getData();
real* savedMean = savedMean_->getData();
real* savedInvVar = savedInvVar_->getData();
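// Recompute the clamped epsilon: forward and backward must use the same value.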
EPS_ = std::max(MIN_EPS, static_cast<double>(EPS));
auto create = [](MatrixPtr& m, size_t h, size_t w, real** p) {
Matrix::resizeOrCreate(m, h, w, false, true);
......@@ -157,7 +159,7 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
gamma,
gammaGrad,
betaGrad,
EPS,
EPS_,
savedMean,
savedInvVar);
......
......@@ -47,11 +47,14 @@ public:
protected:
/**
* Epsilon value used in the batch normalization formula.
* Minimum allowed value is CUDNN_BN_MIN_EPSILON defined in cudnn.h.
* Same epsilon value should be used in forward and backward functions.
*/
static const double EPS;
static const double MIN_EPS;
/// Epsilon value used in the batch normalization formula.
/// If the configured EPS is smaller than MIN_EPS, MIN_EPS is used instead.
double EPS_;
/// Input/output tensor descriptor.
hl_tensor_descriptor ioDesc_;
......
......@@ -21,8 +21,6 @@ namespace paddle {
REGISTER_LAYER(mkldnn_batch_norm, MKLDNNBatchNormLayer);
const real MKLDNNBatchNormLayer::EPS = 1E-5;
bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
if (!MKLDNNLayer::init(layerMap, parameterMap)) {
......@@ -50,6 +48,8 @@ bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap,
useGlobalStats_ = config_.use_global_stats();
}
movingAvgFraction_ = config_.moving_average_fraction();
EPS = config_.epsilon();
VLOG(MKLDNN_BASE) << "--- " << (useGlobalStats_ ? "use" : "do not use")
<< " --- global stats";
VLOG(MKLDNN_BASE) << "Moving average fraction: " << movingAvgFraction_;
......
......@@ -32,7 +32,8 @@ protected:
std::shared_ptr<bn_fwd::primitive_desc> fwdPD_;
// Epsilon value used in the batch normalization formula.
static const real EPS;
real EPS;
// weight and bias in paddle
std::unique_ptr<Weight> weight_;
std::unique_ptr<Weight> biases_;
......
......@@ -540,6 +540,10 @@ message LayerConfig {
// for switch order layer
optional ReshapeConfig reshape_conf = 59;
// for batch normalization layer
// Small constant added to the variance to avoid numerical problems.
optional double epsilon = 60 [ default = 0.00001 ];
}
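For reference, the epsilon added here is the standard stabilizer in the batch normalization formula,

y = \gamma \cdot \frac{x - \mu_{\mathcal{B}}}{\sqrt{\sigma_{\mathcal{B}}^{2} + \epsilon}} + \beta

where \mu_{\mathcal{B}} and \sigma_{\mathcal{B}}^{2} are the batch (or moving) mean and variance.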
message EvaluatorConfig {
......
......@@ -2434,6 +2434,7 @@ class BatchNormLayer(LayerBase):
bias=True,
img3D=False,
use_global_stats=True,
epsilon=1e-5,
moving_average_fraction=0.9,
batch_norm_type=None,
mean_var_names=None,
......@@ -2482,6 +2483,8 @@ class BatchNormLayer(LayerBase):
self.config.use_global_stats = use_global_stats
if moving_average_fraction is not None:
self.config.moving_average_fraction = moving_average_fraction
if epsilon is not None:
self.config.epsilon = epsilon
input_layer = self.get_input_layer(0)
image_conf = self.config.inputs[0].image_conf
......
......@@ -3036,6 +3036,7 @@ def batch_norm_layer(input,
param_attr=None,
layer_attr=None,
batch_norm_type=None,
epsilon=1e-5,
moving_average_fraction=0.9,
use_global_stats=None,
mean_var_names=None):
......@@ -3106,6 +3107,8 @@ def batch_norm_layer(input,
will use the mean and variance of the current batch
of test data.
:type use_global_stats: bool | None.
:param epsilon: Small constant added to the variance to avoid numerical problems.
:type epsilon: float.
:param moving_average_fraction: Factor used in the moving average computation.
:math:`runningMean = newMean*(1-factor) + runningMean*factor`
:type moving_average_fraction: float.
......@@ -3123,6 +3126,9 @@ def batch_norm_layer(input,
assert (batch_norm_type is None) or (batch_norm_type == "batch_norm") or \
(batch_norm_type == "mkldnn_batch_norm") or \
(batch_norm_type == "cudnn_batch_norm")
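# Keep epsilon at or above 1e-5, matching cuDNN's CUDNN_BN_MIN_EPSILON
# (see MIN_EPS in CudnnBatchNormLayer above).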
assert epsilon >= 1e-5, "Parameter epsilon must be no less than 1e-5."
l = Layer(
name=name,
img3D=img3D,
......@@ -3132,6 +3138,7 @@ def batch_norm_layer(input,
type=LayerType.BATCH_NORM_LAYER,
batch_norm_type=batch_norm_type,
bias=ParamAttr.to_bias(bias_attr),
epsilon=epsilon,
moving_average_fraction=moving_average_fraction,
use_global_stats=use_global_stats,
mean_var_names=mean_var_names,
......
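A minimal usage sketch of the new parameter, assuming the paddle.trainer_config_helpers API touched in this diff; the surrounding layers are illustrative only:

from paddle.trainer_config_helpers import *

# Illustrative single-channel 28x28 input.
img = data_layer(name='image', size=28 * 28)
conv = img_conv_layer(
    input=img,
    num_channels=1,
    filter_size=3,
    num_filters=16,
    act=ReluActivation())
# epsilon must be >= 1e-5 (enforced by the assert above); the cuDNN
# implementation additionally clamps it to CUDNN_BN_MIN_EPSILON.
bn = batch_norm_layer(
    input=conv,
    epsilon=1e-4,
    moving_average_fraction=0.9)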