/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "Layer.h"
#include "paddle/utils/Stat.h"

namespace paddle {

/**
 * @brief Batch normalization layer used to normalize the input across the
 * batch.
 *
 * By default, global mean and variance statistics are calculated via a
 * running average during the training period. Then the pre-calculated global
 * mean and variance are used for testing.
 *
 * Moving mean and variance are located in the Parameter object when
 * constructing, and the calculation will change them. Now we only save the
 * global mean and variance of one thread in the first node for GPU.
 * But the calculation in CPU is different, because parameters are shared by
 * multiple threads. Here we use ShareCpuMatrix with a lock to calculate. We
 * still save the global mean and variance in the first node in CPU when
 * running on multiple machines.
 *
 * [1] S. Ioffe and C. Szegedy, "Batch Normalization: Accelerating Deep Network
 *     Training by Reducing Internal Covariate Shift." arXiv preprint
 *     arXiv:1502.03167 (2015).
 */

class BatchNormBaseLayer : public Layer {
public:
44
  explicit BatchNormBaseLayer(const LayerConfig& config) : Layer(config) {}
Z
zhangjinchao01 已提交
45 46 47 48 49 50 51 52 53 54 55 56

  ~BatchNormBaseLayer() {}

  /**
   * @brief Create BatchNorm layer by norm_type, including batch_norm and
   * cudnn_batch_norm. If do not set norm_type, it will automatically select
   * cudnn_batch_norm for GPU and batch_norm for CPU.
   */
  static Layer* create(const LayerConfig& config);

  virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

57 58
  /**
   * @brief Calculate feature map size. Some input uses frameHeight and
Z
zhangjinchao01 已提交
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
   * frameWidth to store feature size
   */
  void calFeatureMapSize();

protected:
  /// Batch normalization scale parameter, which is referred to as gamma in
  /// in original paper.
  std::unique_ptr<Weight> weight_;
  /// Moving average of mean.
  std::unique_ptr<Weight> movingMean_;
  /// Moving average of variance.
  std::unique_ptr<Weight> movingVar_;
  /// Batch normalization bias parameter, which is referred to as beta in
  /// in original paper.
  std::unique_ptr<Weight> biases_;

  /// Save intermediate results computed during the forward pass,
  /// these can then be reused to speed up the backward pass.
  MatrixPtr savedMean_;
  MatrixPtr savedInvVar_;

L
Luo Tao 已提交
80 81
  /// Height or width of input image feature.
  /// Both of them are 1 if the input is fully-connected layer.
Z
zhangjinchao01 已提交
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
  int imageH_;
  int imageW_;
  /// Height * Width.
  int imgPixels_;
  /// Feature dimension. If the input layer is conv layer, it is the channels
  /// of feature map of the conv layer. If the input layer is fully-connected
  /// layer, it is the dimension of fc layer.
  int channels_;
  // if useGlobalStats_ is true, will use the loaded mean and variance.
  // otherwise, calculate mean and variance in this mini-batch.
  bool useGlobalStats_;
  // use to compute moving mean and variance.
  real movingAvgFraction_;
};

}  // namespace paddle