Merge pull request #3594 from chengduoZH/Adapting_to_the_BatchNorm_structure_to_support_3D_data

Adapting to the BatchNorm structure to support 3D data

Merge pull request #3594 from chengduoZH/Adapting_to_the_BatchNorm_structure_to_support_3D_data
Adapting to the BatchNorm structure to support 3D data
0be34949 · chengduo · GitHub · 2fefaa1c · 5500153a · 0be34949
15 changed files
--- a/paddle/gserver/layers/BatchNormBaseLayer.cpp
+++ b/paddle/gserver/layers/BatchNormBaseLayer.cpp
@@ -62,14 +62,18 @@ void BatchNormBaseLayer::calFeatureMapSize() {
  const ImageConfig& conf = config_.inputs(0).image_conf();
  imageH_ = inputLayers_[0]->getOutput().getFrameHeight();
  imageW_ = inputLayers_[0]->getOutput().getFrameWidth();
+  imageD_ = inputLayers_[0]->getOutput().getFrameDepth();
+
+  if (0 == imageD_) imageD_ = conf.img_size_z();
  if (imageH_ == 0 && imageW_ == 0) {
    imageH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
    imageW_ = conf.img_size();
  } else {
    getOutput().setFrameHeight(imageH_);
    getOutput().setFrameWidth(imageW_);
+    getOutput().setFrameDepth(imageD_);
  }
-  imgPixels_ = imageH_ * imageW_;
+  imgPixels_ = imageH_ * imageW_ * imageD_;
 }

 }  // namespace paddle
--- a/paddle/gserver/layers/BatchNormBaseLayer.h
+++ b/paddle/gserver/layers/BatchNormBaseLayer.h
@@ -80,6 +80,7 @@ protected:

  /// Height or width of input image feature.
  /// Both of them are 1 if the input is fully-connected layer.
+  int imageD_;
  int imageH_;
  int imageW_;
  /// Height * Width.

--- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp
+++ b/paddle/gserver/layers/CudnnBatchNormLayer.cpp
@@ -37,7 +37,7 @@ bool CudnnBatchNormLayer::init(const LayerMap& layerMap,
 }

 void CudnnBatchNormLayer::reshape(int batchSize) {
-  hl_tensor_reshape(ioDesc_, batchSize, channels_, imageH_, imageW_);
+  hl_tensor_reshape(ioDesc_, batchSize, channels_, imageH_ * imageD_, imageW_);  // depth is folded into the height argument (imageH_ * imageD_); width is passed unchanged
 }

 void CudnnBatchNormLayer::forward(PassType passType) {
@@ -104,7 +104,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
                                   EPS,
                                   batchSize,
                                   channels_,
-                                   imageH_,
+                                   imageH_ * imageD_,
                                   imageW_);
    }
  }

--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1703,6 +1703,55 @@ TEST(Layer, BatchNormalizationLayer) {
 #endif
 }

+void testBatchNorm3DLayer(const string& type, bool trans, bool useGpu) {  // builds a batch-norm TestConfig whose first input carries a 3D image_conf, then passes it to testLayerGrad
+  TestConfig config;
+  const int CHANNELS = 10;
+  const int IMG_SIZE = 16;
+  const int IMG_SIZE_Y = 8;
+  const int IMG_SIZE_Z = 8;
+  size_t size = CHANNELS * IMG_SIZE * IMG_SIZE_Y * IMG_SIZE_Z;  // layer size = channels * img_size * img_size_y * img_size_z
+  config.layerConfig.set_type(type);  // `type` selects the layer implementation under test
+  config.layerConfig.set_size(size);
+  config.layerConfig.set_active_type("sigmoid");
+  config.biasSize = CHANNELS;  // bias and per-input parameter sizes all equal the channel count
+  config.inputDefs.push_back({INPUT_DATA,
+                              "layer_0",
+                              /* dim= */ size,
+                              /* paraSize= */ CHANNELS});
+
+  config.inputDefs.push_back({INPUT_DATA, "layer_1_running_mean", 1, CHANNELS});
+  config.inputDefs.back().isStatic = true;  // the running-mean input is marked static
+  config.inputDefs.push_back({INPUT_DATA, "layer_2_running_var", 1, CHANNELS});
+  config.inputDefs.back().isStatic = true;  // the running-var input is marked static
+
+  LayerInputConfig* input = config.layerConfig.add_inputs();  // only this first input receives an image_conf below
+  config.layerConfig.add_inputs();
+  config.layerConfig.add_inputs();  // three add_inputs() calls in total, one per inputDef pushed above
+
+  ImageConfig* img_conf = input->mutable_image_conf();
+  img_conf->set_channels(CHANNELS);
+  img_conf->set_img_size(IMG_SIZE);
+  img_conf->set_img_size_y(IMG_SIZE_Y);
+  img_conf->set_img_size_z(IMG_SIZE_Z);  // the z size is what makes this a 3D configuration
+
+  testLayerGrad(config,
+                "batch_norm",  // NOTE(review): fixed literal, not the `type` argument; confirm this is intended when type is "cudnn_batch_norm"
+                64,
+                /* trans= */ trans,
+                useGpu,
+                /* useWeight */ true);
+}
+
+TEST(Layer, testBatchNorm3DLayer) {
+  testBatchNorm3DLayer("batch_norm", false, false);  // always run with useGpu = false
+#ifndef PADDLE_ONLY_CPU
+  testBatchNorm3DLayer("batch_norm", false, true);  // same layer type with useGpu = true; compiled out when PADDLE_ONLY_CPU is defined
+  if (hl_get_cudnn_lib_version() >= int(4000)) {  // the cudnn_batch_norm variant only runs when the reported cuDNN library version is at least 4000
+    testBatchNorm3DLayer("cudnn_batch_norm", false, true);
+  }
+#endif
+}
+
 void testConvOperator(bool isDeconv) {
  TestConfig config;
  const int NUM_FILTERS = 16;

--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -520,6 +520,7 @@ message LayerConfig {
  // for HuberRegressionLoss
  optional double delta = 57 [ default = 1.0 ];

+  // for 3D data
  optional uint64 depth = 58 [ default = 1 ];

  // for switch order layer

--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1332,6 +1332,12 @@ def parse_image(image, input_layer_name, image_conf):
        get_img_size(input_layer_name, image_conf.channels)


+def parse_image3d(image, input_layer_name, image_conf):  # fills image_conf (channels plus three image sizes) for a 3D input
+    image_conf.channels = image.channels  # channels are copied from the `image` argument
+    image_conf.img_size, image_conf.img_size_y, image_conf.img_size_z = \
+        get_img3d_size(input_layer_name, image_conf.channels)  # get_img3d_size is defined outside this hunk; its result is unpacked as (img_size, img_size_y, img_size_z)
+
+
 def parse_norm(norm, input_layer_name, norm_conf):
    norm_conf.norm_type = norm.norm_type
    config_assert(
@@ -2365,6 +2371,7 @@ class BatchNormLayer(LayerBase):
                 name,
                 inputs,
                 bias=True,
+                 img3D=False,
                 use_global_stats=True,
                 moving_average_fraction=0.9,
                 batch_norm_type=None,
@@ -2410,13 +2417,31 @@ class BatchNormLayer(LayerBase):

        input_layer = self.get_input_layer(0)
        image_conf = self.config.inputs[0].image_conf
+        if img3D:
+            parse_image3d(self.inputs[0].image, input_layer.name, image_conf)
+            # Only pass the width and height of input to batch_norm layer
+            # when either of it is non-zero.
+            if input_layer.width != 0 or input_layer.height != 0:
+                self.set_cnn_layer(
+                    input_layer_name=name,
+                    depth=image_conf.img_size_z,
+                    height=image_conf.img_size_y,
+                    width=image_conf.img_size,
+                    channels=image_conf.channels,
+                    is_print=True)
+            else:
+                self.set_layer_size(input_layer.size)
+        else:
            parse_image(self.inputs[0].image, input_layer.name, image_conf)
-
            # Only pass the width and height of input to batch_norm layer
            # when either of it is non-zero.
            if input_layer.width != 0 or input_layer.height != 0:
-            self.set_cnn_layer(name, image_conf.img_size_y, image_conf.img_size,
-                               image_conf.channels, False)
+                self.set_cnn_layer(
+                    input_layer_name=name,
+                    height=image_conf.img_size_y,
+                    width=image_conf.img_size,
+                    channels=image_conf.channels,
+                    is_print=True)
            else:
                self.set_layer_size(input_layer.size)

@@ -2433,6 +2458,28 @@ class BatchNormLayer(LayerBase):

        self.create_bias_parameter(bias, psize)

+    def set_cnn_layer(self,  # sets this layer's size, height/width and depth from the given dimensions, optionally printing them
+                      input_layer_name,
+                      depth=None,
+                      height=None,
+                      width=None,
+                      channels=None,
+                      is_print=True):
+        depthIsNone = False  # remembers whether the caller supplied a depth, to pick the message format below
+        if depth is None:
+            depth = 1  # a missing depth is treated as 1, so the size formula below reduces to height * width * channels
+            depthIsNone = True
+        size = depth * height * width * channels  # NOTE(review): height, width and channels default to None; callers must pass all three or this multiplication raises TypeError
+        self.set_layer_size(size)
+        self.set_layer_height_width(height, width)
+        self.set_layer_depth(depth)  # depth is always recorded, including the substituted value 1
+        if is_print and depthIsNone:  # no depth supplied: print the message without the d field
+            print("output for %s: c = %d, h = %d, w = %d, size = %d" %
+                  (input_layer_name, channels, height, width, size))
+        elif is_print:  # depth supplied: print the message including d
+            print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" %
+                  (input_layer_name, channels, depth, height, width, size))
+
    def calc_parameter_size(self, image_conf):
        return image_conf.channels

@@ -2694,9 +2741,20 @@ class AddToLayer(LayerBase):
        super(AddToLayer, self).__init__(
            name, 'addto', 0, inputs=inputs, **xargs)
        config_assert(len(inputs) > 0, 'inputs cannot be empty for AddToLayer')
+
+        if len(self.inputs) > 1:
            for input_index in xrange(len(self.inputs)):
-            input_layer = self.get_input_layer(input_index)
-            self.set_layer_size(input_layer.size)
+                assert self.get_input_layer(0).height == self.get_input_layer(
+                    input_index).height
+                assert self.get_input_layer(0).width == self.get_input_layer(
+                    input_index).width
+                assert self.get_input_layer(0).depth == self.get_input_layer(
+                    input_index).depth
+
+        self.set_layer_size(self.get_input_layer(0).size)
+        self.set_layer_height_width(self.get_input_layer(0).height, \
+                                        self.get_input_layer(0).width)
+        self.set_layer_depth(self.get_input_layer(0).depth)
        self.create_bias_parameter(bias, self.config.size)


@@ -3376,11 +3434,20 @@ class ConcatenateLayer(LayerBase):
            name, 'concat', 0, inputs=inputs, **xargs)
        size = 0
        for input_index in xrange(len(self.inputs)):
+            assert self.get_input_layer(0).height == self.get_input_layer(
+                input_index).height
+            assert self.get_input_layer(0).width == self.get_input_layer(
+                input_index).width
+            assert self.get_input_layer(0).depth == self.get_input_layer(
+                input_index).depth
            input_layer = self.get_input_layer(input_index)
            input = self.inputs[input_index]
            if self.config.size == 0:
                size += input_layer.size

+        self.set_layer_height_width(self.get_input_layer(0).height, \
+                                    self.get_input_layer(0).width)
+        self.set_layer_depth(self.get_input_layer(0).depth)
        self.set_layer_size(size)



--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -354,6 +354,10 @@ class LayerOutput(object):
    def height(self):
        return cp.g_layer_map[self.full_name].height

+    @property
+    def depth(self):  # read-only accessor, parallel to the height property just above
+        return cp.g_layer_map[self.full_name].depth  # looked up in cp.g_layer_map under this output's full_name
+
    def set_input(self, input):
        """
        Set the input for a memory layer. Can only be used for memory layer
@@ -2953,6 +2957,7 @@ def img_cmrnorm_layer(input,
 def batch_norm_layer(input,
                     act=None,
                     name=None,
+                     img3D=False,
                     num_channels=None,
                     bias_attr=None,
                     param_attr=None,
@@ -3042,6 +3047,7 @@ def batch_norm_layer(input,
           (batch_norm_type == "cudnn_batch_norm")
    l = Layer(
        name=name,
+        img3D=img3D,
        inputs=Input(
            input.name, image=Image(channels=num_channels), **param_attr.attr),
        active_type=act.name,

--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -10,6 +10,6 @@ test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_la
 test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
 test_kmax_seq_socre_layer test_sub_nested_seq_select_layer test_scale_shift_layer
 test_seq_slice_layer test_cross_entropy_over_beam test_pooling3D_layer
-test_conv3d_layer test_deconv3d_layer)
+test_conv3d_layer test_deconv3d_layer test_BatchNorm3D)

 export whole_configs=(test_split_datasource)
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr
@@ -62,6 +62,7 @@ layers {
  moving_average_fraction: 0.9
  height: 227
  width: 227
+  depth: 1
 }
 layers {
  name: "__crmnorm_0__"

--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr
@@ -62,6 +62,7 @@ layers {
  moving_average_fraction: 0.9
  height: 256
  width: 256
+  depth: 1
 }
 layers {
  name: "__crmnorm_0__"

--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr
+type: "nn"
+layers {
+  name: "data3D"
+  type: "data"
+  size: 360
+  active_type: ""
+  height: 6
+  width: 20
+  depth: 3
+}
+layers {
+  name: "__batch_norm_0__"
+  type: "batch_norm"
+  size: 360
+  active_type: "relu"
+  inputs {
+    input_layer_name: "data3D"
+    input_parameter_name: "___batch_norm_0__.w0"
+    image_conf {
+      channels: 1
+      img_size: 20
+      img_size_y: 6
+      img_size_z: 3
+    }
+  }
+  inputs {
+    input_layer_name: "data3D"
+    input_parameter_name: "___batch_norm_0__.w1"
+  }
+  inputs {
+    input_layer_name: "data3D"
+    input_parameter_name: "___batch_norm_0__.w2"
+  }
+  bias_parameter_name: "___batch_norm_0__.wbias"
+  moving_average_fraction: 0.9
+  height: 6
+  width: 20
+  depth: 3
+}
+parameters {
+  name: "___batch_norm_0__.w0"
+  size: 1
+  initial_mean: 1.0
+  initial_std: 0.0
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "___batch_norm_0__.w1"
+  size: 1
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 1
+  initial_strategy: 0
+  initial_smart: false
+  is_static: true
+  is_shared: true
+}
+parameters {
+  name: "___batch_norm_0__.w2"
+  size: 1
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 1
+  initial_strategy: 0
+  initial_smart: false
+  is_static: true
+  is_shared: true
+}
+parameters {
+  name: "___batch_norm_0__.wbias"
+  size: 1
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 1
+  initial_strategy: 0
+  initial_smart: false
+}
+input_layer_names: "data3D"
+output_layer_names: "__batch_norm_0__"
+sub_models {
+  name: "root"
+  layer_names: "data3D"
+  layer_names: "__batch_norm_0__"
+  input_layer_names: "data3D"
+  output_layer_names: "__batch_norm_0__"
+  is_recurrent_layer_group: false
+}
+
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr
@@ -74,6 +74,9 @@ layers {
  inputs {
    input_layer_name: "__bidirectional_gru_0___bw"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 parameters {
  name: "___bidirectional_gru_0___fw_transform.w0"

--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_recursive_topology.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_recursive_topology.protostr
@@ -16,6 +16,9 @@ layers {
  inputs {
    input_layer_name: "data"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_1__"
@@ -28,6 +31,9 @@ layers {
  inputs {
    input_layer_name: "__addto_0__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_2__"
@@ -40,6 +46,9 @@ layers {
  inputs {
    input_layer_name: "__addto_1__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_3__"
@@ -52,6 +61,9 @@ layers {
  inputs {
    input_layer_name: "__addto_2__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_4__"
@@ -64,6 +76,9 @@ layers {
  inputs {
    input_layer_name: "__addto_3__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_5__"
@@ -76,6 +91,9 @@ layers {
  inputs {
    input_layer_name: "__addto_4__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_6__"
@@ -88,6 +106,9 @@ layers {
  inputs {
    input_layer_name: "__addto_5__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_7__"
@@ -100,6 +121,9 @@ layers {
  inputs {
    input_layer_name: "__addto_6__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_8__"
@@ -112,6 +136,9 @@ layers {
  inputs {
    input_layer_name: "__addto_7__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_9__"
@@ -124,6 +151,9 @@ layers {
  inputs {
    input_layer_name: "__addto_8__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_10__"
@@ -136,6 +166,9 @@ layers {
  inputs {
    input_layer_name: "__addto_9__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_11__"
@@ -148,6 +181,9 @@ layers {
  inputs {
    input_layer_name: "__addto_10__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_12__"
@@ -160,6 +196,9 @@ layers {
  inputs {
    input_layer_name: "__addto_11__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_13__"
@@ -172,6 +211,9 @@ layers {
  inputs {
    input_layer_name: "__addto_12__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_14__"
@@ -184,6 +226,9 @@ layers {
  inputs {
    input_layer_name: "__addto_13__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_15__"
@@ -196,6 +241,9 @@ layers {
  inputs {
    input_layer_name: "__addto_14__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_16__"
@@ -208,6 +256,9 @@ layers {
  inputs {
    input_layer_name: "__addto_15__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_17__"
@@ -220,6 +271,9 @@ layers {
  inputs {
    input_layer_name: "__addto_16__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_18__"
@@ -232,6 +286,9 @@ layers {
  inputs {
    input_layer_name: "__addto_17__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_19__"
@@ -244,6 +301,9 @@ layers {
  inputs {
    input_layer_name: "__addto_18__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_20__"
@@ -256,6 +316,9 @@ layers {
  inputs {
    input_layer_name: "__addto_19__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_21__"
@@ -268,6 +331,9 @@ layers {
  inputs {
    input_layer_name: "__addto_20__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_22__"
@@ -280,6 +346,9 @@ layers {
  inputs {
    input_layer_name: "__addto_21__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_23__"
@@ -292,6 +361,9 @@ layers {
  inputs {
    input_layer_name: "__addto_22__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_24__"
@@ -304,6 +376,9 @@ layers {
  inputs {
    input_layer_name: "__addto_23__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_25__"
@@ -316,6 +391,9 @@ layers {
  inputs {
    input_layer_name: "__addto_24__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_26__"
@@ -328,6 +406,9 @@ layers {
  inputs {
    input_layer_name: "__addto_25__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_27__"
@@ -340,6 +421,9 @@ layers {
  inputs {
    input_layer_name: "__addto_26__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_28__"
@@ -352,6 +436,9 @@ layers {
  inputs {
    input_layer_name: "__addto_27__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_29__"
@@ -364,6 +451,9 @@ layers {
  inputs {
    input_layer_name: "__addto_28__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_30__"
@@ -376,6 +466,9 @@ layers {
  inputs {
    input_layer_name: "__addto_29__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__addto_31__"
@@ -388,6 +481,9 @@ layers {
  inputs {
    input_layer_name: "__addto_30__"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__fc_layer_0__"

--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr
@@ -22,6 +22,9 @@ layers {
  inputs {
    input_layer_name: "b"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__concat_0__"
@@ -34,6 +37,9 @@ layers {
  inputs {
    input_layer_name: "b"
  }
+  height: 0
+  width: 0
+  depth: 1
 }
 layers {
  name: "__concat_1__"

--- a/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py
+from paddle.trainer_config_helpers import *
+
+settings(batch_size=1000, learning_rate=1e-4)
+
+#data = data_layer(name='data', size=180, width=30, height=6)
+#batchNorm = batch_norm_layer(data, num_channels=1)
+#outputs(batchNorm)
+
+data3D = data_layer(name='data3D', size=120 * 3, width=20, height=6, depth=3)  # size 360 = width 20 * height 6 * depth 3, with one channel
+batchNorm3D = batch_norm_layer(data3D, num_channels=1, img3D=True)  # img3D=True is forwarded to the Layer(...) call inside batch_norm_layer
+outputs(batchNorm3D)