From a689b8dd0e82c548efdebbec928a6f15717cc450 Mon Sep 17 00:00:00 2001
From: chenguowei01 <chenguowei01@baidu.com>
Date: Thu, 18 Jun 2020 14:47:39 +0800
Subject: [PATCH] Add multi-grid dilation to ResNet-vd and an RGB conversion option (AUG.TO_RGB)

---
 pdseg/models/backbone/resnet_vd.py | 253 ++++++++++++++---------------
 pdseg/reader.py                    |   2 +
 pdseg/utils/config.py              |   2 +
 3 files changed, 130 insertions(+), 127 deletions(-)

diff --git a/pdseg/models/backbone/resnet_vd.py b/pdseg/models/backbone/resnet_vd.py
index 8d684656..827dad5b 100644
--- a/pdseg/models/backbone/resnet_vd.py
+++ b/pdseg/models/backbone/resnet_vd.py
@@ -65,6 +65,7 @@ class ResNet():
             dilation_dict=None):
         layers = self.layers
         supported_layers = [18, 34, 50, 101, 152]
+        mult_grid = [1, 2, 4]
         assert layers in supported_layers, \
             "supported layers are {} but input layer is {}".format(supported_layers, layers)
 
@@ -96,37 +97,42 @@ class ResNet():
         num_filters = [64, 128, 256, 512]
 
         if self.stem == 'icnet' or self.stem == 'pspnet' or self.stem == 'deeplab':
-            conv = self.conv_bn_layer(input=input,
-                                      num_filters=int(32 * self.scale),
-                                      filter_size=3,
-                                      stride=2,
-                                      act='relu',
-                                      name="conv1_1")
-            conv = self.conv_bn_layer(input=conv,
-                                      num_filters=int(32 * self.scale),
-                                      filter_size=3,
-                                      stride=1,
-                                      act='relu',
-                                      name="conv1_2")
-            conv = self.conv_bn_layer(input=conv,
-                                      num_filters=int(64 * self.scale),
-                                      filter_size=3,
-                                      stride=1,
-                                      act='relu',
-                                      name="conv1_3")
+            conv = self.conv_bn_layer(
+                input=input,
+                num_filters=int(32 * self.scale),
+                filter_size=3,
+                stride=2,
+                act='relu',
+                name="conv1_1")
+            conv = self.conv_bn_layer(
+                input=conv,
+                num_filters=int(32 * self.scale),
+                filter_size=3,
+                stride=1,
+                act='relu',
+                name="conv1_2")
+            conv = self.conv_bn_layer(
+                input=conv,
+                num_filters=int(64 * self.scale),
+                filter_size=3,
+                stride=1,
+                act='relu',
+                name="conv1_3")
         else:
-            conv = self.conv_bn_layer(input=input,
-                                      num_filters=int(64 * self.scale),
-                                      filter_size=7,
-                                      stride=2,
-                                      act='relu',
-                                      name="conv1")
-
-        conv = fluid.layers.pool2d(input=conv,
-                                   pool_size=3,
-                                   pool_stride=2,
-                                   pool_padding=1,
-                                   pool_type='max')
+            conv = self.conv_bn_layer(
+                input=input,
+                num_filters=int(64 * self.scale),
+                filter_size=7,
+                stride=2,
+                act='relu',
+                name="conv1")
+
+        conv = fluid.layers.pool2d(
+            input=conv,
+            pool_size=3,
+            pool_stride=2,
+            pool_padding=1,
+            pool_type='max')
 
         layer_count = 1
         if check_points(layer_count, decode_points):
@@ -147,6 +153,8 @@ class ResNet():
                     else:
                         conv_name = "res" + str(block + 2) + chr(97 + i)
                     dilation_rate = get_dilated_rate(dilation_dict, block)
+                    if block == 3:
+                        dilation_rate = dilation_rate * mult_grid[i]
 
                     conv = self.bottleneck_block(
                         input=conv,
@@ -170,10 +178,8 @@ class ResNet():
                             np.ceil(
                                 np.array(conv.shape[2:]).astype('int32') / 2))
 
-            pool = fluid.layers.pool2d(input=conv,
-                                       pool_size=7,
-                                       pool_type='avg',
-                                       global_pooling=True)
+            pool = fluid.layers.pool2d(
+                input=conv, pool_size=7, pool_type='avg', global_pooling=True)
             stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
             out = fluid.layers.fc(
                 input=pool,
@@ -198,10 +204,8 @@ class ResNet():
                     if check_points(layer_count, end_points):
                         return conv, decode_ends
 
-            pool = fluid.layers.pool2d(input=conv,
-                                       pool_size=7,
-                                       pool_type='avg',
-                                       global_pooling=True)
+            pool = fluid.layers.pool2d(
+                input=conv, pool_size=7, pool_type='avg', global_pooling=True)
             stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
             out = fluid.layers.fc(
                 input=pool,
@@ -234,19 +238,18 @@ class ResNet():
         else:
             bias_attr = False
 
-        conv = fluid.layers.conv2d(input=input,
-                                   num_filters=num_filters,
-                                   filter_size=filter_size,
-                                   stride=stride,
-                                   padding=(filter_size - 1) //
-                                   2 if dilation == 1 else 0,
-                                   dilation=dilation,
-                                   groups=groups,
-                                   act=None,
-                                   param_attr=ParamAttr(name=name + "_weights",
-                                                        learning_rate=lr_mult),
-                                   bias_attr=bias_attr,
-                                   name=name + '.conv2d.output.1')
+        conv = fluid.layers.conv2d(
+            input=input,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2 if dilation == 1 else 0,
+            dilation=dilation,
+            groups=groups,
+            act=None,
+            param_attr=ParamAttr(name=name + "_weights", learning_rate=lr_mult),
+            bias_attr=bias_attr,
+            name=name + '.conv2d.output.1')
 
         if name == "conv1":
             bn_name = "bn_" + name
@@ -256,8 +259,8 @@ class ResNet():
             input=conv,
             act=act,
             name=bn_name + '.output.1',
-            param_attr=ParamAttr(name=bn_name + '_scale',
-                                 learning_rate=lr_mult),
+            param_attr=ParamAttr(
+                name=bn_name + '_scale', learning_rate=lr_mult),
             bias_attr=ParamAttr(bn_name + '_offset', learning_rate=lr_mult),
             moving_mean_name=bn_name + '_mean',
             moving_variance_name=bn_name + '_variance',
@@ -272,23 +275,24 @@ class ResNet():
                           act=None,
                           name=None):
         lr_mult = self.lr_mult_list[self.curr_stage]
-        pool = fluid.layers.pool2d(input=input,
-                                   pool_size=2,
-                                   pool_stride=2,
-                                   pool_padding=0,
-                                   pool_type='avg',
-                                   ceil_mode=True)
-
-        conv = fluid.layers.conv2d(input=pool,
-                                   num_filters=num_filters,
-                                   filter_size=filter_size,
-                                   stride=1,
-                                   padding=(filter_size - 1) // 2,
-                                   groups=groups,
-                                   act=None,
-                                   param_attr=ParamAttr(name=name + "_weights",
-                                                        learning_rate=lr_mult),
-                                   bias_attr=False)
+        pool = fluid.layers.pool2d(
+            input=input,
+            pool_size=2,
+            pool_stride=2,
+            pool_padding=0,
+            pool_type='avg',
+            ceil_mode=True)
+
+        conv = fluid.layers.conv2d(
+            input=pool,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=1,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            act=None,
+            param_attr=ParamAttr(name=name + "_weights", learning_rate=lr_mult),
+            bias_attr=False)
         if name == "conv1":
             bn_name = "bn_" + name
         else:
@@ -296,24 +300,20 @@ class ResNet():
         return fluid.layers.batch_norm(
             input=conv,
             act=act,
-            param_attr=ParamAttr(name=bn_name + '_scale',
-                                 learning_rate=lr_mult),
+            param_attr=ParamAttr(
+                name=bn_name + '_scale', learning_rate=lr_mult),
             bias_attr=ParamAttr(bn_name + '_offset', learning_rate=lr_mult),
             moving_mean_name=bn_name + '_mean',
             moving_variance_name=bn_name + '_variance')
 
     def shortcut(self, input, ch_out, stride, is_first, name):
         ch_in = input.shape[1]
-        print('shortcut:', stride, is_first, ch_in, ch_out)
         if ch_in != ch_out or stride != 1:
             if is_first or stride == 1:
                 return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
             else:
-                return self.conv_bn_layer_new(input,
-                                              ch_out,
-                                              1,
-                                              stride,
-                                              name=name)
+                return self.conv_bn_layer_new(
+                    input, ch_out, 1, stride, name=name)
         elif is_first:
             return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
         else:
@@ -326,59 +326,58 @@ class ResNet():
                          name,
                          is_first=False,
                          dilation=1):
-        conv0 = self.conv_bn_layer(input=input,
-                                   num_filters=num_filters,
-                                   filter_size=1,
-                                   dilation=1,
-                                   stride=1,
-                                   act='relu',
-                                   name=name + "_branch2a")
+        conv0 = self.conv_bn_layer(
+            input=input,
+            num_filters=num_filters,
+            filter_size=1,
+            dilation=1,
+            stride=1,
+            act='relu',
+            name=name + "_branch2a")
         if dilation > 1:
             conv0 = self.zero_padding(conv0, dilation)
-        conv1 = self.conv_bn_layer(input=conv0,
-                                   num_filters=num_filters,
-                                   filter_size=3,
-                                   dilation=dilation,
-                                   stride=stride,
-                                   act='relu',
-                                   name=name + "_branch2b")
-        conv2 = self.conv_bn_layer(input=conv1,
-                                   num_filters=num_filters * 4,
-                                   dilation=1,
-                                   filter_size=1,
-                                   act=None,
-                                   name=name + "_branch2c")
-
-        short = self.shortcut(input,
-                              num_filters * 4,
-                              stride,
-                              is_first=is_first,
-                              name=name + "_branch1")
-        print(input.shape, short.shape, conv2.shape)
-        print(stride)
-
-        return fluid.layers.elementwise_add(x=short,
-                                            y=conv2,
-                                            act='relu',
-                                            name=name + ".add.output.5")
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            num_filters=num_filters,
+            filter_size=3,
+            dilation=dilation,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2b")
+        conv2 = self.conv_bn_layer(
+            input=conv1,
+            num_filters=num_filters * 4,
+            dilation=1,
+            filter_size=1,
+            act=None,
+            name=name + "_branch2c")
+
+        short = self.shortcut(
+            input,
+            num_filters * 4,
+            stride,
+            is_first=is_first,
+            name=name + "_branch1")
+
+        return fluid.layers.elementwise_add(
+            x=short, y=conv2, act='relu', name=name + ".add.output.5")
 
     def basic_block(self, input, num_filters, stride, is_first, name):
-        conv0 = self.conv_bn_layer(input=input,
-                                   num_filters=num_filters,
-                                   filter_size=3,
-                                   act='relu',
-                                   stride=stride,
-                                   name=name + "_branch2a")
-        conv1 = self.conv_bn_layer(input=conv0,
-                                   num_filters=num_filters,
-                                   filter_size=3,
-                                   act=None,
-                                   name=name + "_branch2b")
-        short = self.shortcut(input,
-                              num_filters,
-                              stride,
-                              is_first,
-                              name=name + "_branch1")
+        conv0 = self.conv_bn_layer(
+            input=input,
+            num_filters=num_filters,
+            filter_size=3,
+            act='relu',
+            stride=stride,
+            name=name + "_branch2a")
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            num_filters=num_filters,
+            filter_size=3,
+            act=None,
+            name=name + "_branch2b")
+        short = self.shortcut(
+            input, num_filters, stride, is_first, name=name + "_branch1")
         return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
 
 
diff --git a/pdseg/reader.py b/pdseg/reader.py
index 6fe395d5..a18ea831 100644
--- a/pdseg/reader.py
+++ b/pdseg/reader.py
@@ -318,6 +318,8 @@ class SegDataset(object):
             raise ValueError("Dataset mode={} Error!".format(mode))
 
         # Normalize image
+        if cfg.AUG.TO_RGB:
+            img = img[..., ::-1]
         img = self.normalize_image(img)
 
         if ModelPhase.is_train(mode) or ModelPhase.is_eval(mode):
diff --git a/pdseg/utils/config.py b/pdseg/utils/config.py
index 9ba00e05..a139c0a3 100644
--- a/pdseg/utils/config.py
+++ b/pdseg/utils/config.py
@@ -117,6 +117,8 @@ cfg.AUG.RICH_CROP.CONTRAST_JITTER_RATIO = 0.5
 cfg.AUG.RICH_CROP.BLUR = False
 # 图像启动模糊百分比，0-1
 cfg.AUG.RICH_CROP.BLUR_RATIO = 0.1
+# Whether to switch the image to RGB mode (reverses the last image axis before normalization)
+cfg.AUG.TO_RGB = True
 
 ########################### 训练配置 ##########################################
 # 模型保存路径
-- 
GitLab