add pspnet

8e89111f · pengmian · 52222565 · 8e89111f · 8e89111f · 8e89111f
6 changed files
--- a/configs/pspnet.yaml
+++ b/configs/pspnet.yaml
+EVAL_CROP_SIZE: (2049, 1025) # (width, height), for unpadding, rangescaling and stepscaling
+TRAIN_CROP_SIZE: (713, 713) # (width, height), for unpadding, rangescaling and stepscaling
+AUG:
+    AUG_METHOD: "stepscaling" # choices: unpadding, rangescaling, stepscaling
+    FIX_RESIZE_SIZE: (640, 640) # (width, height), for unpadding
+    INF_RESIZE_VALUE: 500  # for rangescaling
+    MAX_RESIZE_VALUE: 600  # for rangescaling
+    MIN_RESIZE_VALUE: 400  # for rangescaling
+    MAX_SCALE_FACTOR: 2.0  # for stepscaling
+    MIN_SCALE_FACTOR: 0.5  # for stepscaling
+    SCALE_STEP_SIZE: 0.25  # for stepscaling
+    MIRROR: True
+    RICH_CROP:
+        ENABLE: False
+        ASPECT_RATIO: 0.33
+        BLUR: True
+        BLUR_RATIO: 0.1
+        FLIP: True
+        FLIP_RATIO: 0.2
+        MAX_ROTATION: 15
+        MIN_AREA_RATIO: 0.5
+        BRIGHTNESS_JITTER_RATIO: 0.5
+        CONTRAST_JITTER_RATIO: 0.5
+        SATURATION_JITTER_RATIO: 0.5
+BATCH_SIZE: 4
+DATASET:
+    DATA_DIR: "./dataset/cityscapes/"
+    IMAGE_TYPE: "rgb"  # choices: rgb or rgba
+    NUM_CLASSES: 19
+    TEST_FILE_LIST: "dataset/cityscapes/val.list"
+    TRAIN_FILE_LIST: "dataset/cityscapes/train.list"
+    VAL_FILE_LIST: "dataset/cityscapes/val.list"
+    IGNORE_INDEX: 255
+FREEZE:
+    MODEL_FILENAME: "model"
+    PARAMS_FILENAME: "params"
+MODEL:
+    MODEL_NAME: "pspnet"
+    DEFAULT_NORM_TYPE: "bn"
+TEST:
+    TEST_MODEL: "pretrained_model/pspnet50_ADE20K/" #pspnet101_cityscapes
+TRAIN:
+    MODEL_SAVE_DIR: "snapshots/cityscape_pspnet50/"
+    PRETRAINED_MODEL: u"pretrained_model/pspnet50_ADE20K/"
+    RESUME: False
+    SNAPSHOT_EPOCH: 10
+SOLVER:
+    LR: 0.001
+    LR_POLICY: "poly"
+    OPTIMIZER: "sgd"
+    NUM_EPOCHS: 700
--- a/pdseg/data_aug.py
+++ b/pdseg/data_aug.py
@@ -374,7 +374,7 @@ def rand_crop(crop_img, crop_seg, mode=ModelPhase.TRAIN):
    Args:
        crop_img(numpy.ndarray): 输入图像
        crop_seg(numpy.ndarray): 标签图
-        mode(string): 模式, 默认训练模式，验证或预测模式时crop尺寸需大于原始图片尺寸, 其他模式无限制
+        mode(string): 模式, 默认训练模式，验证或预测、可视化模式时crop尺寸需大于原始图片尺寸

    Returns：
        裁剪后的图片和标签图
@@ -391,7 +391,7 @@ def rand_crop(crop_img, crop_seg, mode=ModelPhase.TRAIN):
        crop_width = cfg.EVAL_CROP_SIZE[0]
        crop_height = cfg.EVAL_CROP_SIZE[1]

-    if ModelPhase.is_eval(mode) or ModelPhase.is_predict(mode):
+    if not ModelPhase.is_train(mode): 
        if (crop_height < img_height or crop_width < img_width):
            raise Exception(
                "Crop size({},{}) must large than img size({},{}) when in EvalPhase."

--- a/pdseg/models/backbone/resnet.py
+++ b/pdseg/models/backbone/resnet.py
@@ -85,7 +85,7 @@ class ResNet():
            depth = [3, 8, 36, 3]
        num_filters = [64, 128, 256, 512]

-        if self.stem == 'icnet':
+        if self.stem == 'icnet' or self.stem == 'pspnet':
            conv = self.conv_bn_layer(
                input=input,
                num_filters=int(64 * self.scale),
@@ -133,22 +133,20 @@ class ResNet():
        if layers >= 50:
            for block in range(len(depth)):
                for i in range(depth[block]):
-                    if layers in [101, 152] and block == 2:
-                        if i == 0:
-                            conv_name = "res" + str(block + 2) + "a"
-                        else:
-                            conv_name = "res" + str(block + 2) + "b" + str(i)
-                    else:
-                        conv_name = "conv" + str(block + 2) + '_' + str(1 + i)
+                    conv_name = "conv" + str(block + 2) + '_' + str(1 + i)
                    dilation_rate = get_dilated_rate(dilation_dict, block)
-
+                    
+                    if self.stem == 'pspnet':
+                        stride = 2 if i == 0 and block == 1 else 1
+                    else:
+                        stride= 2 if i == 0 and block != 0 and dilation_rate == 1 else 1
+                    
                    conv = self.bottleneck_block(
-                        input=conv,
-                        num_filters=int(num_filters[block] * self.scale),
-                        stride=2
-                        if i == 0 and block != 0 and dilation_rate == 1 else 1,
-                        name=conv_name,
-                        dilation=dilation_rate)
+                            input=conv,
+                            num_filters=int(num_filters[block] * self.scale),
+                            stride=stride,
+                            name=conv_name, 
+                            dilation=dilation_rate)
                    layer_count += 3

                    if check_points(layer_count, decode_points):
@@ -174,7 +172,7 @@ class ResNet():
        else:
            for block in range(len(depth)):
                for i in range(depth[block]):
-                    conv_name = "res" + str(block + 2) + chr(97 + i)
+                    conv_name = "conv" + str(block + 2) + chr(97 + i)
                    conv = self.basic_block(
                        input=conv,
                        num_filters=num_filters[block],
@@ -215,6 +213,12 @@ class ResNet():
                      groups=1,
                      act=None,
                      name=None):
+   
+        if self.stem == 'pspnet':
+            bias_attr=ParamAttr(name=name + "/biases")
+        else:
+            bias_attr=False
+
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
@@ -225,7 +229,7 @@ class ResNet():
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "/weights"),
-            bias_attr=False,
+            bias_attr=bias_attr,
            name=name + '.conv2d.output.1')

        bn_name = name + '/BatchNorm/'
@@ -247,12 +251,17 @@ class ResNet():
            return input

    def bottleneck_block(self, input, num_filters, stride, name, dilation=1):
+        if self.stem == 'pspnet' and self.layers == 101:
+            strides = [1, stride]
+        else:
+            strides = [stride, 1]
+        
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=1,
            dilation=1,
-            stride=stride,
+            stride=strides[0],
            act='relu',
            name=name + "_branch2a")
        if dilation > 1:
@@ -262,6 +271,7 @@ class ResNet():
            num_filters=num_filters,
            filter_size=3,
            dilation=dilation,
+            stride=strides[1],
            act='relu',
            name=name + "_branch2b")
        conv2 = self.conv_bn_layer(

--- a/pdseg/models/model_builder.py
+++ b/pdseg/models/model_builder.py
@@ -73,6 +73,7 @@ def map_model_name(model_name):
        "unet": "unet.unet",
        "deeplabv3p": "deeplab.deeplabv3p",
        "icnet": "icnet.icnet",
+        "pspnet": "pspnet.pspnet",
    }
    if model_name in name_dict.keys():
        return name_dict[model_name]

--- a/pdseg/models/modeling/pspnet.py
+++ b/pdseg/models/modeling/pspnet.py
+# -*- coding: utf-8 -*- 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import math
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+from models.libs.model_libs import scope, name_scope
+from models.libs.model_libs import avg_pool , conv, bn
+from models.backbone.resnet import ResNet as resnet_backbone
+from utils.config import cfg
+
+def get_logit_interp(input, num_classes, out_shape, name="logit"):
+    """Apply a 1x1 conv head to `input` and bilinearly resize the result.
+
+    Args:
+        input: feature map passed to the 1x1 conv.
+        num_classes: second positional argument of the conv; presumably its
+            number of output filters (one per class) -- TODO confirm against
+            models.libs.model_libs.conv.
+        out_shape: target shape forwarded to resize_bilinear as `out_shape`.
+        name: scope name; also used as the prefix of the weight name and of
+            the conv output name.
+
+    Returns:
+        The output of resize_bilinear applied to the conv result.
+    """
+    # Weight attribute: named `name + 'weights'` (no separator), L2 decay with
+    # coefficient 0.0, truncated-normal initializer (loc=0.0, scale=0.01).
+    param_attr = fluid.ParamAttr(
+        name=name + 'weights',
+        regularizer=fluid.regularizer.L2DecayRegularizer(
+            regularization_coeff=0.0),
+        initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01))
+
+    with scope(name):
+        # 1x1 conv with bias enabled.
+        logit = conv(
+            input,
+            num_classes,
+            filter_size=1,
+            param_attr=param_attr,
+            bias_attr=True,
+            name=name+'.conv2d.output.1')
+        # Resize the conv output to the requested shape.
+        logit_interp = fluid.layers.resize_bilinear(
+                    logit, 
+                    out_shape=out_shape,
+                    name='logit_interp') 
+    return logit_interp
+
+
+def psp_module(input, out_features):
+    """Build the pooling branches, concatenate them with `input`, and fuse.
+
+    For each bin size in (1, 2, 3, 6) the input is adaptively average-pooled
+    to size x size, passed through a 1x1 conv with `out_features` as its
+    second argument, batch-normalized with ReLU, and bilinearly resized to
+    `input.shape[2:]`. The branches are then concatenated with `input` along
+    axis 1 and fused by a 3x3 conv (padding 1) followed by bn + ReLU.
+
+    Args:
+        input: feature map; `input.shape[2:]` is used as the resize target
+            (assumes an NCHW layout -- TODO confirm).
+        out_features: passed to every conv as its second positional argument;
+            presumably the number of output channels -- TODO confirm.
+
+    Returns:
+        The bn + ReLU output of the final 3x3 conv.
+    """
+    cat_layers = []
+    sizes = (1,2,3,6)
+    for size in sizes:
+        # Each branch gets its own scope and name prefix, e.g. "psp_conv1".
+        psp_name = "psp_conv" + str(size)
+        with scope(psp_name):
+            pool = fluid.layers.adaptive_pool2d(input, 
+                pool_size=[size, size], 
+                pool_type='avg', 
+                name=psp_name+'_adapool')
+            data = conv(pool, out_features, filter_size=1, bias_attr=True, 
+                    name= psp_name + '.conv2d.output.1')
+            data_bn = bn(data, act='relu')
+            # Resize the pooled branch back to the input's trailing dims.
+            interp = fluid.layers.resize_bilinear(data_bn, 
+                out_shape=input.shape[2:], 
+                name=psp_name+'_interp') 
+        cat_layers.append(interp)
+    # Input first, then the branches in reverse order (sizes 6, 3, 2, 1).
+    cat_layers = [input] + cat_layers[::-1]
+    cat = fluid.layers.concat(cat_layers, axis=1, name='psp_cat')
+    with scope("psp_conv_end"):
+        data = conv(cat, 
+                out_features, 
+                filter_size=3,
+                padding=1, 
+                bias_attr=True,
+                name='psp_conv_end.conv2d.output.1')
+        out = bn(data, act='relu')
+
+    return out
+
+def resnet(input):
+    # PSPNet backbone: ResNet (resnet50 by default).
+    # end_points: the ResNet layer at which the backbone stops.
+
+    # Depth multiplier and layer count are read from the global config.
+    scale = cfg.MODEL.PSPNET.DEPTH_MULTIPLIER
+    layers = cfg.MODEL.PSPNET.LAYERS
+    end_points = layers - 1
+    # Dilation rates keyed by block index: block 2 -> 2, block 3 -> 4.
+    dilation_dict = {2:2, 3:4}
+    model = resnet_backbone(layers, scale, stem='pspnet')
+    # Only the first element of the returned pair is used.
+    data, _ = model.net(input, end_points=end_points, dilation_dict=dilation_dict)
+
+    return data
+
+def pspnet(input, num_classes):
+    """Assemble the network: backbone, pooling module, then the logit head.
+
+    Args:
+        input: network input; `input.shape[2:]` is the shape the logits are
+            resized to (assumes an NCHW layout -- TODO confirm).
+        num_classes: forwarded to get_logit_interp.
+
+    Returns:
+        The resized logit tensor produced by get_logit_interp.
+    """
+    res = resnet(input)
+    # 512 is passed as out_features of the pooling module.
+    psp = psp_module(res, 512)
+    #dropout = fluid.layers.dropout(psp, dropout_prob=0.1, name="dropout")
+    logit = get_logit_interp(psp, num_classes, input.shape[2:])
+    return logit
+
+
--- a/pdseg/utils/config.py
+++ b/pdseg/utils/config.py
@@ -196,6 +196,12 @@ cfg.MODEL.ICNET.DEPTH_MULTIPLIER = 0.5
 # RESNET 层数 设置
 cfg.MODEL.ICNET.LAYERS = 50

+########################## PSPNet model configuration ##########################
+# ResNet backbone scale setting
+cfg.MODEL.PSPNET.DEPTH_MULTIPLIER = 1
+# Number of ResNet layers: 50 or 101
+cfg.MODEL.PSPNET.LAYERS = 50
+
 ########################## 预测部署模型配置 ###################################
 # 预测保存的模型名称
 cfg.FREEZE.MODEL_FILENAME = '__model__'