diff --git a/pdseg/models/modeling/pspnet.py b/pdseg/models/modeling/pspnet.py index 14a867a1f6bd94ad71d7bd994d54dfc6c1c0268c..02a1c5a4d89212af0b8b7253e2e0569d91bbb172 100644 --- a/pdseg/models/modeling/pspnet.py +++ b/pdseg/models/modeling/pspnet.py @@ -12,6 +12,7 @@ from models.backbone.resnet import ResNet as resnet_backbone from utils.config import cfg def get_logit_interp(input, num_classes, out_shape, name="logit"): + # 根据类别数决定最后一层卷积输出, 并插值回原始尺寸 param_attr = fluid.ParamAttr( name=name + 'weights', regularizer=fluid.regularizer.L2DecayRegularizer( @@ -19,13 +20,12 @@ def get_logit_interp(input, num_classes, out_shape, name="logit"): initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)) with scope(name): - logit = conv( - input, - num_classes, - filter_size=1, - param_attr=param_attr, - bias_attr=True, - name=name+'.conv2d.output.1') + logit = conv(input, + num_classes, + filter_size=1, + param_attr=param_attr, + bias_attr=True, + name=name+'_conv') logit_interp = fluid.layers.resize_bilinear( logit, out_shape=out_shape, @@ -34,53 +34,67 @@ def get_logit_interp(input, num_classes, out_shape, name="logit"): def psp_module(input, out_features): + # Pyramid Scene Parsing 金字塔池化模块 + # 输入:backbone输出的特征 + # 输出:对输入进行不同尺度pooling, 卷积操作后插值回原始尺寸,并concat + # 最后进行一个卷积及BN操作 + cat_layers = [] sizes = (1,2,3,6) for size in sizes: psp_name = "psp_conv" + str(size) with scope(psp_name): pool = fluid.layers.adaptive_pool2d(input, - pool_size=[size, size], - pool_type='avg', - name=psp_name+'_adapool') - data = conv(pool, out_features, filter_size=1, bias_attr=True, - name= psp_name + '.conv2d.output.1') + pool_size=[size, size], + pool_type='avg', + name=psp_name+'_adapool') + data = conv(pool, out_features, + filter_size=1, + bias_attr=True, + name= psp_name + '_conv') data_bn = bn(data, act='relu') interp = fluid.layers.resize_bilinear(data_bn, - out_shape=input.shape[2:], - name=psp_name+'_interp') + out_shape=input.shape[2:], + name=psp_name+'_interp') cat_layers.append(interp) cat_layers = [input] + cat_layers[::-1] cat = fluid.layers.concat(cat_layers, axis=1, name='psp_cat') - with scope("psp_conv_end"): + + psp_end_name = "psp_conv_end" + with scope(psp_end_name): data = conv(cat, - out_features, - filter_size=3, - padding=1, - bias_attr=True, - name='psp_conv_end.conv2d.output.1') + out_features, + filter_size=3, + padding=1, + bias_attr=True, + name=psp_end_name) out = bn(data, act='relu') return out def resnet(input): - # PSPNET backbone: resnet, ĬÈresnet50 - # end_points: resnetÖֹ²ã - + # PSPNET backbone: resnet, 默认resnet50 + # end_points: resnet终止层数 + # dilation_dict: resnet block数及对应的膨胀卷积尺度 + scale = cfg.MODEL.ICNET.DEPTH_MULTIPLIER scale = cfg.MODEL.PSPNET.DEPTH_MULTIPLIER layers = cfg.MODEL.PSPNET.LAYERS end_points = layers - 1 dilation_dict = {2:2, 3:4} model = resnet_backbone(layers, scale, stem='pspnet') - data, _ = model.net(input, end_points=end_points, dilation_dict=dilation_dict) + data, _ = model.net(input, + end_points=end_points, + dilation_dict=dilation_dict) return data def pspnet(input, num_classes): + # Backbone: ResNet res = resnet(input) + # PSP模块 psp = psp_module(res, 512) - #dropout = fluid.layers.dropout(psp, dropout_prob=0.1, name="dropout") - logit = get_logit_interp(psp, num_classes, input.shape[2:]) + dropout = fluid.layers.dropout(psp, dropout_prob=0.1, name="dropout") + # 根据类别数决定最后一层卷积输出, 并插值回原始尺寸 + logit = get_logit_interp(dropout, num_classes, input.shape[2:]) return logit -