From 843e2a8b66dd5c5feb31db41b8402e42c26898c3 Mon Sep 17 00:00:00 2001
From: LielinJiang <50691816+LielinJiang@users.noreply.github.com>
Date: Fri, 17 Jan 2020 10:56:22 +0800
Subject: [PATCH] Add Fast-SCNN model (#147)

* add fast scnn
* fix readme
* update model name, add inference time
* fix typos
---
 README.md                          |   1 +
 configs/cityscape_fast_scnn.yaml   |  53 ++++++
 configs/fast_scnn_pet.yaml         |  43 +++++
 docs/model_zoo.md                  |   3 +
 pdseg/loss.py                      |  11 +-
 pdseg/models/libs/model_libs.py    |  34 ++++
 pdseg/models/model_builder.py      |   4 +-
 pdseg/models/modeling/fast_scnn.py | 263 +++++++++++++++++++++++++++++
 pdseg/reader.py                    |   4 +-
 pretrained_model/download_model.py |   2 +
 turtorial/finetune_fast_scnn.md    | 121 +++++++++++++
 11 files changed, 533 insertions(+), 6 deletions(-)
 create mode 100644 configs/cityscape_fast_scnn.yaml
 create mode 100644 configs/fast_scnn_pet.yaml
 create mode 100644 pdseg/models/modeling/fast_scnn.py
 create mode 100644 turtorial/finetune_fast_scnn.md

diff --git a/README.md b/README.md
index ab654768..cdccd770 100644
--- a/README.md
+++ b/README.md
@@ -94,6 +94,7 @@ pip install -r requirements.txt
 * [ICNet model tutorial](./turtorial/finetune_icnet.md)
 * [PSPNet model tutorial](./turtorial/finetune_pspnet.md)
 * [HRNet model tutorial](./turtorial/finetune_hrnet.md)
+* [Fast-SCNN model tutorial](./turtorial/finetune_fast_scnn.md)
 
 ### Inference and deployment
 
diff --git a/configs/cityscape_fast_scnn.yaml b/configs/cityscape_fast_scnn.yaml
new file mode 100644
index 00000000..d9e996d6
--- /dev/null
+++ b/configs/cityscape_fast_scnn.yaml
@@ -0,0 +1,53 @@
+EVAL_CROP_SIZE: (2048, 1024) # (width, height), for unpadding, rangescaling and stepscaling
+TRAIN_CROP_SIZE: (1024, 1024) # (width, height), for unpadding, rangescaling and stepscaling
+AUG:
+    AUG_METHOD: "stepscaling" # choice of unpadding, rangescaling and stepscaling
+    FIX_RESIZE_SIZE: (640, 640) # (width, height), for unpadding
+    INF_RESIZE_VALUE: 500 # for rangescaling
+    MAX_RESIZE_VALUE: 600 # for rangescaling
+    MIN_RESIZE_VALUE: 400 # for rangescaling
+    MAX_SCALE_FACTOR: 2.0 # for stepscaling
+    MIN_SCALE_FACTOR: 0.5 # for stepscaling
+    SCALE_STEP_SIZE: 0.25 # for stepscaling
+    MIRROR: True
+    FLIP: False
+    FLIP_RATIO: 0.2
+    RICH_CROP:
+        ENABLE: True
+        ASPECT_RATIO: 0.0
+        BLUR: False
+        BLUR_RATIO: 0.1
+        MAX_ROTATION: 0
+        MIN_AREA_RATIO: 0.0
+        BRIGHTNESS_JITTER_RATIO: 0.4
+        CONTRAST_JITTER_RATIO: 0.4
+        SATURATION_JITTER_RATIO: 0.4
+BATCH_SIZE: 12
+MEAN: [0.5, 0.5, 0.5]
+STD: [0.5, 0.5, 0.5]
+DATASET:
+    DATA_DIR: "./dataset/cityscapes/"
+    IMAGE_TYPE: "rgb" # choice of rgb or rgba
+    NUM_CLASSES: 19
+    TEST_FILE_LIST: "dataset/cityscapes/val.list"
+    TRAIN_FILE_LIST: "dataset/cityscapes/train.list"
+    VAL_FILE_LIST: "dataset/cityscapes/val.list"
+    IGNORE_INDEX: 255
+FREEZE:
+    MODEL_FILENAME: "model"
+    PARAMS_FILENAME: "params"
+MODEL:
+    DEFAULT_NORM_TYPE: "bn"
+    MODEL_NAME: "fast_scnn"
+
+TEST:
+    TEST_MODEL: "snapshots/cityscape_fast_scnn/final/"
+TRAIN:
+    MODEL_SAVE_DIR: "snapshots/cityscape_fast_scnn/"
+    SNAPSHOT_EPOCH: 10
+SOLVER:
+    LR: 0.001
+    LR_POLICY: "poly"
+    OPTIMIZER: "sgd"
+    NUM_EPOCHS: 100
+
diff --git a/configs/fast_scnn_pet.yaml b/configs/fast_scnn_pet.yaml
new file mode 100644
index 00000000..02fdef8e
--- /dev/null
+++ b/configs/fast_scnn_pet.yaml
@@ -0,0 +1,43 @@
+TRAIN_CROP_SIZE: (512, 512) # (width, height), for unpadding, rangescaling and stepscaling
+EVAL_CROP_SIZE: (512, 512) # (width, height), for unpadding, rangescaling and stepscaling
+AUG:
+    AUG_METHOD: "unpadding" # choice of unpadding, rangescaling and stepscaling
+    FIX_RESIZE_SIZE: (512, 512) # (width, height), for unpadding
+
+    INF_RESIZE_VALUE: 500 # for rangescaling
+    MAX_RESIZE_VALUE: 600 # for rangescaling
+    MIN_RESIZE_VALUE: 400 # for rangescaling
+
+    MAX_SCALE_FACTOR: 1.25 # for stepscaling
+    MIN_SCALE_FACTOR: 0.75 # for stepscaling
+    SCALE_STEP_SIZE: 0.25 # for stepscaling
+    MIRROR: True
+BATCH_SIZE: 4
+DATASET:
+    DATA_DIR: "./dataset/mini_pet/"
+    IMAGE_TYPE: "rgb" # choice of rgb or rgba
+    NUM_CLASSES: 3
+    TEST_FILE_LIST: "./dataset/mini_pet/file_list/test_list.txt"
+    TRAIN_FILE_LIST: "./dataset/mini_pet/file_list/train_list.txt"
+    VAL_FILE_LIST: "./dataset/mini_pet/file_list/val_list.txt"
+    VIS_FILE_LIST: "./dataset/mini_pet/file_list/test_list.txt"
+    IGNORE_INDEX: 255
+    SEPARATOR: " "
+FREEZE:
+    MODEL_FILENAME: "__model__"
+    PARAMS_FILENAME: "__params__"
+MODEL:
+    MODEL_NAME: "fast_scnn"
+    DEFAULT_NORM_TYPE: "bn"
+
+TRAIN:
+    PRETRAINED_MODEL_DIR: "./pretrained_model/fast_scnn_cityscape/"
+    MODEL_SAVE_DIR: "./saved_model/fast_scnn_pet/"
+    SNAPSHOT_EPOCH: 10
+TEST:
+    TEST_MODEL: "./saved_model/fast_scnn_pet/final"
+SOLVER:
+    NUM_EPOCHS: 100
+    LR: 0.005
+    LR_POLICY: "poly"
+    OPTIMIZER: "sgd"
diff --git a/docs/model_zoo.md b/docs/model_zoo.md
index 2b18260e..8cd89fa4 100644
--- a/docs/model_zoo.md
+++ b/docs/model_zoo.md
@@ -63,3 +63,6 @@ The train split is the Cityscapes training set; testing uses the Cityscapes validation set.
 | PSPNet/bn | Cityscapes |[pspnet50_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/pspnet50_cityscapes.tgz) |16|false| 0.7013 |
 | PSPNet/bn | Cityscapes |[pspnet101_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/pspnet101_cityscapes.tgz) |16|false| 0.7734 |
 | HRNet_W18/bn | Cityscapes |[hrnet_w18_bn_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/hrnet_w18_bn_cityscapes.tgz) | 4 | false | 0.7936 |
+| Fast-SCNN/bn | Cityscapes |[fast_scnn_cityscape.tar](https://paddleseg.bj.bcebos.com/models/fast_scnn_cityscape.tar) | 32 | false | 0.6964 |
+
+Test environment: Python 3.7.3, V100 GPU, cuDNN 7.6.2.
diff --git a/pdseg/loss.py b/pdseg/loss.py
index 66f04f4a..14f1b379 100644
--- a/pdseg/loss.py
+++ b/pdseg/loss.py
@@ -71,6 +71,7 @@ def softmax_with_loss(logit, label, ignore_mask=None, num_classes=2, weight=None
     ignore_mask.stop_gradient = True
     return avg_loss
 
+# TODO: decide how to apply the ignore index and ignore mask here
 def dice_loss(logit, label, ignore_mask=None, epsilon=0.00001):
     if logit.shape[1] != 1 or label.shape[1] != 1 or ignore_mask.shape[1] != 1:
         raise Exception("dice loss is only applicable to one channel classfication")
@@ -93,6 +94,7 @@ def dice_loss(logit, label, ignore_mask=None, epsilon=0.00001):
     ignore_mask.stop_gradient = True
     return fluid.layers.reduce_mean(dice_score)
 
+
 def bce_loss(logit, label, ignore_mask=None):
     if logit.shape[1] != 1 or label.shape[1] != 1 or ignore_mask.shape[1] != 1:
         raise Exception("bce loss is only applicable to binary classfication")
@@ -112,16 +114,18 @@ def multi_softmax_with_loss(logits, label, ignore_mask=None, num_classes=2, weig
     if isinstance(logits, tuple):
         avg_loss = 0
         for i, logit in enumerate(logits):
-            logit_label = fluid.layers.resize_nearest(label, logit.shape[2:])
-            logit_mask = (logit_label.astype('int32') !=
+            if label.shape[2] != logit.shape[2] or label.shape[3] != logit.shape[3]:
+                label = fluid.layers.resize_nearest(label, logit.shape[2:])
+            logit_mask = (label.astype('int32') !=
                           cfg.DATASET.IGNORE_INDEX).astype('int32')
-            loss = softmax_with_loss(logit, logit_label, logit_mask,
+            loss = softmax_with_loss(logit, label, logit_mask,
                                      num_classes)
             avg_loss += cfg.MODEL.MULTI_LOSS_WEIGHT[i] * loss
     else:
         avg_loss = softmax_with_loss(logits, label, ignore_mask, num_classes, weight=weight)
     return avg_loss
 
+
 def multi_dice_loss(logits, label, ignore_mask=None):
     if isinstance(logits, tuple):
         avg_loss = 0
@@ -135,6 +139,7 @@ def multi_dice_loss(logits, label, ignore_mask=None):
         avg_loss = dice_loss(logits, label, ignore_mask)
     return avg_loss
 
+
 def multi_bce_loss(logits, label, ignore_mask=None):
     if isinstance(logits, tuple):
         avg_loss = 0
diff --git a/pdseg/models/libs/model_libs.py b/pdseg/models/libs/model_libs.py
index 19afe542..84494a9d 100644
--- a/pdseg/models/libs/model_libs.py
+++ b/pdseg/models/libs/model_libs.py
@@ -164,3 +164,37 @@ def separate_conv(input, channel, stride, filter, dilation=1, act=None):
     input = bn(input)
     if act: input = act(input)
     return input
+
+
+def conv_bn_layer(input,
+                  filter_size,
+                  num_filters,
+                  stride,
+                  padding,
+                  channels=None,
+                  num_groups=1,
+                  if_act=True,
+                  name=None,
+                  use_cudnn=True):
+    conv = fluid.layers.conv2d(
+        input=input,
+        num_filters=num_filters,
+        filter_size=filter_size,
+        stride=stride,
+        padding=padding,
+        groups=num_groups,
+        act=None,
+        use_cudnn=use_cudnn,
+        param_attr=fluid.ParamAttr(name=name + '_weights'),
+        bias_attr=False)
+    bn_name = name + '_bn'
+    bn = fluid.layers.batch_norm(
+        input=conv,
+        param_attr=fluid.ParamAttr(name=bn_name + "_scale"),
+        bias_attr=fluid.ParamAttr(name=bn_name + "_offset"),
+        moving_mean_name=bn_name + '_mean',
+        moving_variance_name=bn_name + '_variance')
+    if if_act:
+        return fluid.layers.relu6(bn)
+    else:
+        return bn
\ No newline at end of file
diff --git a/pdseg/models/model_builder.py b/pdseg/models/model_builder.py
index 65483b33..3ff7e1ea 100644
--- a/pdseg/models/model_builder.py
+++ b/pdseg/models/model_builder.py
@@ -24,7 +24,7 @@ from utils.config import cfg
 from loss import multi_softmax_with_loss
 from loss import multi_dice_loss
 from loss import multi_bce_loss
-from models.modeling import deeplab, unet, icnet, pspnet, hrnet
+from models.modeling import deeplab, unet, icnet, pspnet, hrnet, fast_scnn
 
 
 class ModelPhase(object):
@@ -81,6 +81,8 @@ def seg_model(image, class_num):
         logits = pspnet.pspnet(image, class_num)
     elif model_name == 'hrnet':
         logits = hrnet.hrnet(image, class_num)
+    elif model_name == 'fast_scnn':
+        logits = fast_scnn.fast_scnn(image, class_num)
     else:
         raise Exception(
-            "unknow model name, only support unet, deeplabv3p, icnet, pspnet, hrnet"
+            "unknown model name, only support unet, deeplabv3p, icnet, pspnet, hrnet, fast_scnn"
diff --git a/pdseg/models/modeling/fast_scnn.py b/pdseg/models/modeling/fast_scnn.py
new file mode 100644
index 00000000..b1ecdffe
--- /dev/null
+++ b/pdseg/models/modeling/fast_scnn.py
@@ -0,0 +1,263 @@
+# coding: utf8
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle.fluid as fluid
+from models.libs.model_libs import scope
+from models.libs.model_libs import bn, bn_relu, relu, conv_bn_layer
+from models.libs.model_libs import conv, avg_pool
+from models.libs.model_libs import separate_conv
+from utils.config import cfg
+
+
+def learning_to_downsample(x, dw_channels1=32, dw_channels2=48, out_channels=64):
+    # Learning-to-downsample module: one standard conv followed by two
+    # depthwise separable convs, each with stride 2 (overall stride 8).
+    x = relu(bn(conv(x, dw_channels1, 3, 2)))
+    with scope('dsconv1'):
+        x = separate_conv(x, dw_channels2, stride=2, filter=3, act=fluid.layers.relu)
+    with scope('dsconv2'):
+        x = separate_conv(x, out_channels, stride=2, filter=3, act=fluid.layers.relu)
+    return x
+
+
+def shortcut(input, data_residual):
+    return fluid.layers.elementwise_add(input, data_residual)
+
+
+def dropout2d(input, prob, is_train=False):
+    # Channel-wise dropout with inverted scaling: whole feature maps are
+    # zeroed during training; the op is an identity at inference time.
+    if not is_train:
+        return input
+    channels = input.shape[1]
+    keep_prob = 1.0 - prob
+    random_tensor = keep_prob + fluid.layers.uniform_random_batch_size_like(input, [-1, channels, 1, 1], min=0., max=1.)
+    binary_tensor = fluid.layers.floor(random_tensor)
+    output = input / keep_prob * binary_tensor
+    return output
+
+
+def inverted_residual_unit(input,
+                           num_in_filter,
+                           num_filters,
+                           ifshortcut,
+                           stride,
+                           filter_size,
+                           padding,
+                           expansion_factor,
+                           name=None):
+    # MobileNetV2-style inverted residual: 1x1 expansion, depthwise conv,
+    # then a linear 1x1 projection, with an optional identity shortcut.
+    num_expfilter = int(round(num_in_filter * expansion_factor))
+
+    channel_expand = conv_bn_layer(
+        input=input,
+        num_filters=num_expfilter,
+        filter_size=1,
+        stride=1,
+        padding=0,
+        num_groups=1,
+        if_act=True,
+        name=name + '_expand')
+
+    bottleneck_conv = conv_bn_layer(
+        input=channel_expand,
+        num_filters=num_expfilter,
+        filter_size=filter_size,
+        stride=stride,
+        padding=padding,
+        num_groups=num_expfilter,
+        if_act=True,
+        name=name + '_dwise',
+        use_cudnn=False)
+
+    depthwise_output = bottleneck_conv
+
+    linear_out = conv_bn_layer(
+        input=bottleneck_conv,
+        num_filters=num_filters,
+        filter_size=1,
+        stride=1,
+        padding=0,
+        num_groups=1,
+        if_act=False,
+        name=name + '_linear')
+
+    if ifshortcut:
+        out = shortcut(input=input, data_residual=linear_out)
+        return out, depthwise_output
+    else:
+        return linear_out, depthwise_output
+
+
+def inverted_blocks(input, in_c, t, c, n, s, name=None):
+    # Stack n inverted residual units; the first uses stride s, the rest
+    # use stride 1 with identity shortcuts.
+    first_block, depthwise_output = inverted_residual_unit(
+        input=input,
+        num_in_filter=in_c,
+        num_filters=c,
+        ifshortcut=False,
+        stride=s,
+        filter_size=3,
+        padding=1,
+        expansion_factor=t,
+        name=name + '_1')
+
+    last_residual_block = first_block
+    last_c = c
+
+    for i in range(1, n):
+        last_residual_block, depthwise_output = inverted_residual_unit(
+            input=last_residual_block,
+            num_in_filter=last_c,
+            num_filters=c,
+            ifshortcut=True,
+            stride=1,
+            filter_size=3,
+            padding=1,
+            expansion_factor=t,
+            name=name + '_' + str(i + 1))
+    return last_residual_block, depthwise_output
+
+
+def psp_module(input, out_features):
+    # Pyramid pooling: adaptive-average-pool the input to 1x1, 2x2, 3x3 and
+    # 6x6 bins, reduce channels with a 1x1 conv, upsample and concatenate.
+    cat_layers = []
+    sizes = (1, 2, 3, 6)
+    for size in sizes:
+        psp_name = "psp" + str(size)
+        with scope(psp_name):
+            pool = fluid.layers.adaptive_pool2d(input,
+                                                pool_size=[size, size],
+                                                pool_type='avg',
+                                                name=psp_name + '_adapool')
+            data = conv(pool, out_features,
+                        filter_size=1,
+                        bias_attr=False,
+                        name=psp_name + '_conv')
+            data_bn = bn(data, act='relu')
+            interp = fluid.layers.resize_bilinear(data_bn,
+                                                  out_shape=input.shape[2:],
+                                                  name=psp_name + '_interp',
+                                                  align_mode=0)
+        cat_layers.append(interp)
+    cat_layers = [input] + cat_layers
+    out = fluid.layers.concat(cat_layers, axis=1, name='psp_cat')
+
+    return out
+
+
+class FeatureFusionModule:
+    """Feature fusion module"""
+
+    def __init__(self, higher_in_channels, lower_in_channels, out_channels, scale_factor=4):
+        self.higher_in_channels = higher_in_channels
+        self.lower_in_channels = lower_in_channels
+        self.out_channels = out_channels
+        self.scale_factor = scale_factor
+
+    def net(self, higher_res_feature, lower_res_feature):
+        h, w = higher_res_feature.shape[2:]
+        lower_res_feature = fluid.layers.resize_bilinear(lower_res_feature, [h, w], align_mode=0)
+
+        with scope('dwconv'):
+            # Note: despite the scope name, this is a 1x1 convolution.
+            lower_res_feature = relu(bn(conv(lower_res_feature, self.out_channels, 1)))
+        with scope('conv_lower_res'):
+            lower_res_feature = bn(conv(lower_res_feature, self.out_channels, 1, bias_attr=True))
+        with scope('conv_higher_res'):
+            higher_res_feature = bn(conv(higher_res_feature, self.out_channels, 1, bias_attr=True))
+        out = higher_res_feature + lower_res_feature
+
+        return relu(out)
+
+
+class GlobalFeatureExtractor:
+    """Global feature extractor module"""
+
+    def __init__(self, in_channels=64, block_channels=(64, 96, 128), out_channels=128,
+                 t=6, num_blocks=(3, 3, 3)):
+        self.in_channels = in_channels
+        self.block_channels = block_channels
+        self.out_channels = out_channels
+        self.t = t
+        self.num_blocks = num_blocks
+
+    def net(self, x):
+        x, _ = inverted_blocks(x, self.in_channels, self.t, self.block_channels[0],
+                               self.num_blocks[0], 2, 'inverted_block_1')
+        x, _ = inverted_blocks(x, self.block_channels[0], self.t, self.block_channels[1],
+                               self.num_blocks[1], 2, 'inverted_block_2')
+        x, _ = inverted_blocks(x, self.block_channels[1], self.t, self.block_channels[2],
+                               self.num_blocks[2], 1, 'inverted_block_3')
+        x = psp_module(x, self.block_channels[2] // 4)
+        with scope('out'):
+            x = relu(bn(conv(x, self.out_channels, 1)))
+        return x
+
+
+class Classifier:
+    """Classifier"""
+
+    def __init__(self, dw_channels, num_classes, stride=1):
+        self.dw_channels = dw_channels
+        self.num_classes = num_classes
+        self.stride = stride
+
+    def net(self, x):
+        with scope('dsconv1'):
+            x = separate_conv(x, self.dw_channels, stride=self.stride, filter=3, act=fluid.layers.relu)
+        with scope('dsconv2'):
+            x = separate_conv(x, self.dw_channels, stride=self.stride, filter=3, act=fluid.layers.relu)
+        x = dropout2d(x, 0.1, is_train=(cfg.PHASE == 'train'))
+        x = conv(x, self.num_classes, 1, bias_attr=True)
+        return x
+
+
+def aux_layer(x, num_classes):
+    x = relu(bn(conv(x, 32, 3, padding=1)))
+    x = dropout2d(x, 0.1, is_train=(cfg.PHASE == 'train'))
+    with scope('logit'):
+        x = conv(x, num_classes, 1, bias_attr=True)
+    return x
+
+
+def fast_scnn(img, num_classes):
+    size = img.shape[2:]
+    classifier = Classifier(128, num_classes)
+
+    global_feature_extractor = GlobalFeatureExtractor(64, [64, 96, 128], 128, 6, [3, 3, 3])
+    feature_fusion = FeatureFusionModule(64, 128, 128)
+
+    with scope('learning_to_downsample'):
+        higher_res_features = learning_to_downsample(img, 32, 48, 64)
+    with scope('global_feature_extractor'):
+        lower_res_feature = global_feature_extractor.net(higher_res_features)
+    with scope('feature_fusion'):
+        x = feature_fusion.net(higher_res_features, lower_res_feature)
+    with scope('classifier'):
+        logit = classifier.net(x)
+        logit = fluid.layers.resize_bilinear(logit, size, align_mode=0)
+
+    if len(cfg.MODEL.MULTI_LOSS_WEIGHT) == 3:
+        with scope('aux_layer_higher'):
+            higher_logit = aux_layer(higher_res_features, num_classes)
+            higher_logit = fluid.layers.resize_bilinear(higher_logit, size, align_mode=0)
+        with scope('aux_layer_lower'):
+            lower_logit = aux_layer(lower_res_feature, num_classes)
+            lower_logit = fluid.layers.resize_bilinear(lower_logit, size, align_mode=0)
+        return logit, higher_logit, lower_logit
+    elif len(cfg.MODEL.MULTI_LOSS_WEIGHT) == 2:
+        with scope('aux_layer_higher'):
+            higher_logit = aux_layer(higher_res_features, num_classes)
+            higher_logit = fluid.layers.resize_bilinear(higher_logit, size, align_mode=0)
+        return logit, higher_logit
+
+    return logit
\ No newline at end of file
diff --git a/pdseg/reader.py b/pdseg/reader.py
index d3c3659e..7f1fd6fb 100644
--- a/pdseg/reader.py
+++ b/pdseg/reader.py
@@ -98,8 +98,8 @@ class SegDataset(object):
             # Re-shuffle file list
             if self.shuffle and cfg.NUM_TRAINERS > 1:
                 np.random.RandomState(self.shuffle_seed).shuffle(self.all_lines)
-                num_lines = len(self.all_lines) // self.num_trainers
-                self.lines = self.all_lines[num_lines * self.trainer_id: num_lines * (self.trainer_id + 1)]
+                num_lines = len(self.all_lines) // cfg.NUM_TRAINERS
+                self.lines = self.all_lines[num_lines * cfg.TRAINER_ID: num_lines * (cfg.TRAINER_ID + 1)]
                 self.shuffle_seed += 1
             elif self.shuffle:
                 np.random.shuffle(self.lines)
diff --git a/pretrained_model/download_model.py b/pretrained_model/download_model.py
index 12b01472..28b5ae42 100644
--- a/pretrained_model/download_model.py
+++ b/pretrained_model/download_model.py
@@ -81,6 +81,8 @@ model_urls = {
     "https://paddleseg.bj.bcebos.com/models/pspnet101_cityscapes.tgz",
     "hrnet_w18_bn_cityscapes":
     "https://paddleseg.bj.bcebos.com/models/hrnet_w18_bn_cityscapes.tgz",
+    "fast_scnn_cityscapes":
+    "https://paddleseg.bj.bcebos.com/models/fast_scnn_cityscape.tar",
 }
 
 if __name__ == "__main__":
diff --git a/turtorial/finetune_fast_scnn.md b/turtorial/finetune_fast_scnn.md
new file mode 100644
index 00000000..d16f3533
--- /dev/null
+++ b/turtorial/finetune_fast_scnn.md
@@ -0,0 +1,121 @@
+# Fast-SCNN model training tutorial
+
+* This tutorial shows how to train on a custom dataset starting from the ***`fast_scnn_cityscape`*** pretrained model provided by PaddleSeg.
+
+* Before reading on, make sure you have gone through PaddleSeg's [Quick Start](../README.md#快速入门) and [Basic Features](../README.md#基础功能) sections, so that you have a basic understanding of PaddleSeg.
+
+* All commands in this tutorial are executed from the PaddleSeg root directory.
+
+## 1. Prepare the training data
+
+We have prepared a dataset ahead of time; download it with the following command:
+
+```shell
+python dataset/download_pet.py
+```
+
+## 2. Download the pretrained model
+
+For the list of all pretrained models supported by PaddleSeg, see [model combinations](#模型组合) to find the name and configuration of the model we need.
+
+Then download the corresponding pretrained model:
+
+```shell
+python pretrained_model/download_model.py fast_scnn_cityscapes
+```
+
+## 3. Prepare the configuration
+
+Next, we need to pin down the relevant configuration. For the purposes of this tutorial, it splits into three parts:
+
+* Dataset
+  * Training set root directory
+  * Training set file list
+  * Test set file list
+  * Validation set file list
+* Pretrained model
+  * Pretrained model name
+  * Backbone network of the pretrained model
+  * Normalization type of the pretrained model
+  * Pretrained model path
+* Other
+  * Learning rate
+  * Batch size
+  * ...
+
+Of these three parts, the pretrained-model configuration matters most: if the model name or the backbone is configured incorrectly, the pretrained weights will not be loaded, which hurts convergence speed. The pretrained-model settings were shown in step 2.
+
+The dataset configuration depends on the data paths; in this tutorial the data is stored under `dataset/mini_pet`.
+
+The remaining settings are tuned to the dataset and the machine environment. In the end we save a YAML configuration file with the following content to **configs/fast_scnn_pet.yaml**:
+
+```yaml
+# Dataset configuration
+DATASET:
+    DATA_DIR: "./dataset/mini_pet/"
+    NUM_CLASSES: 3
+    TEST_FILE_LIST: "./dataset/mini_pet/file_list/test_list.txt"
+    TRAIN_FILE_LIST: "./dataset/mini_pet/file_list/train_list.txt"
+    VAL_FILE_LIST: "./dataset/mini_pet/file_list/val_list.txt"
+    VIS_FILE_LIST: "./dataset/mini_pet/file_list/test_list.txt"
+
+# Pretrained model configuration
+MODEL:
+    MODEL_NAME: "fast_scnn"
+    DEFAULT_NORM_TYPE: "bn"
+
+# Other configuration
+TRAIN_CROP_SIZE: (512, 512)
+EVAL_CROP_SIZE: (512, 512)
+AUG:
+    AUG_METHOD: "unpadding"
+    FIX_RESIZE_SIZE: (512, 512)
+BATCH_SIZE: 4
+TRAIN:
+    PRETRAINED_MODEL_DIR: "./pretrained_model/fast_scnn_cityscape/"
+    MODEL_SAVE_DIR: "./saved_model/fast_scnn_pet/"
+    SNAPSHOT_EPOCH: 10
+TEST:
+    TEST_MODEL: "./saved_model/fast_scnn_pet/final"
+SOLVER:
+    NUM_EPOCHS: 100
+    LR: 0.005
+    LR_POLICY: "poly"
+    OPTIMIZER: "sgd"
+```
+
+## 4. Validate the configuration and data
+
+Before training and evaluation, the configuration and data should be validated once to make sure both are correct. Start the check with:
+
+```shell
+python pdseg/check.py --cfg ./configs/fast_scnn_pet.yaml
+```
+
+## 5. Start training
+
+Once the check passes, launch training with:
+
+```shell
+python pdseg/train.py --use_gpu --cfg ./configs/fast_scnn_pet.yaml
+```
+
+## 6. Run evaluation
+
+After training completes, start evaluation with:
+
+```shell
+python pdseg/eval.py --use_gpu --cfg ./configs/fast_scnn_pet.yaml
+```
+
+## 7. Inference-time comparison of real-time segmentation models
+
+| Model | eval size | inference time | mIoU on Cityscapes val |
+|---|---|---|---|
+| DeepLabv3+/MobileNetv2/bn | (1024, 2048) | 24.12ms | 0.698 |
+| ICNet/bn | (1024, 2048) | 25.24ms | 0.6831 |
+| Fast-SCNN/bn | (1024, 2048) | 17.24ms | 0.6964 |
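+
+## 8. Appendix: notes on the configuration
+
+A note on the auxiliary losses: as the `pdseg/loss.py` and `fast_scnn.py` changes above show, when `MODEL.MULTI_LOSS_WEIGHT` holds three entries, `fast_scnn` returns three logits (main head, higher-resolution auxiliary head, lower-resolution auxiliary head) and `multi_softmax_with_loss` sums the per-head losses weighted by those entries. The sketch below only illustrates that weighting arithmetic; the loss values are made up and the weights `[1.0, 0.4, 0.4]` are an assumed example, not a PaddleSeg default.
+
+```python
+# Illustrative only: made-up per-head losses and assumed weights.
+head_losses = [0.52, 0.81, 0.90]     # main, aux_higher, aux_lower
+multi_loss_weight = [1.0, 0.4, 0.4]  # plays the role of MODEL.MULTI_LOSS_WEIGHT
+
+# Weighted sum, as in multi_softmax_with_loss when logits is a tuple.
+total = sum(w * l for w, l in zip(multi_loss_weight, head_losses))
+print(round(total, 4))  # 0.52*1.0 + 0.81*0.4 + 0.90*0.4 = 1.204
+```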
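+
+The `LR_POLICY: "poly"` setting decays the learning rate polynomially towards zero over training. The snippet below is a minimal, framework-free sketch of that schedule; the decay power of 0.9 and the step counts are illustrative assumptions, so check `pdseg/solver.py` for the exact behaviour.
+
+```python
+def poly_lr(base_lr, step, max_steps, power=0.9):
+    """Polynomially decay base_lr towards 0 over max_steps."""
+    return base_lr * (1.0 - float(step) / max_steps) ** power
+
+# e.g. LR: 0.005 with an assumed 5000 total iterations:
+for step in (0, 2500, 4999):
+    print(step, round(poly_lr(0.005, step, 5000), 6))
+```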