diff --git a/dygraph/README.md b/dygraph/README.md
index 20728e6bb5321e78122b9e925544419e9ec4483c..fd9af0d0dac1c2011479de4cab6929e34c18a515 100644
--- a/dygraph/README.md
+++ b/dygraph/README.md
@@ -1,43 +1,28 @@
 # Dynamic Graph Execution
 
-## Dataset Configuration
-```
-data_dir='data/path'
-train_list='train/list/path'
-val_list='val/list/path'
-test_list='test/list/path'
-num_classes=number/of/dataset/classes
-```
-
 ## Training
 ```
 python3 train.py --model_name UNet \
---data_dir $data_dir \
---train_list $train_list \
---val_list $val_list \
---num_classes $num_classes \
+--dataset OpticDiscSeg \
 --input_size 192 192 \
---num_epochs 4 \
+--num_epochs 10 \
 --save_interval_epochs 1 \
+--do_eval \
 --save_dir output
 ```
 
 ## Evaluation
 ```
 python3 val.py --model_name UNet \
---data_dir $data_dir \
---val_list $val_list \
---num_classes $num_classes \
+--dataset OpticDiscSeg \
 --input_size 192 192 \
---model_dir output/epoch_1
+--model_dir output/best_model
 ```
 
 ## Prediction
 ```
 python3 infer.py --model_name UNet \
---data_dir $data_dir \
---test_list $test_list \
---num_classes $num_classes \
---input_size 192 192 \
---model_dir output/epoch_1
+--dataset OpticDiscSeg \
+--model_dir output/best_model \
+--input_size 192 192
 ```
diff --git a/dygraph/datasets/__init__.py b/dygraph/datasets/__init__.py
index 072a82f7409a9369d2c3b1bdba603527eac0bb7f..9a52eccf7454cd751a28ef32e7daf22b3d0164e2 100644
--- a/dygraph/datasets/__init__.py
+++ b/dygraph/datasets/__init__.py
@@ -12,5 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from .dataset import Dataset
 from .optic_disc_seg import OpticDiscSeg
 from .cityscapes import Cityscapes
diff --git a/dygraph/datasets/cityscapes.py b/dygraph/datasets/cityscapes.py
index 21f967820ec32aa37b1877ae7d583eb3e5aac674..0bfd43febbfe0d7dfbde5e25c4476fe34c8aac5d 100644
--- a/dygraph/datasets/cityscapes.py
+++ b/dygraph/datasets/cityscapes.py
@@ -14,8 +14,7 @@
 
 import os
 
-from paddle.fluid.io import Dataset
-
+from .dataset import Dataset
 from utils.download import download_file_and_uncompress
 
 DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
@@ -70,16 +69,3 @@ class Cityscapes(Dataset):
             image_path = os.path.join(self.data_dir, items[0])
             grt_path = os.path.join(self.data_dir, items[1])
             self.file_list.append([image_path, grt_path])
-
-    def __getitem__(self, idx):
-        image_path, grt_path = self.file_list[idx]
-        im, im_info, label = self.transforms(im=image_path, label=grt_path)
-        if self.mode == 'train':
-            return im, label
-        elif self.mode == 'eval':
-            return im, label
-        if self.mode == 'test':
-            return im, im_info, image_path
-
-    def __len__(self):
-        return len(self.file_list)
diff --git a/dygraph/datasets/dataset.py b/dygraph/datasets/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..908e90b4f4159e997446d0da40374ccde79abf9b
--- /dev/null
+++ b/dygraph/datasets/dataset.py
@@ -0,0 +1,105 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
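+
+# Common file-list dataset for the dygraph pipeline: each line of a list file
+# is "image_path<separator>label_path", resolved relative to data_dir.
+# OpticDiscSeg and Cityscapes subclass this instead of paddle.fluid.io.Dataset.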
+
+import os
+
+import paddle.fluid as fluid
+import numpy as np
+from PIL import Image
+
+
+class Dataset(fluid.io.Dataset):
+    def __init__(self,
+                 data_dir,
+                 num_classes,
+                 train_list=None,
+                 val_list=None,
+                 test_list=None,
+                 separator=' ',
+                 transforms=None,
+                 mode='train'):
+        self.data_dir = data_dir
+        self.transforms = transforms
+        self.file_list = list()
+        self.mode = mode
+        self.num_classes = num_classes
+
+        if mode.lower() not in ['train', 'eval', 'test']:
+            raise Exception(
+                "mode should be 'train', 'eval' or 'test', but got {}.".format(
+                    mode))
+
+        if self.transforms is None:
+            raise Exception("transforms is necessary, but it is None.")
+
+        if mode == 'train':
+            if train_list is None:
+                raise Exception(
+                    'When mode is "train", train_list is needed, but it is None.')
+            elif not os.path.exists(train_list):
+                raise Exception(
+                    'train_list is not found: {}'.format(train_list))
+            else:
+                file_list = train_list
+        elif mode == 'eval':
+            if val_list is None:
+                raise Exception(
+                    'When mode is "eval", val_list is needed, but it is None.')
+            elif not os.path.exists(val_list):
+                raise Exception('val_list is not found: {}'.format(val_list))
+            else:
+                file_list = val_list
+        else:
+            if test_list is None:
+                raise Exception(
+                    'When mode is "test", test_list is needed, but it is None.')
+            elif not os.path.exists(test_list):
+                raise Exception('test_list is not found: {}'.format(test_list))
+            else:
+                file_list = test_list
+
+        with open(file_list, 'r') as f:
+            for line in f:
+                items = line.strip().split(separator)
+                if len(items) != 2:
+                    if mode == 'train' or mode == 'eval':
+                        raise Exception(
+                            "File list format incorrect! It should be"
+                            " image_name{}label_name\\n".format(separator))
+                    image_path = os.path.join(self.data_dir, items[0])
+                    grt_path = None
+                else:
+                    image_path = os.path.join(self.data_dir, items[0])
+                    grt_path = os.path.join(self.data_dir, items[1])
+                self.file_list.append([image_path, grt_path])
+
+    def __getitem__(self, idx):
+        image_path, grt_path = self.file_list[idx]
+        if self.mode == 'train':
+            im, im_info, label = self.transforms(im=image_path, label=grt_path)
+            return im, label
+        elif self.mode == 'eval':
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
+            label = np.asarray(Image.open(grt_path))
+            label = label[np.newaxis, np.newaxis, :, :]
+            return im, im_info, label
+        elif self.mode == 'test':
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
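+            # im_info is the ordered list of ('resize'/'padding', (h, w))
+            # records; infer.py and val.py walk it in reverse to map the
+            # prediction back to the original image size.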
+            return im, im_info, image_path
+
+    def __len__(self):
+        return len(self.file_list)
diff --git a/dygraph/datasets/optic_disc_seg.py b/dygraph/datasets/optic_disc_seg.py
index 0a321915e90c18e99e46d0e53473e695b1ec2317..9794949a0688e1abe29212a1cd92048b6ceab622 100644
--- a/dygraph/datasets/optic_disc_seg.py
+++ b/dygraph/datasets/optic_disc_seg.py
@@ -14,8 +14,7 @@
 
 import os
 
-from paddle.fluid.io import Dataset
-
+from .dataset import Dataset
 from utils.download import download_file_and_uncompress
 
 DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
@@ -70,16 +69,3 @@ class OpticDiscSeg(Dataset):
             image_path = os.path.join(self.data_dir, items[0])
             grt_path = os.path.join(self.data_dir, items[1])
             self.file_list.append([image_path, grt_path])
-
-    def __getitem__(self, idx):
-        image_path, grt_path = self.file_list[idx]
-        im, im_info, label = self.transforms(im=image_path, label=grt_path)
-        if self.mode == 'train':
-            return im, label
-        elif self.mode == 'eval':
-            return im, label
-        if self.mode == 'test':
-            return im, im_info, image_path
-
-    def __len__(self):
-        return len(self.file_list)
diff --git a/dygraph/infer.py b/dygraph/infer.py
index af745a39b025e9e804c207989c939d454d7ff25f..0b25a48ff9c2c3ffbe9532d48c95564173364b2c 100644
--- a/dygraph/infer.py
+++ b/dygraph/infer.py
@@ -24,7 +24,7 @@ import tqdm
 
 from datasets import OpticDiscSeg, Cityscapes
 import transforms as T
-import models
+from models import MODELS
 import utils
 import utils.logging as logging
 from utils import get_environ_info
@@ -37,7 +37,8 @@ def parse_args():
     parser.add_argument(
         '--model_name',
         dest='model_name',
-        help="Model type for traing, which is one of ('UNet')",
+        help='Model type for testing, which is one of {}'.format(
+            str(list(MODELS.keys()))),
         type=str,
         default='UNet')
@@ -97,19 +98,20 @@ def infer(model, test_dataset=None, model_dir=None, save_dir='output'):
     logging.info("Start to predict...")
     for im, im_info, im_path in tqdm.tqdm(test_dataset):
-        im = im[np.newaxis, ...]
         im = to_variable(im)
         pred, _ = model(im, mode='test')
         pred = pred.numpy()
         pred = np.squeeze(pred).astype('uint8')
-        keys = list(im_info.keys())
-        for k in keys[::-1]:
-            if k == 'shape_before_resize':
-                h, w = im_info[k][0], im_info[k][1]
-                pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
-            elif k == 'shape_before_padding':
-                h, w = im_info[k][0], im_info[k][1]
+        for info in im_info[::-1]:
+            if info[0] == 'resize':
+                h, w = info[1][0], info[1][1]
+                # interpolation must be passed by keyword: a positional third
+                # argument is taken as dst and silently ignored by OpenCV.
+                pred = cv2.resize(pred, (w, h),
+                                  interpolation=cv2.INTER_NEAREST)
+            elif info[0] == 'padding':
+                h, w = info[1][0], info[1][1]
                 pred = pred[0:h, 0:w]
+            else:
+                raise Exception("Unexpected info '{}' in im_info".format(
+                    info[0]))
 
         im_file = im_path.replace(test_dataset.data_dir, '')
         if im_file[0] == '/':
@@ -146,8 +148,11 @@ def main(args):
     test_transforms = T.Compose([T.Resize(args.input_size), T.Normalize()])
     test_dataset = dataset(transforms=test_transforms, mode='test')
 
-    if args.model_name == 'UNet':
-        model = models.UNet(num_classes=test_dataset.num_classes)
+    if args.model_name not in MODELS:
+        raise Exception(
+            '--model_name is invalid. It should be one of {}'.format(
+                str(list(MODELS.keys()))))
+    model = MODELS[args.model_name](num_classes=test_dataset.num_classes)
 
     infer(
         model,
diff --git a/dygraph/models/__init__.py b/dygraph/models/__init__.py
index 792059698bdbc5f95acbd18a0f3cbc6b6ec769e5..0057d88f154f93542c00b0d3dd22a8e52005764c 100644
--- a/dygraph/models/__init__.py
+++ b/dygraph/models/__init__.py
@@ -13,3 +13,28 @@
 # limitations under the License.
 
 from .unet import UNet
+from .hrnet import *
+
+# Registry mapping the --model_name command-line strings to constructors.
+MODELS = {
+    "UNet": UNet,
+    "HRNet_W18_Small_V1": HRNet_W18_Small_V1,
+    "HRNet_W18_Small_V2": HRNet_W18_Small_V2,
+    "HRNet_W18": HRNet_W18,
+    "HRNet_W30": HRNet_W30,
+    "HRNet_W32": HRNet_W32,
+    "HRNet_W40": HRNet_W40,
+    "HRNet_W44": HRNet_W44,
+    "HRNet_W48": HRNet_W48,
+    "HRNet_W60": HRNet_W60,
+    "HRNet_W64": HRNet_W64,
+    "SE_HRNet_W18_Small_V1": SE_HRNet_W18_Small_V1,
+    "SE_HRNet_W18_Small_V2": SE_HRNet_W18_Small_V2,
+    "SE_HRNet_W18": SE_HRNet_W18,
+    "SE_HRNet_W30": SE_HRNet_W30,
+    "SE_HRNet_W32": SE_HRNet_W32,
+    "SE_HRNet_W40": SE_HRNet_W40,
+    "SE_HRNet_W44": SE_HRNet_W44,
+    "SE_HRNet_W48": SE_HRNet_W48,
+    "SE_HRNet_W60": SE_HRNet_W60,
+    "SE_HRNet_W64": SE_HRNet_W64
+}
diff --git a/dygraph/models/hrnet.py b/dygraph/models/hrnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..2dcf2ddad0aad6e83356fe884ad213660b6816f3
--- /dev/null
+++ b/dygraph/models/hrnet.py
@@ -0,0 +1,1063 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
+try:
+    from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm
+except:
+    from paddle.fluid.dygraph import BatchNorm
+
+__all__ = [
+    "HRNet_W18_Small_V1", "HRNet_W18_Small_V2", "HRNet_W18", "HRNet_W30",
+    "HRNet_W32", "HRNet_W40", "HRNet_W44", "HRNet_W48", "HRNet_W60",
+    "HRNet_W64", "SE_HRNet_W18_Small_V1", "SE_HRNet_W18_Small_V2",
+    "SE_HRNet_W18", "SE_HRNet_W30", "SE_HRNet_W32", "SE_HRNet_W40",
+    "SE_HRNet_W44", "SE_HRNet_W48", "SE_HRNet_W60", "SE_HRNet_W64"
+]
+
+
+class HRNet(fluid.dygraph.Layer):
+    def __init__(self,
+                 num_classes,
+                 stage1_num_modules=1,
+                 stage1_num_blocks=[4],
+                 stage1_num_channels=[64],
+                 stage2_num_modules=1,
+                 stage2_num_blocks=[4, 4],
+                 stage2_num_channels=[18, 36],
+                 stage3_num_modules=4,
+                 stage3_num_blocks=[4, 4, 4],
+                 stage3_num_channels=[18, 36, 72],
+                 stage4_num_modules=3,
+                 stage4_num_blocks=[4, 4, 4, 4],
+                 stage4_num_channels=[18, 36, 72, 144],
+                 has_se=False,
+                 ignore_index=255):
+        super(HRNet, self).__init__()
+
+        self.num_classes = num_classes
+        self.stage1_num_modules = stage1_num_modules
+        self.stage1_num_blocks = stage1_num_blocks
+        self.stage1_num_channels = stage1_num_channels
+        self.stage2_num_modules = stage2_num_modules
+        self.stage2_num_blocks = stage2_num_blocks
+        self.stage2_num_channels = stage2_num_channels
+        self.stage3_num_modules = stage3_num_modules
+        self.stage3_num_blocks = stage3_num_blocks
+        self.stage3_num_channels = stage3_num_channels
+        self.stage4_num_modules = stage4_num_modules
+        self.stage4_num_blocks = stage4_num_blocks
+        self.stage4_num_channels = stage4_num_channels
+        self.has_se = has_se
+        self.ignore_index = ignore_index
+        self.EPS = 1e-5
+
+        self.conv_layer1_1 = ConvBNLayer(
+            num_channels=3,
+            num_filters=64,
+            filter_size=3,
+            stride=2,
+            act='relu',
+            name="layer1_1")
+
+        self.conv_layer1_2 = ConvBNLayer(
+            num_channels=64,
+            num_filters=64,
+            filter_size=3,
+            stride=2,
+            act='relu',
+            name="layer1_2")
+
+        self.la1 = Layer1(
+            num_channels=64,
+            num_blocks=self.stage1_num_blocks[0],
+            num_filters=self.stage1_num_channels[0],
+            has_se=has_se,
+            name="layer2")
+
+        self.tr1 = TransitionLayer(
+            in_channels=[self.stage1_num_channels[0] * 4],
+            out_channels=self.stage2_num_channels,
+            name="tr1")
+
+        self.st2 = Stage(
+            num_channels=self.stage2_num_channels,
+            num_modules=self.stage2_num_modules,
+            num_blocks=self.stage2_num_blocks,
+            num_filters=self.stage2_num_channels,
+            has_se=self.has_se,
+            name="st2")
+
+        self.tr2 = TransitionLayer(
+            in_channels=self.stage2_num_channels,
+            out_channels=self.stage3_num_channels,
+            name="tr2")
+        # has_se is forwarded to every stage so the SE_* variants apply
+        # squeeze-and-excitation beyond stage 2 as well.
+        self.st3 = Stage(
+            num_channels=self.stage3_num_channels,
+            num_modules=self.stage3_num_modules,
+            num_blocks=self.stage3_num_blocks,
+            num_filters=self.stage3_num_channels,
+            has_se=self.has_se,
+            name="st3")
+
+        self.tr3 = TransitionLayer(
+            in_channels=self.stage3_num_channels,
+            out_channels=self.stage4_num_channels,
+            name="tr3")
+        self.st4 = Stage(
+            num_channels=self.stage4_num_channels,
+            num_modules=self.stage4_num_modules,
+            num_blocks=self.stage4_num_blocks,
+            num_filters=self.stage4_num_channels,
+            has_se=self.has_se,
+            name="st4")
+
+        last_inp_channels = sum(self.stage4_num_channels)
+        self.conv_last_2 = ConvBNLayer(
+            num_channels=last_inp_channels,
+            num_filters=last_inp_channels,
+            filter_size=1,
+            stride=1,
+            name='conv-2')
+        self.conv_last_1 = Conv2D(
+            num_channels=last_inp_channels,
+            num_filters=self.num_classes,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            param_attr=ParamAttr(name='conv-1_weights'))
+
+    def forward(self, x, label=None, mode='train'):
+        input_shape = x.shape[2:]
+        conv1 = self.conv_layer1_1(x)
+        conv2 = self.conv_layer1_2(conv1)
+
+        la1 = self.la1(conv2)
+
+        tr1 = self.tr1([la1])
+        st2 = self.st2(tr1)
+
+        tr2 = self.tr2(st2)
+        st3 = self.st3(tr2)
+
+        tr3 = self.tr3(st3)
+        st4 = self.st4(tr3)
+
+        x0_h, x0_w = st4[0].shape[2:]
+        x1 = fluid.layers.resize_bilinear(st4[1], out_shape=(x0_h, x0_w))
+        x2 = fluid.layers.resize_bilinear(st4[2], out_shape=(x0_h, x0_w))
+        x3 = fluid.layers.resize_bilinear(st4[3], out_shape=(x0_h, x0_w))
+        x = fluid.layers.concat([st4[0], x1, x2, x3], axis=1)
+        x = self.conv_last_2(x)
+        logit = self.conv_last_1(x)
+        logit = fluid.layers.resize_bilinear(logit, input_shape)
+
+        if mode == 'train':
+            if label is None:
+                raise Exception('Label is needed during training')
+            return self._get_loss(logit, label)
+        else:
+            score_map = fluid.layers.softmax(logit, axis=1)
+            score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1])
+            pred = fluid.layers.argmax(score_map, axis=3)
+            pred = fluid.layers.unsqueeze(pred, axes=[3])
+            return pred, score_map
+
+    def _get_loss(self, logit, label):
+        logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
+        label = fluid.layers.transpose(label, [0, 2, 3, 1])
+        mask = label != self.ignore_index
+        mask = fluid.layers.cast(mask, 'float32')
+        loss, probs = fluid.layers.softmax_with_cross_entropy(
+            logit,
+            label,
+            ignore_index=self.ignore_index,
+            return_softmax=True,
+            axis=-1)
+
+        loss = loss * mask
+        avg_loss = fluid.layers.mean(loss) / (
+            fluid.layers.mean(mask) + self.EPS)
+
+        label.stop_gradient = True
+        mask.stop_gradient = True
+        return avg_loss
+
+
+class ConvBNLayer(fluid.dygraph.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 act="relu",
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
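+        # The convolution is created without bias; the BatchNorm that follows
+        # supplies the learnable offset.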
self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + bn_name = name + '_bn' + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, input): + y = self._conv(input) + y = self._batch_norm(y) + return y + + +class Layer1(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + num_blocks, + has_se=False, + name=None): + super(Layer1, self).__init__() + + self.bottleneck_block_list = [] + + for i in range(num_blocks): + bottleneck_block = self.add_sublayer( + "bb_{}_{}".format(name, i + 1), + BottleneckBlock( + num_channels=num_channels if i == 0 else num_filters * 4, + num_filters=num_filters, + has_se=has_se, + stride=1, + downsample=True if i == 0 else False, + name=name + '_' + str(i + 1))) + self.bottleneck_block_list.append(bottleneck_block) + + def forward(self, input): + conv = input + for block_func in self.bottleneck_block_list: + conv = block_func(conv) + return conv + + +class TransitionLayer(fluid.dygraph.Layer): + def __init__(self, in_channels, out_channels, name=None): + super(TransitionLayer, self).__init__() + + num_in = len(in_channels) + num_out = len(out_channels) + self.conv_bn_func_list = [] + for i in range(num_out): + residual = None + if i < num_in: + if in_channels[i] != out_channels[i]: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + ConvBNLayer( + num_channels=in_channels[i], + num_filters=out_channels[i], + filter_size=3, + name=name + '_layer_' + str(i + 1))) + else: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + ConvBNLayer( + num_channels=in_channels[-1], + num_filters=out_channels[i], + filter_size=3, + stride=2, + name=name + '_layer_' + str(i + 1))) + self.conv_bn_func_list.append(residual) + + def forward(self, input): + outs = [] + for idx, conv_bn_func in enumerate(self.conv_bn_func_list): + if conv_bn_func is None: + outs.append(input[idx]) + else: + if idx < len(input): + outs.append(conv_bn_func(input[idx])) + else: + outs.append(conv_bn_func(input[-1])) + return outs + + +class Branches(fluid.dygraph.Layer): + def __init__(self, + num_blocks, + in_channels, + out_channels, + has_se=False, + name=None): + super(Branches, self).__init__() + + self.basic_block_list = [] + + for i in range(len(out_channels)): + self.basic_block_list.append([]) + for j in range(num_blocks[i]): + in_ch = in_channels[i] if j == 0 else out_channels[i] + basic_block_func = self.add_sublayer( + "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1), + BasicBlock( + num_channels=in_ch, + num_filters=out_channels[i], + has_se=has_se, + name=name + '_branch_layer_' + str(i + 1) + '_' + + str(j + 1))) + self.basic_block_list[i].append(basic_block_func) + + def forward(self, inputs): + outs = [] + for idx, input in enumerate(inputs): + conv = input + for basic_block_func in self.basic_block_list[idx]: + conv = basic_block_func(conv) + outs.append(conv) + return outs + + +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + has_se, + stride=1, + downsample=False, + name=None): + super(BottleneckBlock, self).__init__() + + self.has_se = has_se + self.downsample 
= downsample + + self.conv1 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act="relu", + name=name + "_conv1", + ) + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_conv2") + self.conv3 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_conv3") + + if self.downsample: + self.conv_down = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_downsample") + + if self.has_se: + self.se = SELayer( + num_channels=num_filters * 4, + num_filters=num_filters * 4, + reduction_ratio=16, + name=name + '_fc') + + def forward(self, input): + residual = input + conv1 = self.conv1(input) + conv2 = self.conv2(conv1) + conv3 = self.conv3(conv2) + + if self.downsample: + residual = self.conv_down(input) + + if self.has_se: + conv3 = self.se(conv3) + + y = fluid.layers.elementwise_add(x=conv3, y=residual, act="relu") + return y + + +class BasicBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride=1, + has_se=False, + downsample=False, + name=None): + super(BasicBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_conv1") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=1, + act=None, + name=name + "_conv2") + + if self.downsample: + self.conv_down = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + act="relu", + name=name + "_downsample") + + if self.has_se: + self.se = SELayer( + num_channels=num_filters, + num_filters=num_filters, + reduction_ratio=16, + name=name + '_fc') + + def forward(self, input): + residual = input + conv1 = self.conv1(input) + conv2 = self.conv2(conv1) + + if self.downsample: + residual = self.conv_down(input) + + if self.has_se: + conv2 = self.se(conv2) + + y = fluid.layers.elementwise_add(x=conv2, y=residual, act="relu") + return y + + +class SELayer(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, reduction_ratio, name=None): + super(SELayer, self).__init__() + + self.pool2d_gap = Pool2D(pool_type='avg', global_pooling=True) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = Linear( + num_channels, + med_ch, + act="relu", + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + "_sqz_weights"), + bias_attr=ParamAttr(name=name + '_sqz_offset')) + + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = Linear( + med_ch, + num_filters, + act="sigmoid", + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + "_exc_weights"), + bias_attr=ParamAttr(name=name + '_exc_offset')) + + def forward(self, input): + pool = self.pool2d_gap(input) + pool = fluid.layers.reshape(pool, shape=[-1, self._num_channels]) + squeeze = self.squeeze(pool) + excitation = self.excitation(squeeze) + excitation = fluid.layers.reshape( + excitation, shape=[-1, self._num_channels, 1, 1]) + out = input * excitation + return out + + +class Stage(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_modules, + num_blocks, + num_filters, + 
has_se=False, + multi_scale_output=True, + name=None): + super(Stage, self).__init__() + + self._num_modules = num_modules + + self.stage_func_list = [] + for i in range(num_modules): + if i == num_modules - 1 and not multi_scale_output: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + multi_scale_output=False, + name=name + '_' + str(i + 1))) + else: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + name=name + '_' + str(i + 1))) + + self.stage_func_list.append(stage_func) + + def forward(self, input): + out = input + for idx in range(self._num_modules): + out = self.stage_func_list[idx](out) + return out + + +class HighResolutionModule(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_blocks, + num_filters, + has_se=False, + multi_scale_output=True, + name=None): + super(HighResolutionModule, self).__init__() + + self.branches_func = Branches( + num_blocks=num_blocks, + in_channels=num_channels, + out_channels=num_filters, + has_se=has_se, + name=name) + + self.fuse_func = FuseLayers( + in_channels=num_filters, + out_channels=num_filters, + multi_scale_output=multi_scale_output, + name=name) + + def forward(self, input): + out = self.branches_func(input) + out = self.fuse_func(out) + return out + + +class FuseLayers(fluid.dygraph.Layer): + def __init__(self, + in_channels, + out_channels, + multi_scale_output=True, + name=None): + super(FuseLayers, self).__init__() + + self._actual_ch = len(in_channels) if multi_scale_output else 1 + self._in_channels = in_channels + + self.residual_func_list = [] + for i in range(self._actual_ch): + for j in range(len(in_channels)): + residual_func = None + if j > i: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}".format(name, i + 1, j + 1), + ConvBNLayer( + num_channels=in_channels[j], + num_filters=out_channels[i], + filter_size=1, + stride=1, + act=None, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1))) + self.residual_func_list.append(residual_func) + elif j < i: + pre_num_filters = in_channels[j] + for k in range(i - j): + if k == i - j - 1: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format( + name, i + 1, j + 1, k + 1), + ConvBNLayer( + num_channels=pre_num_filters, + num_filters=out_channels[i], + filter_size=3, + stride=2, + act=None, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1))) + pre_num_filters = out_channels[i] + else: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format( + name, i + 1, j + 1, k + 1), + ConvBNLayer( + num_channels=pre_num_filters, + num_filters=out_channels[j], + filter_size=3, + stride=2, + act="relu", + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1))) + pre_num_filters = out_channels[j] + self.residual_func_list.append(residual_func) + + def forward(self, input): + outs = [] + residual_func_idx = 0 + for i in range(self._actual_ch): + residual = input[i] + for j in range(len(self._in_channels)): + if j > i: + y = self.residual_func_list[residual_func_idx](input[j]) + residual_func_idx += 1 + + y = fluid.layers.resize_nearest(input=y, scale=2**(j - i)) + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + elif j < i: + y = input[j] + for k in range(i - j): + y = 
self.residual_func_list[residual_func_idx](y) + residual_func_idx += 1 + + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + + layer_helper = LayerHelper(self.full_name(), act='relu') + residual = layer_helper.append_activation(residual) + outs.append(residual) + + return outs + + +class LastClsOut(fluid.dygraph.Layer): + def __init__(self, + num_channel_list, + has_se, + num_filters_list=[32, 64, 128, 256], + name=None): + super(LastClsOut, self).__init__() + + self.func_list = [] + for idx in range(len(num_channel_list)): + func = self.add_sublayer( + "conv_{}_conv_{}".format(name, idx + 1), + BottleneckBlock( + num_channels=num_channel_list[idx], + num_filters=num_filters_list[idx], + has_se=has_se, + downsample=True, + name=name + 'conv_' + str(idx + 1))) + self.func_list.append(func) + + def forward(self, inputs): + outs = [] + for idx, input in enumerate(inputs): + out = self.func_list[idx](input) + outs.append(out) + return outs + + +def HRNet_W18_Small_V1(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[1], + stage1_num_channels=[32], + stage2_num_modules=1, + stage2_num_blocks=[2, 2], + stage2_num_channels=[16, 32], + stage3_num_modules=1, + stage3_num_blocks=[2, 2, 2], + stage3_num_channels=[16, 32, 64], + stage4_num_modules=1, + stage4_num_blocks=[2, 2, 2, 2], + stage4_num_channels=[16, 32, 64, 128]) + return model + + +def HRNet_W18_Small_V2(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[2], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[2, 2], + stage2_num_channels=[18, 36], + stage3_num_modules=1, + stage3_num_blocks=[2, 2, 2], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=1, + stage4_num_blocks=[2, 2, 2, 2], + stage4_num_channels=[18, 36, 72, 144]) + return model + + +def HRNet_W18(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[18, 36], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[18, 36, 72, 144]) + return model + + +def HRNet_W30(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[30, 60], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[30, 60, 120], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[30, 60, 120, 240]) + return model + + +def HRNet_W32(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[32, 64], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[32, 64, 128], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[32, 64, 128, 256]) + return model + + +def HRNet_W40(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[40, 80], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[40, 80, 160], + 
stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[40, 80, 160, 320]) + return model + + +def HRNet_W44(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[44, 88], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[44, 88, 176], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[44, 88, 176, 352]) + return model + + +def HRNet_W48(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[48, 96], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[48, 96, 192], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[48, 96, 192, 384]) + return model + + +def HRNet_W60(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[60, 120], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[60, 120, 240], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[60, 120, 240, 480]) + return model + + +def HRNet_W64(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[64, 128], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[64, 128, 256], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[64, 128, 256, 512]) + return model + + +def SE_HRNet_W18_Small_V1(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[1], + stage1_num_channels=[32], + stage2_num_modules=1, + stage2_num_blocks=[2, 2], + stage2_num_channels=[16, 32], + stage3_num_modules=1, + stage3_num_blocks=[2, 2, 2], + stage3_num_channels=[16, 32, 64], + stage4_num_modules=1, + stage4_num_blocks=[2, 2, 2, 2], + stage4_num_channels=[16, 32, 64, 128], + has_se=True) + return model + + +def SE_HRNet_W18_Small_V2(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[2], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[2, 2], + stage2_num_channels=[18, 36], + stage3_num_modules=1, + stage3_num_blocks=[2, 2, 2], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=1, + stage4_num_blocks=[2, 2, 2, 2], + stage4_num_channels=[18, 36, 72, 144], + has_se=True) + return model + + +def SE_HRNet_W18(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[18, 36], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[18, 36, 72, 144], + has_se=True) + return model + + +def SE_HRNet_W30(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[30, 60], + 
stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[30, 60, 120], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[30, 60, 120, 240], + has_se=True) + return model + + +def SE_HRNet_W32(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[32, 64], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[32, 64, 128], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[32, 64, 128, 256], + has_se=True) + return model + + +def SE_HRNet_W40(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[40, 80], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[40, 80, 160], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[40, 80, 160, 320], + has_se=True) + return model + + +def SE_HRNet_W44(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[44, 88], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[44, 88, 176], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[44, 88, 176, 352], + has_se=True) + return model + + +def SE_HRNet_W48(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[48, 96], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[48, 96, 192], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[48, 96, 192, 384], + has_se=True) + return model + + +def SE_HRNet_W60(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[60, 120], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[60, 120, 240], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[60, 120, 240, 480], + has_se=True) + return model + + +def SE_HRNet_W64(num_classes): + model = HRNet( + num_classes=num_classes, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[64, 128], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[64, 128, 256], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[64, 128, 256, 512], + has_se=True) + return model diff --git a/dygraph/models/unet.py b/dygraph/models/unet.py index b55e3614b6988a0102eb3e6f17093e59673eae70..970936d05e0fb67a58973879ffceef5f02816495 100644 --- a/dygraph/models/unet.py +++ b/dygraph/models/unet.py @@ -13,7 +13,11 @@ # limitations under the License. 
 import paddle.fluid as fluid
-from paddle.fluid.dygraph import Conv2D, BatchNorm, Pool2D
+from paddle.fluid.dygraph import Conv2D, Pool2D
+# Prefer the multi-GPU SyncBatchNorm when this Paddle build provides it and
+# fall back to plain BatchNorm otherwise.
+try:
+    from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm
+except:
+    from paddle.fluid.dygraph import BatchNorm
 
 
 class UNet(fluid.dygraph.Layer):
@@ -39,6 +43,8 @@ class UNet(fluid.dygraph.Layer):
         return pred, score_map
 
     def _get_loss(self, logit, label):
+        logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
+        label = fluid.layers.transpose(label, [0, 2, 3, 1])
         mask = label != self.ignore_index
         mask = fluid.layers.cast(mask, 'float32')
         loss, probs = fluid.layers.softmax_with_cross_entropy(
diff --git a/dygraph/train.py b/dygraph/train.py
index 52aa032c454a5cc72ddf9cc3b27cee5b415511eb..70b61aaf839af9e4a6d44046037e7db703a8abcc 100644
--- a/dygraph/train.py
+++ b/dygraph/train.py
@@ -22,7 +22,7 @@ from paddle.incubate.hapi.distributed import DistributedBatchSampler
 
 from datasets import OpticDiscSeg, Cityscapes
 import transforms as T
-import models
+from models import MODELS
 import utils.logging as logging
 from utils import get_environ_info
 from utils import load_pretrained_model
@@ -38,7 +38,8 @@ def parse_args():
     parser.add_argument(
         '--model_name',
         dest='model_name',
-        help="Model type for traing, which is one of ('UNet')",
+        help='Model type for training, which is one of {}'.format(
+            str(list(MODELS.keys()))),
         type=str,
         default='UNet')
@@ -181,7 +182,7 @@ def train(model,
     total_steps = steps_per_epoch * (num_epochs - start_epoch)
     num_steps = 0
     best_mean_iou = -1.0
-    best_model_epoch = 1
+    best_model_epoch = -1
     for epoch in range(start_epoch, num_epochs):
         for step, data in enumerate(loader):
             images = data[0]
@@ -229,10 +230,8 @@ def train(model,
                 mean_iou, mean_acc = evaluate(
                     model,
                     eval_dataset,
-                    places=places,
                     model_dir=current_save_dir,
                     num_classes=num_classes,
-                    batch_size=batch_size,
                     ignore_index=ignore_index,
                     epoch_id=epoch + 1)
                 if mean_iou > best_mean_iou:
@@ -241,9 +240,9 @@ def train(model,
                     best_model_dir = os.path.join(save_dir, "best_model")
                     fluid.save_dygraph(model.state_dict(),
                                        os.path.join(best_model_dir, 'model'))
-                    logging.info(
-                        'Current evaluated best model in eval_dataset is epoch_{}, miou={:4f}'
-                        .format(best_model_epoch, best_mean_iou))
+                logging.info(
+                    'Current evaluated best model in eval_dataset is epoch_{}, miou={:4f}'
+                    .format(best_model_epoch, best_mean_iou))
 
                 if use_vdl:
                     log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
@@ -286,9 +285,11 @@ def main(args):
                                  T.Normalize()])
     eval_dataset = dataset(transforms=eval_transforms, mode='eval')
 
-    if args.model_name == 'UNet':
-        model = models.UNet(
-            num_classes=train_dataset.num_classes, ignore_index=255)
+    if args.model_name not in MODELS:
+        raise Exception(
+            '--model_name is invalid. It should be one of {}'.format(
+                str(list(MODELS.keys()))))
+    model = MODELS[args.model_name](num_classes=train_dataset.num_classes)
 
     # Creat optimizer
     # todo, may less one than len(loader)
diff --git a/dygraph/transforms/transforms.py b/dygraph/transforms/transforms.py
index 38c3be18a2ae885bfa6238304a614935401a6330..935a2c0f8670eaa24b148844aa727efe6942e666 100644
--- a/dygraph/transforms/transforms.py
+++ b/dygraph/transforms/transforms.py
@@ -1,3 +1,4 @@
+# coding: utf8
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,27 +13,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
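+# NOTE: transforms now record im_info as a list of ('resize'|'padding', (h, w))
+# tuples in application order, replacing the old OrderedDict keys; consumers
+# walk the list in reverse to undo preprocessing.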
-from .functional import * import random +from collections import OrderedDict + import numpy as np from PIL import Image import cv2 -from collections import OrderedDict +from .functional import * -class Compose: - """根据数据预处理/增强算子对输入数据进行操作。 - 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 - - Args: - transforms (list): 数据预处理/增强算子。 - to_rgb (bool): 是否转化为rgb通道格式 - - Raises: - TypeError: transforms不是list对象 - ValueError: transforms元素个数小于1。 - """ +class Compose: def __init__(self, transforms, to_rgb=True): if not isinstance(transforms, list): raise TypeError('The transforms must be a list!') @@ -43,20 +34,8 @@ class Compose: self.to_rgb = to_rgb def __call__(self, im, im_info=None, label=None): - """ - Args: - im (str/np.ndarray): 图像路径/图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息,dict中的字段如下: - - shape_before_resize (tuple): 图像resize之前的大小(h, w)。 - - shape_before_padding (tuple): 图像padding之前的大小(h, w)。 - label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。 - - Returns: - tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。 - """ - if im_info is None: - im_info = dict() + im_info = list() if isinstance(im, str): im = cv2.imread(im).astype('float32') if isinstance(label, str): @@ -80,27 +59,10 @@ class Compose: class RandomHorizontalFlip: - """以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。 - - Args: - prob (float): 随机水平翻转的概率。默认值为0.5。 - - """ def __init__(self, prob=0.5): self.prob = prob def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if random.random() < self.prob: im = horizontal_flip(im) if label is not None: @@ -112,26 +74,10 @@ class RandomHorizontalFlip: class RandomVerticalFlip: - """以一定的概率对图像进行垂直翻转。当存在标注图像时,则同步进行翻转。 - - Args: - prob (float): 随机垂直翻转的概率。默认值为0.1。 - """ def __init__(self, prob=0.1): self.prob = prob def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if random.random() < self.prob: im = vertical_flip(im) if label is not None: @@ -143,25 +89,6 @@ class RandomVerticalFlip: class Resize: - """调整图像大小(resize)。 - - - 当目标大小(target_size)类型为int时,根据插值方式, - 将图像resize为[target_size, target_size]。 - - 当目标大小(target_size)类型为list或tuple时,根据插值方式, - 将图像resize为target_size。 - 注意:当插值方式为“RANDOM”时,则随机选取一种插值方式进行resize。 - - Args: - target_size (int/list/tuple): 短边目标长度。默认为608。 - interp (str): resize的插值方式,与opencv的插值方式对应,取值范围为 - ['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']。默认为"LINEAR"。 - - Raises: - TypeError: 形参数据类型不满足需求。 - ValueError: 插值方式不在['NEAREST', 'LINEAR', 'CUBIC', - 'AREA', 'LANCZOS4', 'RANDOM']中。 - """ - # The interpolation mode interp_dict = { 'NEAREST': cv2.INTER_NEAREST, @@ -189,26 +116,9 @@ class Resize: self.target_size = target_size def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict, 可选): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - 
其中,im_info跟新字段为: - -shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。 - - Raises: - TypeError: 形参数据类型不满足需求。 - ValueError: 数据长度不匹配。 - """ if im_info is None: - im_info = OrderedDict() - im_info['shape_before_resize'] = im.shape[:2] + im_info = list() + im_info.append(('resize', im.shape[:2])) if not isinstance(im, np.ndarray): raise TypeError("Resize: image type is not numpy.") if len(im.shape) != 3: @@ -228,32 +138,14 @@ class Resize: class ResizeByLong: - """对图像长边resize到固定值,短边按比例进行缩放。当存在标注图像时,则同步进行处理。 - - Args: - long_size (int): resize后图像的长边大小。 - """ def __init__(self, long_size): self.long_size = long_size def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - 其中,im_info新增字段为: - -shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。 - """ if im_info is None: - im_info = OrderedDict() + im_info = list() - im_info['shape_before_resize'] = im.shape[:2] + im_info.append(('resize', im.shape[:2])) im = resize_long(im, self.long_size) if label is not None: label = resize_long(label, self.long_size, cv2.INTER_NEAREST) @@ -265,15 +157,6 @@ class ResizeByLong: class ResizeRangeScaling: - """对图像长边随机resize到指定范围内,短边按比例进行缩放。当存在标注图像时,则同步进行处理。 - - Args: - min_value (int): 图像长边resize后的最小值。默认值400。 - max_value (int): 图像长边resize后的最大值。默认值600。 - - Raises: - ValueError: min_value大于max_value - """ def __init__(self, min_value=400, max_value=600): if min_value > max_value: raise ValueError('min_value must be less than max_value, ' @@ -283,17 +166,6 @@ class ResizeRangeScaling: self.max_value = max_value def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if self.min_value == self.max_value: random_size = self.max_value else: @@ -310,17 +182,6 @@ class ResizeRangeScaling: class ResizeStepScaling: - """对图像按照某一个比例resize,这个比例以scale_step_size为步长 - 在[min_scale_factor, max_scale_factor]随机变动。当存在标注图像时,则同步进行处理。 - - Args: - min_scale_factor(float), resize最小尺度。默认值0.75。 - max_scale_factor (float), resize最大尺度。默认值1.25。 - scale_step_size (float), resize尺度范围间隔。默认值0.25。 - - Raises: - ValueError: min_scale_factor大于max_scale_factor - """ def __init__(self, min_scale_factor=0.75, max_scale_factor=1.25, @@ -335,17 +196,6 @@ class ResizeStepScaling: self.scale_step_size = scale_step_size def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if self.min_scale_factor == self.max_scale_factor: scale_factor = self.min_scale_factor @@ -375,17 +225,6 @@ class ResizeStepScaling: class Normalize: - """对图像进行标准化。 - 1.尺度缩放到 [0,1]。 - 2.对图像进行减均值除以标准差操作。 - - Args: - mean (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]。 - std (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。 - - Raises: - ValueError: mean或std不是list对象。std包含0。 - """ def __init__(self, mean=[0.5, 0.5, 0.5], 
std=[0.5, 0.5, 0.5]): self.mean = mean self.std = std @@ -396,18 +235,6 @@ class Normalize: raise ValueError('{}: std is invalid!'.format(self)) def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - mean = np.array(self.mean)[np.newaxis, np.newaxis, :] std = np.array(self.std)[np.newaxis, np.newaxis, :] im = normalize(im, mean, std) @@ -419,18 +246,6 @@ class Normalize: class Padding: - """对图像或标注图像进行padding,padding方向为右和下。 - 根据提供的值对图像或标注图像进行padding操作。 - - Args: - target_size (int|list|tuple): padding后图像的大小。 - im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。 - label_padding_value (int): 标注图像padding的值。默认值为255。 - - Raises: - TypeError: target_size不是int|list|tuple。 - ValueError: target_size为list|tuple时元素个数不等于2。 - """ def __init__(self, target_size, im_padding_value=[127.5, 127.5, 127.5], @@ -449,25 +264,9 @@ class Padding: self.label_padding_value = label_padding_value def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - 其中,im_info新增字段为: - -shape_before_padding (tuple): 保存padding之前图像的形状(h, w)。 - - Raises: - ValueError: 输入图像im或label的形状大于目标值 - """ if im_info is None: - im_info = OrderedDict() - im_info['shape_before_padding'] = im.shape[:2] + im_info = list() + im_info.append(('padding', im.shape[:2])) im_height, im_width = im.shape[0], im.shape[1] if isinstance(self.target_size, int): @@ -483,21 +282,23 @@ class Padding: 'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})' .format(im_width, im_height, target_width, target_height)) else: - im = cv2.copyMakeBorder(im, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.im_padding_value) + im = cv2.copyMakeBorder( + im, + 0, + pad_height, + 0, + pad_width, + cv2.BORDER_CONSTANT, + value=self.im_padding_value) if label is not None: - label = cv2.copyMakeBorder(label, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.label_padding_value) + label = cv2.copyMakeBorder( + label, + 0, + pad_height, + 0, + pad_width, + cv2.BORDER_CONSTANT, + value=self.label_padding_value) if label is None: return (im, im_info) else: @@ -505,17 +306,6 @@ class Padding: class RandomPaddingCrop: - """对图像和标注图进行随机裁剪,当所需要的裁剪尺寸大于原图时,则进行padding操作。 - - Args: - crop_size (int|list|tuple): 裁剪图像大小。默认为512。 - im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。 - label_padding_value (int): 标注图像padding的值。默认值为255。 - - Raises: - TypeError: crop_size不是int/list/tuple。 - ValueError: target_size为list/tuple时元素个数不等于2。 - """ def __init__(self, crop_size=512, im_padding_value=[127.5, 127.5, 127.5], @@ -534,17 +324,6 @@ class RandomPaddingCrop: self.label_padding_value = label_padding_value def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if isinstance(self.crop_size, int): crop_width = self.crop_size crop_height = self.crop_size @@ -564,21 +343,23 @@ class RandomPaddingCrop: pad_height = max(crop_height - img_height, 0) pad_width = max(crop_width - img_width, 0) if (pad_height > 0 or pad_width > 0): - im = cv2.copyMakeBorder(im, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.im_padding_value) + im = cv2.copyMakeBorder( + im, + 0, + pad_height, + 0, + pad_width, + cv2.BORDER_CONSTANT, + value=self.im_padding_value) if label is not None: - label = cv2.copyMakeBorder(label, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.label_padding_value) + label = cv2.copyMakeBorder( + label, + 0, + pad_height, + 0, + pad_width, + cv2.BORDER_CONSTANT, + value=self.label_padding_value) img_height = im.shape[0] img_width = im.shape[1] @@ -586,11 +367,11 @@ class RandomPaddingCrop: h_off = np.random.randint(img_height - crop_height + 1) w_off = np.random.randint(img_width - crop_width + 1) - im = im[h_off:(crop_height + h_off), w_off:(w_off + - crop_width), :] + im = im[h_off:(crop_height + h_off), w_off:( + w_off + crop_width), :] if label is not None: - label = label[h_off:(crop_height + - h_off), w_off:(w_off + crop_width)] + label = label[h_off:(crop_height + h_off), w_off:( + w_off + crop_width)] if label is None: return (im, im_info) else: @@ -598,26 +379,10 @@ class RandomPaddingCrop: class RandomBlur: - """以一定的概率对图像进行高斯模糊。 - - Args: - prob (float): 图像模糊概率。默认为0.1。 - """ def __init__(self, prob=0.1): self.prob = prob def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if self.prob <= 0: n = 0 elif self.prob >= 1: @@ -640,16 +405,6 @@ class RandomBlur: class RandomRotation: - """对图像进行随机旋转。 - 在不超过最大旋转角度的情况下,图像进行随机旋转,当存在标注图像时,同步进行, - 并对旋转后的图像和标注图像进行相应的padding。 - - Args: - max_rotation (float): 最大旋转角度。默认为15度。 - im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。 - label_padding_value (int): 标注图像padding的值。默认为255。 - - """ def __init__(self, max_rotation=15, im_padding_value=[127.5, 127.5, 127.5], @@ -659,17 +414,6 @@ class RandomRotation: self.label_padding_value = label_padding_value def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if self.max_rotation > 0: (h, w) = im.shape[:2] do_rotation = np.random.uniform(-self.max_rotation, @@ -686,18 +430,20 @@ class RandomRotation: r[0, 2] += (nw / 2) - cx r[1, 2] += (nh / 2) - cy dsize = (nw, nh) - im = cv2.warpAffine(im, - r, - dsize=dsize, - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=self.im_padding_value) - label = cv2.warpAffine(label, - r, - dsize=dsize, - flags=cv2.INTER_NEAREST, - borderMode=cv2.BORDER_CONSTANT, - borderValue=self.label_padding_value) + im = cv2.warpAffine( + im, + r, + dsize=dsize, + flags=cv2.INTER_LINEAR, + borderMode=cv2.BORDER_CONSTANT, + borderValue=self.im_padding_value) + 
label = cv2.warpAffine( + label, + r, + dsize=dsize, + flags=cv2.INTER_NEAREST, + borderMode=cv2.BORDER_CONSTANT, + borderValue=self.label_padding_value) if label is None: return (im, im_info) @@ -706,29 +452,11 @@ class RandomRotation: class RandomScaleAspect: - """裁剪并resize回原始尺寸的图像和标注图像。 - 按照一定的面积比和宽高比对图像进行裁剪,并reszie回原始图像的图像,当存在标注图时,同步进行。 - - Args: - min_scale (float):裁取图像占原始图像的面积比,取值[0,1],为0时则返回原图。默认为0.5。 - aspect_ratio (float): 裁取图像的宽高比范围,非负值,为0时返回原图。默认为0.33。 - """ def __init__(self, min_scale=0.5, aspect_ratio=0.33): self.min_scale = min_scale self.aspect_ratio = aspect_ratio def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ if self.min_scale != 0 and self.aspect_ratio != 0: img_height = im.shape[0] img_width = im.shape[1] @@ -751,10 +479,12 @@ class RandomScaleAspect: im = im[h1:(h1 + dh), w1:(w1 + dw), :] label = label[h1:(h1 + dh), w1:(w1 + dw)] - im = cv2.resize(im, (img_width, img_height), - interpolation=cv2.INTER_LINEAR) - label = cv2.resize(label, (img_width, img_height), - interpolation=cv2.INTER_NEAREST) + im = cv2.resize( + im, (img_width, img_height), + interpolation=cv2.INTER_LINEAR) + label = cv2.resize( + label, (img_width, img_height), + interpolation=cv2.INTER_NEAREST) break if label is None: return (im, im_info) @@ -763,21 +493,6 @@ class RandomScaleAspect: class RandomDistort: - """对图像进行随机失真。 - - 1. 对变换的操作顺序进行随机化操作。 - 2. 按照1中的顺序以一定的概率对图像进行随机像素内容变换。 - - Args: - brightness_range (float): 明亮度因子的范围。默认为0.5。 - brightness_prob (float): 随机调整明亮度的概率。默认为0.5。 - contrast_range (float): 对比度因子的范围。默认为0.5。 - contrast_prob (float): 随机调整对比度的概率。默认为0.5。 - saturation_range (float): 饱和度因子的范围。默认为0.5。 - saturation_prob (float): 随机调整饱和度的概率。默认为0.5。 - hue_range (int): 色调因子的范围。默认为18。 - hue_prob (float): 随机调整色调的概率。默认为0.5。 - """ def __init__(self, brightness_range=0.5, brightness_prob=0.5, @@ -797,17 +512,6 @@ class RandomDistort: self.hue_prob = hue_prob def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ brightness_lower = 1 - self.brightness_range brightness_upper = 1 + self.brightness_range contrast_lower = 1 - self.contrast_range diff --git a/dygraph/utils/utils.py b/dygraph/utils/utils.py index 46e204dd2e91f319c788eb43ca50602308ce1954..fa995d27af3f78e97bc06d586fa7bb2ecf439f83 100644 --- a/dygraph/utils/utils.py +++ b/dygraph/utils/utils.py @@ -52,7 +52,11 @@ def load_pretrained_model(model, pretrained_model): logging.info('Load pretrained model from {}'.format(pretrained_model)) if os.path.exists(pretrained_model): ckpt_path = os.path.join(pretrained_model, 'model') - para_state_dict, _ = fluid.load_dygraph(ckpt_path) + try: + para_state_dict, _ = fluid.load_dygraph(ckpt_path) + except: + para_state_dict = fluid.load_program_state(pretrained_model) + model_state_dict = model.state_dict() keys = model_state_dict.keys() num_params_loaded = 0 diff --git a/dygraph/val.py b/dygraph/val.py index 77965f3f8a040d1bfa2f1c6cfaa3a838ddc937c7..ca36a6fe1ca169d30f2dbd06ff58da62b507ff4f 100644 --- 
a/dygraph/val.py
+++ b/dygraph/val.py
@@ -16,8 +16,10 @@
 import argparse
 import os
 import math
-from paddle.fluid.dygraph.base import to_variable
 import numpy as np
+import tqdm
+import cv2
+from paddle.fluid.dygraph.base import to_variable
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.parallel import ParallelEnv
 from paddle.fluid.io import DataLoader
@@ -25,7 +27,7 @@ from paddle.fluid.dataloader import BatchSampler
 
 from datasets import OpticDiscSeg, Cityscapes
 import transforms as T
-import models
+from models import MODELS
 import utils.logging as logging
 from utils import get_environ_info
 from utils import ConfusionMatrix
@@ -39,7 +41,8 @@ def parse_args():
     parser.add_argument(
         '--model_name',
         dest='model_name',
-        help="Model type for evaluation, which is one of ('UNet')",
+        help='Model type for evaluation, which is one of {}'.format(
+            str(list(MODELS.keys()))),
         type=str,
         default='UNet')
@@ -60,12 +63,6 @@ def parse_args():
         nargs=2,
         default=[512, 512],
         type=int)
-    parser.add_argument(
-        '--batch_size',
-        dest='batch_size',
-        help='Mini batch size',
-        type=int,
-        default=2)
     parser.add_argument(
         '--model_dir',
         dest='model_dir',
@@ -78,10 +75,8 @@ def parse_args():
 
 def evaluate(model,
              eval_dataset=None,
-             places=None,
              model_dir=None,
              num_classes=None,
-             batch_size=2,
              ignore_index=255,
              epoch_id=None):
     ckpt_path = os.path.join(model_dir, 'model')
@@ -89,15 +84,7 @@ def evaluate(model,
     model.set_dict(para_state_dict)
     model.eval()
 
-    batch_sampler = BatchSampler(
-        eval_dataset, batch_size=batch_size, shuffle=False, drop_last=False)
-    loader = DataLoader(
-        eval_dataset,
-        batch_sampler=batch_sampler,
-        places=places,
-        return_list=True,
-    )
-    total_steps = len(batch_sampler)
+    total_steps = len(eval_dataset)
     conf_mat = ConfusionMatrix(num_classes, streaming=True)
 
     logging.info(
@@ -105,15 +92,26 @@ def evaluate(model,
         len(eval_dataset), total_steps))
     timer = Timer()
     timer.start()
-    for step, data in enumerate(loader):
-        images = data[0]
-        labels = data[1].astype('int64')
-        pred, _ = model(images, mode='eval')
-
-        pred = pred.numpy()
-        labels = labels.numpy()
-        mask = labels != ignore_index
-        conf_mat.calculate(pred=pred, label=labels, ignore=mask)
+    for step, (im, im_info, label) in enumerate(eval_dataset):
+        im = to_variable(im)
+        pred, _ = model(im, mode='eval')
+        pred = pred.numpy().astype('float32')
+        pred = np.squeeze(pred)
+        # Undo the preprocessing records in reverse order so the prediction
+        # is scored against the full-resolution label.
+        for info in im_info[::-1]:
+            if info[0] == 'resize':
+                h, w = info[1][0], info[1][1]
+                # interpolation must be a keyword argument; a positional third
+                # argument is taken as dst and silently ignored by OpenCV.
+                pred = cv2.resize(pred, (w, h),
+                                  interpolation=cv2.INTER_NEAREST)
+            elif info[0] == 'padding':
+                h, w = info[1][0], info[1][1]
+                pred = pred[0:h, 0:w]
+            else:
+                raise Exception("Unexpected info '{}' in im_info".format(
+                    info[0]))
+        pred = pred[np.newaxis, :, :, np.newaxis]
+        pred = pred.astype('int64')
+        mask = label != ignore_index
+
+        conf_mat.calculate(pred=pred, label=label, ignore=mask)
         _, iou = conf_mat.mean_iou()
 
         time_step = timer.elapsed_time()
@@ -153,16 +151,17 @@ def main(args):
     eval_transforms = T.Compose([T.Resize(args.input_size), T.Normalize()])
     eval_dataset = dataset(transforms=eval_transforms, mode='eval')
 
-    if args.model_name == 'UNet':
-        model = models.UNet(num_classes=eval_dataset.num_classes)
+    if args.model_name not in MODELS:
+        raise Exception(
+            '--model_name is invalid. It should be one of {}'.format(
+                str(list(MODELS.keys()))))
+    model = MODELS[args.model_name](num_classes=eval_dataset.num_classes)
 
     evaluate(
         model,
         eval_dataset,
-        places=places,
         model_dir=args.model_dir,
-        num_classes=eval_dataset.num_classes,
-        batch_size=args.batch_size)
+        num_classes=eval_dataset.num_classes)
 
 
 if __name__ == '__main__':
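For reviewers who want to drive the refactored pieces from Python rather than the CLI, a minimal sketch (run from inside `dygraph/`; the `HRNet_W18` choice and the `output/best_model` checkpoint path are illustrative, not part of this diff):

```
import paddle.fluid as fluid

import transforms as T
from datasets import OpticDiscSeg
from models import MODELS
from val import evaluate

with fluid.dygraph.guard():
    # Build the eval-mode pipeline; OpticDiscSeg downloads itself on first use.
    eval_transforms = T.Compose([T.Resize([192, 192]), T.Normalize()])
    eval_dataset = OpticDiscSeg(transforms=eval_transforms, mode='eval')

    # Look the constructor up in the registry, exactly as train/val/infer do.
    model = MODELS['HRNet_W18'](num_classes=eval_dataset.num_classes)

    # evaluate() loads <model_dir>/model and streams mIoU over the dataset.
    evaluate(
        model,
        eval_dataset,
        model_dir='output/best_model',
        num_classes=eval_dataset.num_classes)
```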