未验证 提交 27919efd 编写于 作者: W wuzewu 提交者: GitHub

Merge pull request #318 from wuyefeilin/dygraph

# 动态图执行
## 数据集设置
```
data_dir='data/path'
train_list='train/list/path'
val_list='val/list/path'
test_list='test/list/path'
num_classes=number/of/dataset/classes
```
## 训练
```
python3 train.py --model_name UNet \
--data_dir $data_dir \
--train_list $train_list \
--val_list $val_list \
--num_classes $num_classes \
--dataset OpticDiscSeg \
--input_size 192 192 \
--num_epochs 4 \
--num_epochs 10 \
--save_interval_epochs 1 \
--do_eval \
--save_dir output
```
## 评估
```
python3 val.py --model_name UNet \
--data_dir $data_dir \
--val_list $val_list \
--num_classes $num_classes \
--dataset OpticDiscSeg \
--input_size 192 192 \
--model_dir output/epoch_1
--model_dir output/best_model
```
## 预测
```
python3 infer.py --model_name UNet \
--data_dir $data_dir \
--test_list $test_list \
--num_classes $num_classes \
--input_size 192 192 \
--model_dir output/epoch_1
--dataset OpticDiscSeg \
--model_dir output/best_model \
--input_size 192 192
```
......@@ -12,5 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .dataset import Dataset
from .optic_disc_seg import OpticDiscSeg
from .cityscapes import Cityscapes
......@@ -14,8 +14,7 @@
import os
from paddle.fluid.io import Dataset
from .dataset import Dataset
from utils.download import download_file_and_uncompress
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
......@@ -70,16 +69,3 @@ class Cityscapes(Dataset):
image_path = os.path.join(self.data_dir, items[0])
grt_path = os.path.join(self.data_dir, items[1])
self.file_list.append([image_path, grt_path])
def __getitem__(self, idx):
image_path, grt_path = self.file_list[idx]
im, im_info, label = self.transforms(im=image_path, label=grt_path)
if self.mode == 'train':
return im, label
elif self.mode == 'eval':
return im, label
if self.mode == 'test':
return im, im_info, image_path
def __len__(self):
return len(self.file_list)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle.fluid as fluid
import numpy as np
from PIL import Image
class Dataset(fluid.io.Dataset):
def __init__(self,
data_dir,
num_classes,
train_list=None,
val_list=None,
test_list=None,
separator=' ',
transforms=None,
mode='train'):
self.data_dir = data_dir
self.transforms = transforms
self.file_list = list()
self.mode = mode
self.num_classes = num_classes
if mode.lower() not in ['train', 'eval', 'test']:
raise Exception(
"mode should be 'train', 'eval' or 'test', but got {}.".format(
mode))
if self.transforms is None:
raise Exception("transform is necessary, but it is None.")
self.data_dir = data_dir
if mode == 'train':
if train_list is None:
raise Exception(
'When mode is "train", train_list is need, but it is None.')
elif not os.path.exists(train_list):
raise Exception(
'train_list is not found: {}'.format(train_list))
else:
file_list = train_list
elif mode == 'eval':
if val_list is None:
raise Exception(
'When mode is "eval", val_list is need, but it is None.')
elif not os.path.exists(val_list):
raise Exception('val_list is not found: {}'.format(val_list))
else:
file_list = val_list
else:
if test_list is None:
raise Exception(
'When mode is "test", test_list is need, but it is None.')
elif not os.path.exists(test_list):
raise Exception('test_list is not found: {}'.format(test_list))
else:
file_list = test_list
with open(file_list, 'r') as f:
for line in f:
items = line.strip().split(separator)
if len(items) != 2:
if mode == 'train' or mode == 'eval':
raise Exception(
"File list format incorrect! It should be"
" image_name{}label_name\\n".format(separator))
image_path = os.path.join(self.data_dir, items[0])
grt_path = None
else:
image_path = os.path.join(self.data_dir, items[0])
grt_path = os.path.join(self.data_dir, items[1])
self.file_list.append([image_path, grt_path])
def __getitem__(self, idx):
image_path, grt_path = self.file_list[idx]
if self.mode == 'train':
im, im_info, label = self.transforms(im=image_path, label=grt_path)
return im, label
elif self.mode == 'eval':
im, im_info, _ = self.transforms(im=image_path)
im = im[np.newaxis, ...]
label = np.asarray(Image.open(grt_path))
label = label[np.newaxis, np.newaxis, :, :]
return im, im_info, label
if self.mode == 'test':
im, im_info, _ = self.transforms(im=image_path)
im = im[np.newaxis, ...]
return im, im_info, image_path
def __len__(self):
return len(self.file_list)
......@@ -14,8 +14,7 @@
import os
from paddle.fluid.io import Dataset
from .dataset import Dataset
from utils.download import download_file_and_uncompress
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
......@@ -70,16 +69,3 @@ class OpticDiscSeg(Dataset):
image_path = os.path.join(self.data_dir, items[0])
grt_path = os.path.join(self.data_dir, items[1])
self.file_list.append([image_path, grt_path])
def __getitem__(self, idx):
image_path, grt_path = self.file_list[idx]
im, im_info, label = self.transforms(im=image_path, label=grt_path)
if self.mode == 'train':
return im, label
elif self.mode == 'eval':
return im, label
if self.mode == 'test':
return im, im_info, image_path
def __len__(self):
return len(self.file_list)
......@@ -24,7 +24,7 @@ import tqdm
from datasets import OpticDiscSeg, Cityscapes
import transforms as T
import models
from models import MODELS
import utils
import utils.logging as logging
from utils import get_environ_info
......@@ -37,7 +37,8 @@ def parse_args():
parser.add_argument(
'--model_name',
dest='model_name',
help="Model type for traing, which is one of ('UNet')",
help='Model type for testing, which is one of {}'.format(
str(list(MODELS.keys()))),
type=str,
default='UNet')
......@@ -97,19 +98,20 @@ def infer(model, test_dataset=None, model_dir=None, save_dir='output'):
logging.info("Start to predict...")
for im, im_info, im_path in tqdm.tqdm(test_dataset):
im = im[np.newaxis, ...]
im = to_variable(im)
pred, _ = model(im, mode='test')
pred = pred.numpy()
pred = np.squeeze(pred).astype('uint8')
keys = list(im_info.keys())
for k in keys[::-1]:
if k == 'shape_before_resize':
h, w = im_info[k][0], im_info[k][1]
for info in im_info[::-1]:
if info[0] == 'resize':
h, w = info[1][0], info[1][1]
pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
elif k == 'shape_before_padding':
h, w = im_info[k][0], im_info[k][1]
elif info[0] == 'padding':
h, w = info[1][0], info[1][1]
pred = pred[0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(
info[0]))
im_file = im_path.replace(test_dataset.data_dir, '')
if im_file[0] == '/':
......@@ -146,8 +148,11 @@ def main(args):
test_transforms = T.Compose([T.Resize(args.input_size), T.Normalize()])
test_dataset = dataset(transforms=test_transforms, mode='test')
if args.model_name == 'UNet':
model = models.UNet(num_classes=test_dataset.num_classes)
if args.model_name not in MODELS:
raise Exception(
'--model_name is invalid. it should be one of {}'.format(
str(list(MODELS.keys()))))
model = MODELS[args.model_name](num_classes=test_dataset.num_classes)
infer(
model,
......
......@@ -13,3 +13,28 @@
# limitations under the License.
from .unet import UNet
from .hrnet import *
MODELS = {
"UNet": UNet,
"HRNet_W18_Small_V1": HRNet_W18_Small_V1,
"HRNet_W18_Small_V2": HRNet_W18_Small_V2,
"HRNet_W18": HRNet_W18,
"HRNet_W30": HRNet_W30,
"HRNet_W32": HRNet_W32,
"HRNet_W40": HRNet_W40,
"HRNet_W44": HRNet_W44,
"HRNet_W48": HRNet_W48,
"HRNet_W60": HRNet_W48,
"HRNet_W64": HRNet_W64,
"SE_HRNet_W18_Small_V1": SE_HRNet_W18_Small_V1,
"SE_HRNet_W18_Small_V2": SE_HRNet_W18_Small_V2,
"SE_HRNet_W18": SE_HRNet_W18,
"SE_HRNet_W30": SE_HRNet_W30,
"SE_HRNet_W32": SE_HRNet_W30,
"SE_HRNet_W40": SE_HRNet_W40,
"SE_HRNet_W44": SE_HRNet_W44,
"SE_HRNet_W48": SE_HRNet_W48,
"SE_HRNet_W60": SE_HRNet_W60,
"SE_HRNet_W64": SE_HRNet_W64
}
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
try:
from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm
except:
from paddle.fluid.dygraph import BatchNorm
__all__ = [
"HRNet_W18_Small_V1", "HRNet_W18_Small_V2", "HRNet_W18", "HRNet_W30",
"HRNet_W32", "HRNet_W40", "HRNet_W44", "HRNet_W48", "HRNet_W60",
"HRNet_W64", "SE_HRNet_W18_Small_V1", "SE_HRNet_W18_Small_V2",
"SE_HRNet_W18", "SE_HRNet_W30", "SE_HRNet_W32", "SE_HRNet_W40",
"SE_HRNet_W44", "SE_HRNet_W48", "SE_HRNet_W60", "SE_HRNet_W64"
]
class HRNet(fluid.dygraph.Layer):
def __init__(self,
num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[18, 36],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[18, 36, 72],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[18, 36, 72, 144],
has_se=False,
ignore_index=255):
super(HRNet, self).__init__()
self.num_classes = num_classes
self.stage1_num_modules = stage1_num_modules
self.stage1_num_blocks = stage1_num_blocks
self.stage1_num_channels = stage1_num_channels
self.stage2_num_modules = stage2_num_modules
self.stage2_num_blocks = stage2_num_blocks
self.stage2_num_channels = stage2_num_channels
self.stage3_num_modules = stage3_num_modules
self.stage3_num_blocks = stage3_num_blocks
self.stage3_num_channels = stage3_num_channels
self.stage4_num_modules = stage4_num_modules
self.stage4_num_blocks = stage4_num_blocks
self.stage4_num_channels = stage4_num_channels
self.has_se = has_se
self.ignore_index = ignore_index
self.EPS = 1e-5
self.conv_layer1_1 = ConvBNLayer(
num_channels=3,
num_filters=64,
filter_size=3,
stride=2,
act='relu',
name="layer1_1")
self.conv_layer1_2 = ConvBNLayer(
num_channels=64,
num_filters=64,
filter_size=3,
stride=2,
act='relu',
name="layer1_2")
self.la1 = Layer1(
num_channels=64,
num_blocks=self.stage1_num_blocks[0],
num_filters=self.stage1_num_channels[0],
has_se=has_se,
name="layer2")
self.tr1 = TransitionLayer(
in_channels=[self.stage1_num_channels[0] * 4],
out_channels=self.stage2_num_channels,
name="tr1")
self.st2 = Stage(
num_channels=self.stage2_num_channels,
num_modules=self.stage2_num_modules,
num_blocks=self.stage2_num_blocks,
num_filters=self.stage2_num_channels,
has_se=self.has_se,
name="st2")
self.tr2 = TransitionLayer(
in_channels=self.stage2_num_channels,
out_channels=self.stage3_num_channels,
name="tr2")
self.st3 = Stage(
num_channels=self.stage3_num_channels,
num_modules=self.stage3_num_modules,
num_blocks=self.stage3_num_blocks,
num_filters=self.stage3_num_channels,
name="st3")
self.tr3 = TransitionLayer(
in_channels=self.stage3_num_channels,
out_channels=self.stage4_num_channels,
name="tr3")
self.st4 = Stage(
num_channels=self.stage4_num_channels,
num_modules=self.stage4_num_modules,
num_blocks=self.stage4_num_blocks,
num_filters=self.stage4_num_channels,
name="st4")
last_inp_channels = sum(self.stage4_num_channels)
self.conv_last_2 = ConvBNLayer(
num_channels=last_inp_channels,
num_filters=last_inp_channels,
filter_size=1,
stride=1,
name='conv-2')
self.conv_last_1 = Conv2D(
num_channels=last_inp_channels,
num_filters=self.num_classes,
filter_size=1,
stride=1,
padding=0,
param_attr=ParamAttr(name='conv-1_weights'))
def forward(self, x, label=None, mode='train'):
input_shape = x.shape[2:]
conv1 = self.conv_layer1_1(x)
conv2 = self.conv_layer1_2(conv1)
la1 = self.la1(conv2)
tr1 = self.tr1([la1])
st2 = self.st2(tr1)
tr2 = self.tr2(st2)
st3 = self.st3(tr2)
tr3 = self.tr3(st3)
st4 = self.st4(tr3)
x0_h, x0_w = st4[0].shape[2:]
x1 = fluid.layers.resize_bilinear(st4[1], out_shape=(x0_h, x0_w))
x2 = fluid.layers.resize_bilinear(st4[2], out_shape=(x0_h, x0_w))
x3 = fluid.layers.resize_bilinear(st4[3], out_shape=(x0_h, x0_w))
x = fluid.layers.concat([st4[0], x1, x2, x3], axis=1)
x = self.conv_last_2(x)
logit = self.conv_last_1(x)
logit = fluid.layers.resize_bilinear(logit, input_shape)
if mode == 'train':
if label is None:
raise Exception('Label is need during training')
return self._get_loss(logit, label)
else:
score_map = fluid.layers.softmax(logit, axis=1)
score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1])
pred = fluid.layers.argmax(score_map, axis=3)
pred = fluid.layers.unsqueeze(pred, axes=[3])
return pred, score_map
def _get_loss(self, logit, label):
logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
label = fluid.layers.transpose(label, [0, 2, 3, 1])
mask = label != self.ignore_index
mask = fluid.layers.cast(mask, 'float32')
loss, probs = fluid.layers.softmax_with_cross_entropy(
logit,
label,
ignore_index=self.ignore_index,
return_softmax=True,
axis=-1)
loss = loss * mask
avg_loss = fluid.layers.mean(loss) / (
fluid.layers.mean(mask) + self.EPS)
label.stop_gradient = True
mask.stop_gradient = True
return avg_loss
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act="relu",
name=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
param_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
bn_name = name + '_bn'
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def forward(self, input):
y = self._conv(input)
y = self._batch_norm(y)
return y
class Layer1(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
num_blocks,
has_se=False,
name=None):
super(Layer1, self).__init__()
self.bottleneck_block_list = []
for i in range(num_blocks):
bottleneck_block = self.add_sublayer(
"bb_{}_{}".format(name, i + 1),
BottleneckBlock(
num_channels=num_channels if i == 0 else num_filters * 4,
num_filters=num_filters,
has_se=has_se,
stride=1,
downsample=True if i == 0 else False,
name=name + '_' + str(i + 1)))
self.bottleneck_block_list.append(bottleneck_block)
def forward(self, input):
conv = input
for block_func in self.bottleneck_block_list:
conv = block_func(conv)
return conv
class TransitionLayer(fluid.dygraph.Layer):
def __init__(self, in_channels, out_channels, name=None):
super(TransitionLayer, self).__init__()
num_in = len(in_channels)
num_out = len(out_channels)
self.conv_bn_func_list = []
for i in range(num_out):
residual = None
if i < num_in:
if in_channels[i] != out_channels[i]:
residual = self.add_sublayer(
"transition_{}_layer_{}".format(name, i + 1),
ConvBNLayer(
num_channels=in_channels[i],
num_filters=out_channels[i],
filter_size=3,
name=name + '_layer_' + str(i + 1)))
else:
residual = self.add_sublayer(
"transition_{}_layer_{}".format(name, i + 1),
ConvBNLayer(
num_channels=in_channels[-1],
num_filters=out_channels[i],
filter_size=3,
stride=2,
name=name + '_layer_' + str(i + 1)))
self.conv_bn_func_list.append(residual)
def forward(self, input):
outs = []
for idx, conv_bn_func in enumerate(self.conv_bn_func_list):
if conv_bn_func is None:
outs.append(input[idx])
else:
if idx < len(input):
outs.append(conv_bn_func(input[idx]))
else:
outs.append(conv_bn_func(input[-1]))
return outs
class Branches(fluid.dygraph.Layer):
def __init__(self,
num_blocks,
in_channels,
out_channels,
has_se=False,
name=None):
super(Branches, self).__init__()
self.basic_block_list = []
for i in range(len(out_channels)):
self.basic_block_list.append([])
for j in range(num_blocks[i]):
in_ch = in_channels[i] if j == 0 else out_channels[i]
basic_block_func = self.add_sublayer(
"bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1),
BasicBlock(
num_channels=in_ch,
num_filters=out_channels[i],
has_se=has_se,
name=name + '_branch_layer_' + str(i + 1) + '_' +
str(j + 1)))
self.basic_block_list[i].append(basic_block_func)
def forward(self, inputs):
outs = []
for idx, input in enumerate(inputs):
conv = input
for basic_block_func in self.basic_block_list[idx]:
conv = basic_block_func(conv)
outs.append(conv)
return outs
class BottleneckBlock(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
has_se,
stride=1,
downsample=False,
name=None):
super(BottleneckBlock, self).__init__()
self.has_se = has_se
self.downsample = downsample
self.conv1 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act="relu",
name=name + "_conv1",
)
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
act="relu",
name=name + "_conv2")
self.conv3 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
act=None,
name=name + "_conv3")
if self.downsample:
self.conv_down = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
act=None,
name=name + "_downsample")
if self.has_se:
self.se = SELayer(
num_channels=num_filters * 4,
num_filters=num_filters * 4,
reduction_ratio=16,
name=name + '_fc')
def forward(self, input):
residual = input
conv1 = self.conv1(input)
conv2 = self.conv2(conv1)
conv3 = self.conv3(conv2)
if self.downsample:
residual = self.conv_down(input)
if self.has_se:
conv3 = self.se(conv3)
y = fluid.layers.elementwise_add(x=conv3, y=residual, act="relu")
return y
class BasicBlock(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
stride=1,
has_se=False,
downsample=False,
name=None):
super(BasicBlock, self).__init__()
self.has_se = has_se
self.downsample = downsample
self.conv1 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=3,
stride=stride,
act="relu",
name=name + "_conv1")
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=1,
act=None,
name=name + "_conv2")
if self.downsample:
self.conv_down = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
act="relu",
name=name + "_downsample")
if self.has_se:
self.se = SELayer(
num_channels=num_filters,
num_filters=num_filters,
reduction_ratio=16,
name=name + '_fc')
def forward(self, input):
residual = input
conv1 = self.conv1(input)
conv2 = self.conv2(conv1)
if self.downsample:
residual = self.conv_down(input)
if self.has_se:
conv2 = self.se(conv2)
y = fluid.layers.elementwise_add(x=conv2, y=residual, act="relu")
return y
class SELayer(fluid.dygraph.Layer):
def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
super(SELayer, self).__init__()
self.pool2d_gap = Pool2D(pool_type='avg', global_pooling=True)
self._num_channels = num_channels
med_ch = int(num_channels / reduction_ratio)
stdv = 1.0 / math.sqrt(num_channels * 1.0)
self.squeeze = Linear(
num_channels,
med_ch,
act="relu",
param_attr=ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + "_sqz_weights"),
bias_attr=ParamAttr(name=name + '_sqz_offset'))
stdv = 1.0 / math.sqrt(med_ch * 1.0)
self.excitation = Linear(
med_ch,
num_filters,
act="sigmoid",
param_attr=ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + "_exc_weights"),
bias_attr=ParamAttr(name=name + '_exc_offset'))
def forward(self, input):
pool = self.pool2d_gap(input)
pool = fluid.layers.reshape(pool, shape=[-1, self._num_channels])
squeeze = self.squeeze(pool)
excitation = self.excitation(squeeze)
excitation = fluid.layers.reshape(
excitation, shape=[-1, self._num_channels, 1, 1])
out = input * excitation
return out
class Stage(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_modules,
num_blocks,
num_filters,
has_se=False,
multi_scale_output=True,
name=None):
super(Stage, self).__init__()
self._num_modules = num_modules
self.stage_func_list = []
for i in range(num_modules):
if i == num_modules - 1 and not multi_scale_output:
stage_func = self.add_sublayer(
"stage_{}_{}".format(name, i + 1),
HighResolutionModule(
num_channels=num_channels,
num_blocks=num_blocks,
num_filters=num_filters,
has_se=has_se,
multi_scale_output=False,
name=name + '_' + str(i + 1)))
else:
stage_func = self.add_sublayer(
"stage_{}_{}".format(name, i + 1),
HighResolutionModule(
num_channels=num_channels,
num_blocks=num_blocks,
num_filters=num_filters,
has_se=has_se,
name=name + '_' + str(i + 1)))
self.stage_func_list.append(stage_func)
def forward(self, input):
out = input
for idx in range(self._num_modules):
out = self.stage_func_list[idx](out)
return out
class HighResolutionModule(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_blocks,
num_filters,
has_se=False,
multi_scale_output=True,
name=None):
super(HighResolutionModule, self).__init__()
self.branches_func = Branches(
num_blocks=num_blocks,
in_channels=num_channels,
out_channels=num_filters,
has_se=has_se,
name=name)
self.fuse_func = FuseLayers(
in_channels=num_filters,
out_channels=num_filters,
multi_scale_output=multi_scale_output,
name=name)
def forward(self, input):
out = self.branches_func(input)
out = self.fuse_func(out)
return out
class FuseLayers(fluid.dygraph.Layer):
def __init__(self,
in_channels,
out_channels,
multi_scale_output=True,
name=None):
super(FuseLayers, self).__init__()
self._actual_ch = len(in_channels) if multi_scale_output else 1
self._in_channels = in_channels
self.residual_func_list = []
for i in range(self._actual_ch):
for j in range(len(in_channels)):
residual_func = None
if j > i:
residual_func = self.add_sublayer(
"residual_{}_layer_{}_{}".format(name, i + 1, j + 1),
ConvBNLayer(
num_channels=in_channels[j],
num_filters=out_channels[i],
filter_size=1,
stride=1,
act=None,
name=name + '_layer_' + str(i + 1) + '_' +
str(j + 1)))
self.residual_func_list.append(residual_func)
elif j < i:
pre_num_filters = in_channels[j]
for k in range(i - j):
if k == i - j - 1:
residual_func = self.add_sublayer(
"residual_{}_layer_{}_{}_{}".format(
name, i + 1, j + 1, k + 1),
ConvBNLayer(
num_channels=pre_num_filters,
num_filters=out_channels[i],
filter_size=3,
stride=2,
act=None,
name=name + '_layer_' + str(i + 1) + '_' +
str(j + 1) + '_' + str(k + 1)))
pre_num_filters = out_channels[i]
else:
residual_func = self.add_sublayer(
"residual_{}_layer_{}_{}_{}".format(
name, i + 1, j + 1, k + 1),
ConvBNLayer(
num_channels=pre_num_filters,
num_filters=out_channels[j],
filter_size=3,
stride=2,
act="relu",
name=name + '_layer_' + str(i + 1) + '_' +
str(j + 1) + '_' + str(k + 1)))
pre_num_filters = out_channels[j]
self.residual_func_list.append(residual_func)
def forward(self, input):
outs = []
residual_func_idx = 0
for i in range(self._actual_ch):
residual = input[i]
for j in range(len(self._in_channels)):
if j > i:
y = self.residual_func_list[residual_func_idx](input[j])
residual_func_idx += 1
y = fluid.layers.resize_nearest(input=y, scale=2**(j - i))
residual = fluid.layers.elementwise_add(
x=residual, y=y, act=None)
elif j < i:
y = input[j]
for k in range(i - j):
y = self.residual_func_list[residual_func_idx](y)
residual_func_idx += 1
residual = fluid.layers.elementwise_add(
x=residual, y=y, act=None)
layer_helper = LayerHelper(self.full_name(), act='relu')
residual = layer_helper.append_activation(residual)
outs.append(residual)
return outs
class LastClsOut(fluid.dygraph.Layer):
def __init__(self,
num_channel_list,
has_se,
num_filters_list=[32, 64, 128, 256],
name=None):
super(LastClsOut, self).__init__()
self.func_list = []
for idx in range(len(num_channel_list)):
func = self.add_sublayer(
"conv_{}_conv_{}".format(name, idx + 1),
BottleneckBlock(
num_channels=num_channel_list[idx],
num_filters=num_filters_list[idx],
has_se=has_se,
downsample=True,
name=name + 'conv_' + str(idx + 1)))
self.func_list.append(func)
def forward(self, inputs):
outs = []
for idx, input in enumerate(inputs):
out = self.func_list[idx](input)
outs.append(out)
return outs
def HRNet_W18_Small_V1(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[1],
stage1_num_channels=[32],
stage2_num_modules=1,
stage2_num_blocks=[2, 2],
stage2_num_channels=[16, 32],
stage3_num_modules=1,
stage3_num_blocks=[2, 2, 2],
stage3_num_channels=[16, 32, 64],
stage4_num_modules=1,
stage4_num_blocks=[2, 2, 2, 2],
stage4_num_channels=[16, 32, 64, 128])
return model
def HRNet_W18_Small_V2(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[2],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[2, 2],
stage2_num_channels=[18, 36],
stage3_num_modules=1,
stage3_num_blocks=[2, 2, 2],
stage3_num_channels=[18, 36, 72],
stage4_num_modules=1,
stage4_num_blocks=[2, 2, 2, 2],
stage4_num_channels=[18, 36, 72, 144])
return model
def HRNet_W18(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[18, 36],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[18, 36, 72],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[18, 36, 72, 144])
return model
def HRNet_W30(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[30, 60],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[30, 60, 120],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[30, 60, 120, 240])
return model
def HRNet_W32(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[32, 64],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[32, 64, 128],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[32, 64, 128, 256])
return model
def HRNet_W40(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[40, 80],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[40, 80, 160],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[40, 80, 160, 320])
return model
def HRNet_W44(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[44, 88],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[44, 88, 176],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[44, 88, 176, 352])
return model
def HRNet_W48(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[48, 96],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[48, 96, 192],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[48, 96, 192, 384])
return model
def HRNet_W60(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[60, 120],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[60, 120, 240],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[60, 120, 240, 480])
return model
def HRNet_W64(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[64, 128],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[64, 128, 256],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[64, 128, 256, 512])
return model
def SE_HRNet_W18_Small_V1(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[1],
stage1_num_channels=[32],
stage2_num_modules=1,
stage2_num_blocks=[2, 2],
stage2_num_channels=[16, 32],
stage3_num_modules=1,
stage3_num_blocks=[2, 2, 2],
stage3_num_channels=[16, 32, 64],
stage4_num_modules=1,
stage4_num_blocks=[2, 2, 2, 2],
stage4_num_channels=[16, 32, 64, 128],
has_se=True)
return model
def SE_HRNet_W18_Small_V2(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[2],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[2, 2],
stage2_num_channels=[18, 36],
stage3_num_modules=1,
stage3_num_blocks=[2, 2, 2],
stage3_num_channels=[18, 36, 72],
stage4_num_modules=1,
stage4_num_blocks=[2, 2, 2, 2],
stage4_num_channels=[18, 36, 72, 144],
has_se=True)
return model
def SE_HRNet_W18(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[18, 36],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[18, 36, 72],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[18, 36, 72, 144],
has_se=True)
return model
def SE_HRNet_W30(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[30, 60],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[30, 60, 120],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[30, 60, 120, 240],
has_se=True)
return model
def SE_HRNet_W32(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[32, 64],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[32, 64, 128],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[32, 64, 128, 256],
has_se=True)
return model
def SE_HRNet_W40(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[40, 80],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[40, 80, 160],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[40, 80, 160, 320],
has_se=True)
return model
def SE_HRNet_W44(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[44, 88],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[44, 88, 176],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[44, 88, 176, 352],
has_se=True)
return model
def SE_HRNet_W48(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[48, 96],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[48, 96, 192],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[48, 96, 192, 384],
has_se=True)
return model
def SE_HRNet_W60(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[60, 120],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[60, 120, 240],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[60, 120, 240, 480],
has_se=True)
return model
def SE_HRNet_W64(num_classes):
model = HRNet(
num_classes=num_classes,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[64, 128],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[64, 128, 256],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[64, 128, 256, 512],
has_se=True)
return model
......@@ -13,7 +13,11 @@
# limitations under the License.
import paddle.fluid as fluid
from paddle.fluid.dygraph import Conv2D, BatchNorm, Pool2D
from paddle.fluid.dygraph import Conv2D, Pool2D
try:
from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm
except:
from paddle.fluid.dygraph import BatchNorm
class UNet(fluid.dygraph.Layer):
......@@ -39,6 +43,8 @@ class UNet(fluid.dygraph.Layer):
return pred, score_map
def _get_loss(self, logit, label):
logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
label = fluid.layers.transpose(label, [0, 2, 3, 1])
mask = label != self.ignore_index
mask = fluid.layers.cast(mask, 'float32')
loss, probs = fluid.layers.softmax_with_cross_entropy(
......
......@@ -22,7 +22,7 @@ from paddle.incubate.hapi.distributed import DistributedBatchSampler
from datasets import OpticDiscSeg, Cityscapes
import transforms as T
import models
from models import MODELS
import utils.logging as logging
from utils import get_environ_info
from utils import load_pretrained_model
......@@ -38,7 +38,8 @@ def parse_args():
parser.add_argument(
'--model_name',
dest='model_name',
help="Model type for traing, which is one of ('UNet')",
help='Model type for training, which is one of {}'.format(
str(list(MODELS.keys()))),
type=str,
default='UNet')
......@@ -181,7 +182,7 @@ def train(model,
total_steps = steps_per_epoch * (num_epochs - start_epoch)
num_steps = 0
best_mean_iou = -1.0
best_model_epoch = 1
best_model_epoch = -1
for epoch in range(start_epoch, num_epochs):
for step, data in enumerate(loader):
images = data[0]
......@@ -229,10 +230,8 @@ def train(model,
mean_iou, mean_acc = evaluate(
model,
eval_dataset,
places=places,
model_dir=current_save_dir,
num_classes=num_classes,
batch_size=batch_size,
ignore_index=ignore_index,
epoch_id=epoch + 1)
if mean_iou > best_mean_iou:
......@@ -241,9 +240,9 @@ def train(model,
best_model_dir = os.path.join(save_dir, "best_model")
fluid.save_dygraph(model.state_dict(),
os.path.join(best_model_dir, 'model'))
logging.info(
'Current evaluated best model in eval_dataset is epoch_{}, miou={:4f}'
.format(best_model_epoch, best_mean_iou))
logging.info(
'Current evaluated best model in eval_dataset is epoch_{}, miou={:4f}'
.format(best_model_epoch, best_mean_iou))
if use_vdl:
log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
......@@ -286,9 +285,11 @@ def main(args):
T.Normalize()])
eval_dataset = dataset(transforms=eval_transforms, mode='eval')
if args.model_name == 'UNet':
model = models.UNet(
num_classes=train_dataset.num_classes, ignore_index=255)
if args.model_name not in MODELS:
raise Exception(
'--model_name is invalid. it should be one of {}'.format(
str(list(MODELS.keys()))))
model = MODELS[args.model_name](num_classes=train_dataset.num_classes)
# Creat optimizer
# todo, may less one than len(loader)
......
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
......@@ -12,27 +13,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .functional import *
import random
from collections import OrderedDict
import numpy as np
from PIL import Image
import cv2
from collections import OrderedDict
from .functional import *
class Compose:
"""根据数据预处理/增强算子对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强算子。
to_rgb (bool): 是否转化为rgb通道格式
Raises:
TypeError: transforms不是list对象
ValueError: transforms元素个数小于1。
"""
class Compose:
def __init__(self, transforms, to_rgb=True):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
......@@ -43,20 +34,8 @@ class Compose:
self.to_rgb = to_rgb
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (str/np.ndarray): 图像路径/图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息,dict中的字段如下:
- shape_before_resize (tuple): 图像resize之前的大小(h, w)。
- shape_before_padding (tuple): 图像padding之前的大小(h, w)。
label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。
Returns:
tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。
"""
if im_info is None:
im_info = dict()
im_info = list()
if isinstance(im, str):
im = cv2.imread(im).astype('float32')
if isinstance(label, str):
......@@ -80,27 +59,10 @@ class Compose:
class RandomHorizontalFlip:
"""以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。
Args:
prob (float): 随机水平翻转的概率。默认值为0.5。
"""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if random.random() < self.prob:
im = horizontal_flip(im)
if label is not None:
......@@ -112,26 +74,10 @@ class RandomHorizontalFlip:
class RandomVerticalFlip:
"""以一定的概率对图像进行垂直翻转。当存在标注图像时,则同步进行翻转。
Args:
prob (float): 随机垂直翻转的概率。默认值为0.1。
"""
def __init__(self, prob=0.1):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if random.random() < self.prob:
im = vertical_flip(im)
if label is not None:
......@@ -143,25 +89,6 @@ class RandomVerticalFlip:
class Resize:
"""调整图像大小(resize)。
- 当目标大小(target_size)类型为int时,根据插值方式,
将图像resize为[target_size, target_size]。
- 当目标大小(target_size)类型为list或tuple时,根据插值方式,
将图像resize为target_size。
注意:当插值方式为“RANDOM”时,则随机选取一种插值方式进行resize。
Args:
target_size (int/list/tuple): 短边目标长度。默认为608。
interp (str): resize的插值方式,与opencv的插值方式对应,取值范围为
['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']。默认为"LINEAR"。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 插值方式不在['NEAREST', 'LINEAR', 'CUBIC',
'AREA', 'LANCZOS4', 'RANDOM']中。
"""
# The interpolation mode
interp_dict = {
'NEAREST': cv2.INTER_NEAREST,
......@@ -189,26 +116,9 @@ class Resize:
self.target_size = target_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info跟新字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if im_info is None:
im_info = OrderedDict()
im_info['shape_before_resize'] = im.shape[:2]
im_info = list()
im_info.append(('resize', im.shape[:2]))
if not isinstance(im, np.ndarray):
raise TypeError("Resize: image type is not numpy.")
if len(im.shape) != 3:
......@@ -228,32 +138,14 @@ class Resize:
class ResizeByLong:
"""对图像长边resize到固定值,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
Args:
long_size (int): resize后图像的长边大小。
"""
def __init__(self, long_size):
self.long_size = long_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info新增字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
"""
if im_info is None:
im_info = OrderedDict()
im_info = list()
im_info['shape_before_resize'] = im.shape[:2]
im_info.append(('resize', im.shape[:2]))
im = resize_long(im, self.long_size)
if label is not None:
label = resize_long(label, self.long_size, cv2.INTER_NEAREST)
......@@ -265,15 +157,6 @@ class ResizeByLong:
class ResizeRangeScaling:
"""对图像长边随机resize到指定范围内,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
Args:
min_value (int): 图像长边resize后的最小值。默认值400。
max_value (int): 图像长边resize后的最大值。默认值600。
Raises:
ValueError: min_value大于max_value
"""
def __init__(self, min_value=400, max_value=600):
if min_value > max_value:
raise ValueError('min_value must be less than max_value, '
......@@ -283,17 +166,6 @@ class ResizeRangeScaling:
self.max_value = max_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_value == self.max_value:
random_size = self.max_value
else:
......@@ -310,17 +182,6 @@ class ResizeRangeScaling:
class ResizeStepScaling:
"""对图像按照某一个比例resize,这个比例以scale_step_size为步长
在[min_scale_factor, max_scale_factor]随机变动。当存在标注图像时,则同步进行处理。
Args:
min_scale_factor(float), resize最小尺度。默认值0.75。
max_scale_factor (float), resize最大尺度。默认值1.25。
scale_step_size (float), resize尺度范围间隔。默认值0.25。
Raises:
ValueError: min_scale_factor大于max_scale_factor
"""
def __init__(self,
min_scale_factor=0.75,
max_scale_factor=1.25,
......@@ -335,17 +196,6 @@ class ResizeStepScaling:
self.scale_step_size = scale_step_size
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_scale_factor == self.max_scale_factor:
scale_factor = self.min_scale_factor
......@@ -375,17 +225,6 @@ class ResizeStepScaling:
class Normalize:
"""对图像进行标准化。
1.尺度缩放到 [0,1]。
2.对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]。
std (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。
Raises:
ValueError: mean或std不是list对象。std包含0。
"""
def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
self.mean = mean
self.std = std
......@@ -396,18 +235,6 @@ class Normalize:
raise ValueError('{}: std is invalid!'.format(self))
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
......@@ -419,18 +246,6 @@ class Normalize:
class Padding:
"""对图像或标注图像进行padding,padding方向为右和下。
根据提供的值对图像或标注图像进行padding操作。
Args:
target_size (int|list|tuple): padding后图像的大小。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认值为255。
Raises:
TypeError: target_size不是int|list|tuple。
ValueError: target_size为list|tuple时元素个数不等于2。
"""
def __init__(self,
target_size,
im_padding_value=[127.5, 127.5, 127.5],
......@@ -449,25 +264,9 @@ class Padding:
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info新增字段为:
-shape_before_padding (tuple): 保存padding之前图像的形状(h, w)。
Raises:
ValueError: 输入图像im或label的形状大于目标值
"""
if im_info is None:
im_info = OrderedDict()
im_info['shape_before_padding'] = im.shape[:2]
im_info = list()
im_info.append(('padding', im.shape[:2]))
im_height, im_width = im.shape[0], im.shape[1]
if isinstance(self.target_size, int):
......@@ -483,21 +282,23 @@ class Padding:
'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})'
.format(im_width, im_height, target_width, target_height))
else:
im = cv2.copyMakeBorder(im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
im = cv2.copyMakeBorder(
im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None:
label = cv2.copyMakeBorder(label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
label = cv2.copyMakeBorder(
label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
if label is None:
return (im, im_info)
else:
......@@ -505,17 +306,6 @@ class Padding:
class RandomPaddingCrop:
"""对图像和标注图进行随机裁剪,当所需要的裁剪尺寸大于原图时,则进行padding操作。
Args:
crop_size (int|list|tuple): 裁剪图像大小。默认为512。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认值为255。
Raises:
TypeError: crop_size不是int/list/tuple。
ValueError: target_size为list/tuple时元素个数不等于2。
"""
def __init__(self,
crop_size=512,
im_padding_value=[127.5, 127.5, 127.5],
......@@ -534,17 +324,6 @@ class RandomPaddingCrop:
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if isinstance(self.crop_size, int):
crop_width = self.crop_size
crop_height = self.crop_size
......@@ -564,21 +343,23 @@ class RandomPaddingCrop:
pad_height = max(crop_height - img_height, 0)
pad_width = max(crop_width - img_width, 0)
if (pad_height > 0 or pad_width > 0):
im = cv2.copyMakeBorder(im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
im = cv2.copyMakeBorder(
im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None:
label = cv2.copyMakeBorder(label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
label = cv2.copyMakeBorder(
label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
img_height = im.shape[0]
img_width = im.shape[1]
......@@ -586,11 +367,11 @@ class RandomPaddingCrop:
h_off = np.random.randint(img_height - crop_height + 1)
w_off = np.random.randint(img_width - crop_width + 1)
im = im[h_off:(crop_height + h_off), w_off:(w_off +
crop_width), :]
im = im[h_off:(crop_height + h_off), w_off:(
w_off + crop_width), :]
if label is not None:
label = label[h_off:(crop_height +
h_off), w_off:(w_off + crop_width)]
label = label[h_off:(crop_height + h_off), w_off:(
w_off + crop_width)]
if label is None:
return (im, im_info)
else:
......@@ -598,26 +379,10 @@ class RandomPaddingCrop:
class RandomBlur:
"""以一定的概率对图像进行高斯模糊。
Args:
prob (float): 图像模糊概率。默认为0.1。
"""
def __init__(self, prob=0.1):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.prob <= 0:
n = 0
elif self.prob >= 1:
......@@ -640,16 +405,6 @@ class RandomBlur:
class RandomRotation:
"""对图像进行随机旋转。
在不超过最大旋转角度的情况下,图像进行随机旋转,当存在标注图像时,同步进行,
并对旋转后的图像和标注图像进行相应的padding。
Args:
max_rotation (float): 最大旋转角度。默认为15度。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认为255。
"""
def __init__(self,
max_rotation=15,
im_padding_value=[127.5, 127.5, 127.5],
......@@ -659,17 +414,6 @@ class RandomRotation:
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.max_rotation > 0:
(h, w) = im.shape[:2]
do_rotation = np.random.uniform(-self.max_rotation,
......@@ -686,18 +430,20 @@ class RandomRotation:
r[0, 2] += (nw / 2) - cx
r[1, 2] += (nh / 2) - cy
dsize = (nw, nh)
im = cv2.warpAffine(im,
r,
dsize=dsize,
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_CONSTANT,
borderValue=self.im_padding_value)
label = cv2.warpAffine(label,
r,
dsize=dsize,
flags=cv2.INTER_NEAREST,
borderMode=cv2.BORDER_CONSTANT,
borderValue=self.label_padding_value)
im = cv2.warpAffine(
im,
r,
dsize=dsize,
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_CONSTANT,
borderValue=self.im_padding_value)
label = cv2.warpAffine(
label,
r,
dsize=dsize,
flags=cv2.INTER_NEAREST,
borderMode=cv2.BORDER_CONSTANT,
borderValue=self.label_padding_value)
if label is None:
return (im, im_info)
......@@ -706,29 +452,11 @@ class RandomRotation:
class RandomScaleAspect:
"""裁剪并resize回原始尺寸的图像和标注图像。
按照一定的面积比和宽高比对图像进行裁剪,并reszie回原始图像的图像,当存在标注图时,同步进行。
Args:
min_scale (float):裁取图像占原始图像的面积比,取值[0,1],为0时则返回原图。默认为0.5。
aspect_ratio (float): 裁取图像的宽高比范围,非负值,为0时返回原图。默认为0.33。
"""
def __init__(self, min_scale=0.5, aspect_ratio=0.33):
self.min_scale = min_scale
self.aspect_ratio = aspect_ratio
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_scale != 0 and self.aspect_ratio != 0:
img_height = im.shape[0]
img_width = im.shape[1]
......@@ -751,10 +479,12 @@ class RandomScaleAspect:
im = im[h1:(h1 + dh), w1:(w1 + dw), :]
label = label[h1:(h1 + dh), w1:(w1 + dw)]
im = cv2.resize(im, (img_width, img_height),
interpolation=cv2.INTER_LINEAR)
label = cv2.resize(label, (img_width, img_height),
interpolation=cv2.INTER_NEAREST)
im = cv2.resize(
im, (img_width, img_height),
interpolation=cv2.INTER_LINEAR)
label = cv2.resize(
label, (img_width, img_height),
interpolation=cv2.INTER_NEAREST)
break
if label is None:
return (im, im_info)
......@@ -763,21 +493,6 @@ class RandomScaleAspect:
class RandomDistort:
"""对图像进行随机失真。
1. 对变换的操作顺序进行随机化操作。
2. 按照1中的顺序以一定的概率对图像进行随机像素内容变换。
Args:
brightness_range (float): 明亮度因子的范围。默认为0.5。
brightness_prob (float): 随机调整明亮度的概率。默认为0.5。
contrast_range (float): 对比度因子的范围。默认为0.5。
contrast_prob (float): 随机调整对比度的概率。默认为0.5。
saturation_range (float): 饱和度因子的范围。默认为0.5。
saturation_prob (float): 随机调整饱和度的概率。默认为0.5。
hue_range (int): 色调因子的范围。默认为18。
hue_prob (float): 随机调整色调的概率。默认为0.5。
"""
def __init__(self,
brightness_range=0.5,
brightness_prob=0.5,
......@@ -797,17 +512,6 @@ class RandomDistort:
self.hue_prob = hue_prob
def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
brightness_lower = 1 - self.brightness_range
brightness_upper = 1 + self.brightness_range
contrast_lower = 1 - self.contrast_range
......
......@@ -52,7 +52,11 @@ def load_pretrained_model(model, pretrained_model):
logging.info('Load pretrained model from {}'.format(pretrained_model))
if os.path.exists(pretrained_model):
ckpt_path = os.path.join(pretrained_model, 'model')
para_state_dict, _ = fluid.load_dygraph(ckpt_path)
try:
para_state_dict, _ = fluid.load_dygraph(ckpt_path)
except:
para_state_dict = fluid.load_program_state(pretrained_model)
model_state_dict = model.state_dict()
keys = model_state_dict.keys()
num_params_loaded = 0
......
......@@ -16,8 +16,10 @@ import argparse
import os
import math
from paddle.fluid.dygraph.base import to_variable
import numpy as np
import tqdm
import cv2
from paddle.fluid.dygraph.base import to_variable
import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.io import DataLoader
......@@ -25,7 +27,7 @@ from paddle.fluid.dataloader import BatchSampler
from datasets import OpticDiscSeg, Cityscapes
import transforms as T
import models
from models import MODELS
import utils.logging as logging
from utils import get_environ_info
from utils import ConfusionMatrix
......@@ -39,7 +41,8 @@ def parse_args():
parser.add_argument(
'--model_name',
dest='model_name',
help="Model type for evaluation, which is one of ('UNet')",
help='Model type for evaluation, which is one of {}'.format(
str(list(MODELS.keys()))),
type=str,
default='UNet')
......@@ -60,12 +63,6 @@ def parse_args():
nargs=2,
default=[512, 512],
type=int)
parser.add_argument(
'--batch_size',
dest='batch_size',
help='Mini batch size',
type=int,
default=2)
parser.add_argument(
'--model_dir',
dest='model_dir',
......@@ -78,10 +75,8 @@ def parse_args():
def evaluate(model,
eval_dataset=None,
places=None,
model_dir=None,
num_classes=None,
batch_size=2,
ignore_index=255,
epoch_id=None):
ckpt_path = os.path.join(model_dir, 'model')
......@@ -89,15 +84,7 @@ def evaluate(model,
model.set_dict(para_state_dict)
model.eval()
batch_sampler = BatchSampler(
eval_dataset, batch_size=batch_size, shuffle=False, drop_last=False)
loader = DataLoader(
eval_dataset,
batch_sampler=batch_sampler,
places=places,
return_list=True,
)
total_steps = len(batch_sampler)
total_steps = len(eval_dataset)
conf_mat = ConfusionMatrix(num_classes, streaming=True)
logging.info(
......@@ -105,15 +92,26 @@ def evaluate(model,
len(eval_dataset), total_steps))
timer = Timer()
timer.start()
for step, data in enumerate(loader):
images = data[0]
labels = data[1].astype('int64')
pred, _ = model(images, mode='eval')
pred = pred.numpy()
labels = labels.numpy()
mask = labels != ignore_index
conf_mat.calculate(pred=pred, label=labels, ignore=mask)
for step, (im, im_info, label) in enumerate(eval_dataset):
im = to_variable(im)
pred, _ = model(im, mode='eval')
pred = pred.numpy().astype('float32')
pred = np.squeeze(pred)
for info in im_info[::-1]:
if info[0] == 'resize':
h, w = info[1][0], info[1][1]
pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
elif info[0] == 'padding':
h, w = info[1][0], info[1][1]
pred = pred[0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(
info[0]))
pred = pred[np.newaxis, :, :, np.newaxis]
pred = pred.astype('int64')
mask = label != ignore_index
conf_mat.calculate(pred=pred, label=label, ignore=mask)
_, iou = conf_mat.mean_iou()
time_step = timer.elapsed_time()
......@@ -153,16 +151,17 @@ def main(args):
eval_transforms = T.Compose([T.Resize(args.input_size), T.Normalize()])
eval_dataset = dataset(transforms=eval_transforms, mode='eval')
if args.model_name == 'UNet':
model = models.UNet(num_classes=eval_dataset.num_classes)
if args.model_name not in MODELS:
raise Exception(
'--model_name is invalid. it should be one of {}'.format(
str(list(MODELS.keys()))))
model = MODELS[args.model_name](num_classes=eval_dataset.num_classes)
evaluate(
model,
eval_dataset,
places=places,
model_dir=args.model_dir,
num_classes=eval_dataset.num_classes,
batch_size=args.batch_size)
num_classes=eval_dataset.num_classes)
if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册