未验证 提交 27919efd 编写于 作者: W wuzewu 提交者: GitHub

Merge pull request #318 from wuyefeilin/dygraph

# 动态图执行 # 动态图执行
## 数据集设置
```
data_dir='data/path'
train_list='train/list/path'
val_list='val/list/path'
test_list='test/list/path'
num_classes=number/of/dataset/classes
```
## 训练 ## 训练
``` ```
python3 train.py --model_name UNet \ python3 train.py --model_name UNet \
--data_dir $data_dir \ --dataset OpticDiscSeg \
--train_list $train_list \
--val_list $val_list \
--num_classes $num_classes \
--input_size 192 192 \ --input_size 192 192 \
--num_epochs 4 \ --num_epochs 10 \
--save_interval_epochs 1 \ --save_interval_epochs 1 \
--do_eval \
--save_dir output --save_dir output
``` ```
## 评估 ## 评估
``` ```
python3 val.py --model_name UNet \ python3 val.py --model_name UNet \
--data_dir $data_dir \ --dataset OpticDiscSeg \
--val_list $val_list \
--num_classes $num_classes \
--input_size 192 192 \ --input_size 192 192 \
--model_dir output/epoch_1 --model_dir output/best_model
``` ```
## 预测 ## 预测
``` ```
python3 infer.py --model_name UNet \ python3 infer.py --model_name UNet \
--data_dir $data_dir \ --dataset OpticDiscSeg \
--test_list $test_list \ --model_dir output/best_model \
--num_classes $num_classes \ --input_size 192 192
--input_size 192 192 \
--model_dir output/epoch_1
``` ```
...@@ -12,5 +12,6 @@ ...@@ -12,5 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .dataset import Dataset
from .optic_disc_seg import OpticDiscSeg from .optic_disc_seg import OpticDiscSeg
from .cityscapes import Cityscapes from .cityscapes import Cityscapes
...@@ -14,8 +14,7 @@ ...@@ -14,8 +14,7 @@
import os import os
from paddle.fluid.io import Dataset from .dataset import Dataset
from utils.download import download_file_and_uncompress from utils.download import download_file_and_uncompress
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset') DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
...@@ -70,16 +69,3 @@ class Cityscapes(Dataset): ...@@ -70,16 +69,3 @@ class Cityscapes(Dataset):
image_path = os.path.join(self.data_dir, items[0]) image_path = os.path.join(self.data_dir, items[0])
grt_path = os.path.join(self.data_dir, items[1]) grt_path = os.path.join(self.data_dir, items[1])
self.file_list.append([image_path, grt_path]) self.file_list.append([image_path, grt_path])
def __getitem__(self, idx):
image_path, grt_path = self.file_list[idx]
im, im_info, label = self.transforms(im=image_path, label=grt_path)
if self.mode == 'train':
return im, label
elif self.mode == 'eval':
return im, label
if self.mode == 'test':
return im, im_info, image_path
def __len__(self):
return len(self.file_list)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle.fluid as fluid
import numpy as np
from PIL import Image
class Dataset(fluid.io.Dataset):
    """Segmentation dataset driven by a text file list.

    Each non-blank line of the selected list file holds an image path and,
    for training/evaluation, a label path. Both are joined to ``data_dir``
    and are separated by ``separator``.

    Args:
        data_dir: Root directory that the paths in the list file are joined to.
        num_classes: Number of classes; stored on the instance unchanged.
        train_list: Path of the list file read when ``mode`` is 'train'.
        val_list: Path of the list file read when ``mode`` is 'eval'.
        test_list: Path of the list file read when ``mode`` is 'test'.
        separator: String separating the image and label path on each line.
        transforms: Callable invoked as ``transforms(im=..., label=...)`` that
            returns ``(im, im_info, label)``. Required.
        mode: One of 'train', 'eval' or 'test' (case-insensitive).

    Raises:
        Exception: If ``mode`` is invalid, ``transforms`` is None, the list
            file for the selected mode is not given or does not exist, or a
            train/eval line does not contain exactly two fields.
    """

    def __init__(self,
                 data_dir,
                 num_classes,
                 train_list=None,
                 val_list=None,
                 test_list=None,
                 separator=' ',
                 transforms=None,
                 mode='train'):
        if mode.lower() not in ['train', 'eval', 'test']:
            raise Exception(
                "mode should be 'train', 'eval' or 'test', but got {}.".format(
                    mode))
        # The validation above is case-insensitive, so normalise once here.
        # Otherwise e.g. 'Train' would pass the check but then be treated as
        # test mode below and make __getitem__ return None.
        mode = mode.lower()

        if transforms is None:
            raise Exception("transform is necessary, but it is None.")

        self.data_dir = data_dir
        self.transforms = transforms
        self.file_list = list()
        self.mode = mode
        self.num_classes = num_classes

        # Pick the list file that belongs to the requested mode.
        list_name, list_path = {
            'train': ('train_list', train_list),
            'eval': ('val_list', val_list),
            'test': ('test_list', test_list),
        }[mode]
        if list_path is None:
            raise Exception(
                'When mode is "{}", {} is need, but it is None.'.format(
                    mode, list_name))
        if not os.path.exists(list_path):
            raise Exception('{} is not found: {}'.format(list_name, list_path))

        with open(list_path, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Ignore blank lines (e.g. a trailing newline) instead of
                    # turning them into a bogus entry or a format error.
                    continue
                items = line.split(separator)
                if len(items) != 2:
                    if mode == 'train' or mode == 'eval':
                        raise Exception(
                            "File list format incorrect! It should be"
                            " image_name{}label_name\\n".format(separator))
                    # Test lists may omit the label column.
                    grt_path = None
                else:
                    grt_path = os.path.join(self.data_dir, items[1])
                image_path = os.path.join(self.data_dir, items[0])
                self.file_list.append([image_path, grt_path])

    def __getitem__(self, idx):
        """Return the sample at ``idx``; the tuple layout depends on the mode.

        Returns:
            'train': ``(im, label)`` as produced by the transforms.
            'eval': ``(im, im_info, label)`` where ``im`` gets a leading axis
                and ``label`` is the untransformed label image with two
                leading axes added.
            'test': ``(im, im_info, image_path)`` where ``im`` gets a leading
                axis.
        """
        image_path, grt_path = self.file_list[idx]
        if self.mode == 'train':
            im, im_info, label = self.transforms(im=image_path, label=grt_path)
            return im, label
        elif self.mode == 'eval':
            # Only the image is transformed; the label is read at its
            # original size straight from disk.
            im, im_info, _ = self.transforms(im=image_path)
            im = im[np.newaxis, ...]
            label = np.asarray(Image.open(grt_path))
            label = label[np.newaxis, np.newaxis, :, :]
            return im, im_info, label
        elif self.mode == 'test':
            im, im_info, _ = self.transforms(im=image_path)
            im = im[np.newaxis, ...]
            return im, im_info, image_path

    def __len__(self):
        """Return the number of entries read from the list file."""
        return len(self.file_list)
...@@ -14,8 +14,7 @@ ...@@ -14,8 +14,7 @@
import os import os
from paddle.fluid.io import Dataset from .dataset import Dataset
from utils.download import download_file_and_uncompress from utils.download import download_file_and_uncompress
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset') DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
...@@ -70,16 +69,3 @@ class OpticDiscSeg(Dataset): ...@@ -70,16 +69,3 @@ class OpticDiscSeg(Dataset):
image_path = os.path.join(self.data_dir, items[0]) image_path = os.path.join(self.data_dir, items[0])
grt_path = os.path.join(self.data_dir, items[1]) grt_path = os.path.join(self.data_dir, items[1])
self.file_list.append([image_path, grt_path]) self.file_list.append([image_path, grt_path])
def __getitem__(self, idx):
image_path, grt_path = self.file_list[idx]
im, im_info, label = self.transforms(im=image_path, label=grt_path)
if self.mode == 'train':
return im, label
elif self.mode == 'eval':
return im, label
if self.mode == 'test':
return im, im_info, image_path
def __len__(self):
return len(self.file_list)
...@@ -24,7 +24,7 @@ import tqdm ...@@ -24,7 +24,7 @@ import tqdm
from datasets import OpticDiscSeg, Cityscapes from datasets import OpticDiscSeg, Cityscapes
import transforms as T import transforms as T
import models from models import MODELS
import utils import utils
import utils.logging as logging import utils.logging as logging
from utils import get_environ_info from utils import get_environ_info
...@@ -37,7 +37,8 @@ def parse_args(): ...@@ -37,7 +37,8 @@ def parse_args():
parser.add_argument( parser.add_argument(
'--model_name', '--model_name',
dest='model_name', dest='model_name',
help="Model type for traing, which is one of ('UNet')", help='Model type for testing, which is one of {}'.format(
str(list(MODELS.keys()))),
type=str, type=str,
default='UNet') default='UNet')
...@@ -97,19 +98,20 @@ def infer(model, test_dataset=None, model_dir=None, save_dir='output'): ...@@ -97,19 +98,20 @@ def infer(model, test_dataset=None, model_dir=None, save_dir='output'):
logging.info("Start to predict...") logging.info("Start to predict...")
for im, im_info, im_path in tqdm.tqdm(test_dataset): for im, im_info, im_path in tqdm.tqdm(test_dataset):
im = im[np.newaxis, ...]
im = to_variable(im) im = to_variable(im)
pred, _ = model(im, mode='test') pred, _ = model(im, mode='test')
pred = pred.numpy() pred = pred.numpy()
pred = np.squeeze(pred).astype('uint8') pred = np.squeeze(pred).astype('uint8')
keys = list(im_info.keys()) for info in im_info[::-1]:
for k in keys[::-1]: if info[0] == 'resize':
if k == 'shape_before_resize': h, w = info[1][0], info[1][1]
h, w = im_info[k][0], im_info[k][1]
pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST) pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
elif k == 'shape_before_padding': elif info[0] == 'padding':
h, w = im_info[k][0], im_info[k][1] h, w = info[1][0], info[1][1]
pred = pred[0:h, 0:w] pred = pred[0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(
info[0]))
im_file = im_path.replace(test_dataset.data_dir, '') im_file = im_path.replace(test_dataset.data_dir, '')
if im_file[0] == '/': if im_file[0] == '/':
...@@ -146,8 +148,11 @@ def main(args): ...@@ -146,8 +148,11 @@ def main(args):
test_transforms = T.Compose([T.Resize(args.input_size), T.Normalize()]) test_transforms = T.Compose([T.Resize(args.input_size), T.Normalize()])
test_dataset = dataset(transforms=test_transforms, mode='test') test_dataset = dataset(transforms=test_transforms, mode='test')
if args.model_name == 'UNet': if args.model_name not in MODELS:
model = models.UNet(num_classes=test_dataset.num_classes) raise Exception(
'--model_name is invalid. it should be one of {}'.format(
str(list(MODELS.keys()))))
model = MODELS[args.model_name](num_classes=test_dataset.num_classes)
infer( infer(
model, model,
......
...@@ -13,3 +13,28 @@ ...@@ -13,3 +13,28 @@
# limitations under the License. # limitations under the License.
from .unet import UNet from .unet import UNet
from .hrnet import *
# Registry mapping the value of --model_name to the callable that builds the
# network. Every key maps to the builder of the same name.
MODELS = {
    "UNet": UNet,
    "HRNet_W18_Small_V1": HRNet_W18_Small_V1,
    "HRNet_W18_Small_V2": HRNet_W18_Small_V2,
    "HRNet_W18": HRNet_W18,
    "HRNet_W30": HRNet_W30,
    "HRNet_W32": HRNet_W32,
    "HRNet_W40": HRNet_W40,
    "HRNet_W44": HRNet_W44,
    "HRNet_W48": HRNet_W48,
    # Fixed: this key previously pointed at HRNet_W48.
    "HRNet_W60": HRNet_W60,
    "HRNet_W64": HRNet_W64,
    "SE_HRNet_W18_Small_V1": SE_HRNet_W18_Small_V1,
    "SE_HRNet_W18_Small_V2": SE_HRNet_W18_Small_V2,
    "SE_HRNet_W18": SE_HRNet_W18,
    "SE_HRNet_W30": SE_HRNet_W30,
    # Fixed: this key previously pointed at SE_HRNet_W30.
    "SE_HRNet_W32": SE_HRNet_W32,
    "SE_HRNet_W40": SE_HRNet_W40,
    "SE_HRNet_W44": SE_HRNet_W44,
    "SE_HRNet_W48": SE_HRNet_W48,
    "SE_HRNet_W60": SE_HRNet_W60,
    "SE_HRNet_W64": SE_HRNet_W64
}
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
# Use SyncBatchNorm under the name BatchNorm when the installed Paddle exposes
# it; otherwise fall back to the regular BatchNorm layer.
try:
    from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm
except ImportError:
    # Only a missing symbol/module should trigger the fallback; a bare
    # `except:` would also hide unrelated failures such as KeyboardInterrupt.
    from paddle.fluid.dygraph import BatchNorm
__all__ = [
"HRNet_W18_Small_V1", "HRNet_W18_Small_V2", "HRNet_W18", "HRNet_W30",
"HRNet_W32", "HRNet_W40", "HRNet_W44", "HRNet_W48", "HRNet_W60",
"HRNet_W64", "SE_HRNet_W18_Small_V1", "SE_HRNet_W18_Small_V2",
"SE_HRNet_W18", "SE_HRNet_W30", "SE_HRNet_W32", "SE_HRNet_W40",
"SE_HRNet_W44", "SE_HRNet_W48", "SE_HRNet_W60", "SE_HRNet_W64"
]
class HRNet(fluid.dygraph.Layer):
    """HRNet backbone with a 1x1-convolution segmentation head.

    The network is a stem of two stride-2 3x3 conv+BN layers, a bottleneck
    stage (``la1``), and three multi-branch stages (``st2``..``st4``) joined
    by transition layers. The four outputs of the last stage are resized to
    the size of the first one, concatenated and classified per pixel.

    Args:
        num_classes: Number of output channels of the final 1x1 convolution.
        stageN_num_modules: Number of modules in stage N.
        stageN_num_blocks: Blocks per branch in stage N (one entry per branch).
        stageN_num_channels: Channels per branch in stage N.
        has_se: Whether blocks get a squeeze-and-excitation layer.
        ignore_index: Label value excluded from the loss.

    Note:
        ``stage1_num_modules`` is stored but not otherwise used by this class.
    """

    def __init__(self,
                 num_classes,
                 stage1_num_modules=1,
                 stage1_num_blocks=[4],
                 stage1_num_channels=[64],
                 stage2_num_modules=1,
                 stage2_num_blocks=[4, 4],
                 stage2_num_channels=[18, 36],
                 stage3_num_modules=4,
                 stage3_num_blocks=[4, 4, 4],
                 stage3_num_channels=[18, 36, 72],
                 stage4_num_modules=3,
                 stage4_num_blocks=[4, 4, 4, 4],
                 stage4_num_channels=[18, 36, 72, 144],
                 has_se=False,
                 ignore_index=255):
        super(HRNet, self).__init__()

        self.num_classes = num_classes
        self.stage1_num_modules = stage1_num_modules
        self.stage1_num_blocks = stage1_num_blocks
        self.stage1_num_channels = stage1_num_channels
        self.stage2_num_modules = stage2_num_modules
        self.stage2_num_blocks = stage2_num_blocks
        self.stage2_num_channels = stage2_num_channels
        self.stage3_num_modules = stage3_num_modules
        self.stage3_num_blocks = stage3_num_blocks
        self.stage3_num_channels = stage3_num_channels
        self.stage4_num_modules = stage4_num_modules
        self.stage4_num_blocks = stage4_num_blocks
        self.stage4_num_channels = stage4_num_channels
        self.has_se = has_se
        self.ignore_index = ignore_index
        # Keeps the loss normalisation finite when every label is ignored.
        self.EPS = 1e-5

        # Stem: two stride-2 convolutions.
        self.conv_layer1_1 = ConvBNLayer(
            num_channels=3,
            num_filters=64,
            filter_size=3,
            stride=2,
            act='relu',
            name="layer1_1")

        self.conv_layer1_2 = ConvBNLayer(
            num_channels=64,
            num_filters=64,
            filter_size=3,
            stride=2,
            act='relu',
            name="layer1_2")

        self.la1 = Layer1(
            num_channels=64,
            num_blocks=self.stage1_num_blocks[0],
            num_filters=self.stage1_num_channels[0],
            has_se=has_se,
            name="layer2")

        # Layer1 is built from bottleneck blocks that output 4x num_filters.
        self.tr1 = TransitionLayer(
            in_channels=[self.stage1_num_channels[0] * 4],
            out_channels=self.stage2_num_channels,
            name="tr1")

        self.st2 = Stage(
            num_channels=self.stage2_num_channels,
            num_modules=self.stage2_num_modules,
            num_blocks=self.stage2_num_blocks,
            num_filters=self.stage2_num_channels,
            has_se=self.has_se,
            name="st2")

        self.tr2 = TransitionLayer(
            in_channels=self.stage2_num_channels,
            out_channels=self.stage3_num_channels,
            name="tr2")

        # Fixed: has_se was previously not forwarded to stages 3 and 4, so the
        # SE_* variants only had SE blocks in layer1 and stage 2. Note that
        # this adds parameters to stages 3/4 of SE models.
        self.st3 = Stage(
            num_channels=self.stage3_num_channels,
            num_modules=self.stage3_num_modules,
            num_blocks=self.stage3_num_blocks,
            num_filters=self.stage3_num_channels,
            has_se=self.has_se,
            name="st3")

        self.tr3 = TransitionLayer(
            in_channels=self.stage3_num_channels,
            out_channels=self.stage4_num_channels,
            name="tr3")

        self.st4 = Stage(
            num_channels=self.stage4_num_channels,
            num_modules=self.stage4_num_modules,
            num_blocks=self.stage4_num_blocks,
            num_filters=self.stage4_num_channels,
            has_se=self.has_se,
            name="st4")

        # The head sees all four branches concatenated along the channel axis.
        last_inp_channels = sum(self.stage4_num_channels)

        self.conv_last_2 = ConvBNLayer(
            num_channels=last_inp_channels,
            num_filters=last_inp_channels,
            filter_size=1,
            stride=1,
            name='conv-2')

        self.conv_last_1 = Conv2D(
            num_channels=last_inp_channels,
            num_filters=self.num_classes,
            filter_size=1,
            stride=1,
            padding=0,
            param_attr=ParamAttr(name='conv-1_weights'))

    def forward(self, x, label=None, mode='train'):
        """Run the network.

        Args:
            x: Input batch; its spatial size is read from ``x.shape[2:]``.
            label: Ground truth, required when ``mode`` is 'train'.
            mode: 'train' returns the loss; any other value returns
                predictions.

        Returns:
            The averaged loss in 'train' mode, otherwise ``(pred, score_map)``
            where ``score_map`` is the softmax output transposed with
            ``[0, 2, 3, 1]`` and ``pred`` is its argmax over the last axis
            with that axis kept as size 1.

        Raises:
            Exception: If ``mode`` is 'train' and ``label`` is None.
        """
        input_shape = x.shape[2:]
        conv1 = self.conv_layer1_1(x)
        conv2 = self.conv_layer1_2(conv1)

        la1 = self.la1(conv2)

        tr1 = self.tr1([la1])
        st2 = self.st2(tr1)

        tr2 = self.tr2(st2)
        st3 = self.st3(tr2)

        tr3 = self.tr3(st3)
        st4 = self.st4(tr3)

        # Bring every branch to the size of the first branch before fusing.
        x0_h, x0_w = st4[0].shape[2:]
        x1 = fluid.layers.resize_bilinear(st4[1], out_shape=(x0_h, x0_w))
        x2 = fluid.layers.resize_bilinear(st4[2], out_shape=(x0_h, x0_w))
        x3 = fluid.layers.resize_bilinear(st4[3], out_shape=(x0_h, x0_w))
        x = fluid.layers.concat([st4[0], x1, x2, x3], axis=1)

        x = self.conv_last_2(x)
        logit = self.conv_last_1(x)
        # Resize the logits back to the input's spatial size.
        logit = fluid.layers.resize_bilinear(logit, input_shape)

        if mode == 'train':
            if label is None:
                raise Exception('Label is need during training')
            return self._get_loss(logit, label)
        else:
            score_map = fluid.layers.softmax(logit, axis=1)
            score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1])
            pred = fluid.layers.argmax(score_map, axis=3)
            pred = fluid.layers.unsqueeze(pred, axes=[3])
            return pred, score_map

    def _get_loss(self, logit, label):
        """Return softmax cross-entropy averaged over non-ignored labels.

        Both tensors are transposed with ``[0, 2, 3, 1]`` so the class axis is
        last. Positions whose label equals ``ignore_index`` are masked out.
        """
        logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
        label = fluid.layers.transpose(label, [0, 2, 3, 1])
        mask = label != self.ignore_index
        mask = fluid.layers.cast(mask, 'float32')
        loss, probs = fluid.layers.softmax_with_cross_entropy(
            logit,
            label,
            ignore_index=self.ignore_index,
            return_softmax=True,
            axis=-1)

        loss = loss * mask
        # mean(loss) / mean(mask) equals the mean over the kept positions;
        # EPS guards against a mask that is all zeros.
        avg_loss = fluid.layers.mean(loss) / (
            fluid.layers.mean(mask) + self.EPS)

        label.stop_gradient = True
        mask.stop_gradient = True

        return avg_loss
class ConvBNLayer(fluid.dygraph.Layer):
    """Bias-free 2-D convolution followed by batch normalization.

    Args:
        num_channels: Input channels of the convolution.
        num_filters: Output channels of the convolution and BN size.
        filter_size: Kernel size; padding is ``(filter_size - 1) // 2``.
        stride: Convolution stride.
        groups: Convolution groups.
        act: Activation applied by the batch-norm layer (``None`` for none).
        name: Prefix used to build the parameter names. It is concatenated
            with string suffixes, so a string must be supplied.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 act="relu",
                 name=None):
        super(ConvBNLayer, self).__init__()

        pad = (filter_size - 1) // 2
        conv_weights = ParamAttr(name=name + "_weights")
        # The activation lives in the BN layer, so the conv itself has none.
        self._conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=pad,
            groups=groups,
            act=None,
            param_attr=conv_weights,
            bias_attr=False)

        bn_prefix = name + '_bn'
        bn_scale = ParamAttr(name=bn_prefix + '_scale')
        bn_offset = ParamAttr(bn_prefix + '_offset')
        self._batch_norm = BatchNorm(
            num_filters,
            act=act,
            param_attr=bn_scale,
            bias_attr=bn_offset,
            moving_mean_name=bn_prefix + '_mean',
            moving_variance_name=bn_prefix + '_variance')

    def forward(self, input):
        """Apply the convolution and then batch normalization to ``input``."""
        return self._batch_norm(self._conv(input))
class Layer1(fluid.dygraph.Layer):
    """Sequential stack of ``num_blocks`` bottleneck blocks.

    Only the first block takes ``num_channels`` inputs and uses a projection
    on its shortcut; every later block takes ``num_filters * 4`` inputs.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 num_blocks,
                 has_se=False,
                 name=None):
        super(Layer1, self).__init__()

        self.bottleneck_block_list = []

        for block_idx in range(num_blocks):
            is_first = block_idx == 0
            block = BottleneckBlock(
                num_channels=num_channels if is_first else num_filters * 4,
                num_filters=num_filters,
                has_se=has_se,
                stride=1,
                downsample=is_first,
                name=name + '_' + str(block_idx + 1))
            registered = self.add_sublayer(
                "bb_{}_{}".format(name, block_idx + 1), block)
            self.bottleneck_block_list.append(registered)

    def forward(self, input):
        """Feed ``input`` through every bottleneck block in order."""
        out = input
        for bottleneck in self.bottleneck_block_list:
            out = bottleneck(out)
        return out
class TransitionLayer(fluid.dygraph.Layer):
    """Adapt a list of branch feature maps to the next stage's branches.

    For an existing branch a 3x3 conv is created only when the channel count
    changes; otherwise the slot holds ``None`` and the input passes through.
    Every additional output branch gets a stride-2 3x3 conv that reads the
    last input branch.
    """

    def __init__(self, in_channels, out_channels, name=None):
        super(TransitionLayer, self).__init__()

        num_in = len(in_channels)
        self.conv_bn_func_list = []

        for idx, out_ch in enumerate(out_channels):
            if idx < num_in:
                if in_channels[idx] == out_ch:
                    # Same width: nothing to convert for this branch.
                    self.conv_bn_func_list.append(None)
                    continue
                conv_args = dict(num_channels=in_channels[idx])
            else:
                # New branch, derived from the last existing one.
                conv_args = dict(num_channels=in_channels[-1], stride=2)

            conv_bn = ConvBNLayer(
                num_filters=out_ch,
                filter_size=3,
                name=name + '_layer_' + str(idx + 1),
                **conv_args)
            self.conv_bn_func_list.append(
                self.add_sublayer(
                    "transition_{}_layer_{}".format(name, idx + 1), conv_bn))

    def forward(self, input):
        """Return one feature map per output branch.

        Args:
            input: List of feature maps, one per input branch.
        """
        outs = []
        for idx, conv_bn in enumerate(self.conv_bn_func_list):
            if conv_bn is None:
                outs.append(input[idx])
                continue
            source = input[idx] if idx < len(input) else input[-1]
            outs.append(conv_bn(source))
        return outs
class Branches(fluid.dygraph.Layer):
    """Parallel branches, each a sequence of ``BasicBlock`` layers.

    Branch ``i`` holds ``num_blocks[i]`` blocks; its first block maps
    ``in_channels[i]`` to ``out_channels[i]`` and the rest keep
    ``out_channels[i]``.
    """

    def __init__(self,
                 num_blocks,
                 in_channels,
                 out_channels,
                 has_se=False,
                 name=None):
        super(Branches, self).__init__()

        self.basic_block_list = []

        for branch_idx, out_ch in enumerate(out_channels):
            branch_blocks = []
            self.basic_block_list.append(branch_blocks)
            for block_idx in range(num_blocks[branch_idx]):
                block_in = in_channels[branch_idx] if block_idx == 0 else out_ch
                block = BasicBlock(
                    num_channels=block_in,
                    num_filters=out_ch,
                    has_se=has_se,
                    name=name + '_branch_layer_' + str(branch_idx + 1) + '_' +
                    str(block_idx + 1))
                branch_blocks.append(
                    self.add_sublayer(
                        "bb_{}_branch_layer_{}_{}".format(
                            name, branch_idx + 1, block_idx + 1), block))

    def forward(self, inputs):
        """Run each input through the branch with the same index."""
        outs = []
        for branch_idx, feature in enumerate(inputs):
            for block in self.basic_block_list[branch_idx]:
                feature = block(feature)
            outs.append(feature)
        return outs
class BottleneckBlock(fluid.dygraph.Layer):
    """Residual bottleneck: 1x1 -> 3x3 -> 1x1 convs producing 4x num_filters.

    Args:
        num_channels: Input channels.
        num_filters: Width of the first two convs; the output has four times
            as many channels.
        has_se: Apply an ``SELayer`` to the main path before the addition.
        stride: Stride of the 3x3 conv.
        downsample: Project the shortcut with a 1x1 conv to the output width.
        name: Prefix for parameter names (must be a string).
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 has_se,
                 stride=1,
                 downsample=False,
                 name=None):
        super(BottleneckBlock, self).__init__()

        self.has_se = has_se
        self.downsample = downsample
        expanded = num_filters * 4

        self.conv1 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act="relu",
            name=name + "_conv1")
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act="relu",
            name=name + "_conv2")
        # No activation here: relu is applied after the residual addition.
        self.conv3 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=expanded,
            filter_size=1,
            act=None,
            name=name + "_conv3")

        if downsample:
            self.conv_down = ConvBNLayer(
                num_channels=num_channels,
                num_filters=expanded,
                filter_size=1,
                act=None,
                name=name + "_downsample")

        if has_se:
            self.se = SELayer(
                num_channels=expanded,
                num_filters=expanded,
                reduction_ratio=16,
                name=name + '_fc')

    def forward(self, input):
        """Return ``relu(main_path(input) + shortcut(input))``."""
        main = self.conv3(self.conv2(self.conv1(input)))

        shortcut = self.conv_down(input) if self.downsample else input

        if self.has_se:
            main = self.se(main)

        return fluid.layers.elementwise_add(x=main, y=shortcut, act="relu")
class BasicBlock(fluid.dygraph.Layer):
    """Residual block made of two 3x3 conv+BN layers.

    Args:
        num_channels: Input channels.
        num_filters: Output channels of both convs.
        stride: Stride of the first conv.
        has_se: Apply an ``SELayer`` to the main path before the addition.
        downsample: Project the shortcut with a 1x1 conv. The projection is
            built with ``num_filters * 4`` outputs, exactly as the code
            specifies.
        name: Prefix for parameter names (must be a string).
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride=1,
                 has_se=False,
                 downsample=False,
                 name=None):
        super(BasicBlock, self).__init__()

        self.has_se = has_se
        self.downsample = downsample

        self.conv1 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act="relu",
            name=name + "_conv1")
        # No activation here: relu is applied after the residual addition.
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=1,
            act=None,
            name=name + "_conv2")

        if downsample:
            self.conv_down = ConvBNLayer(
                num_channels=num_channels,
                num_filters=num_filters * 4,
                filter_size=1,
                act="relu",
                name=name + "_downsample")

        if has_se:
            self.se = SELayer(
                num_channels=num_filters,
                num_filters=num_filters,
                reduction_ratio=16,
                name=name + '_fc')

    def forward(self, input):
        """Return ``relu(main_path(input) + shortcut(input))``."""
        main = self.conv2(self.conv1(input))

        shortcut = self.conv_down(input) if self.downsample else input

        if self.has_se:
            main = self.se(main)

        return fluid.layers.elementwise_add(x=main, y=shortcut, act="relu")
class SELayer(fluid.dygraph.Layer):
    """Squeeze-and-excitation: rescale channels by a learned gate.

    The input is globally average-pooled, passed through a relu ``Linear``
    of width ``int(num_channels / reduction_ratio)`` and a sigmoid ``Linear``
    of width ``num_filters``, and the result multiplies the input.

    Note:
        ``forward`` reshapes the gate to ``num_channels`` channels, so
        ``num_filters`` is expected to equal ``num_channels`` (every caller
        in this file passes equal values).
    """

    def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
        super(SELayer, self).__init__()

        self.pool2d_gap = Pool2D(pool_type='avg', global_pooling=True)

        self._num_channels = num_channels

        reduced = int(num_channels / reduction_ratio)

        # Each Linear is initialised uniformly in +/- 1/sqrt(fan_in).
        sqz_bound = 1.0 / math.sqrt(num_channels * 1.0)
        sqz_weights = ParamAttr(
            initializer=fluid.initializer.Uniform(-sqz_bound, sqz_bound),
            name=name + "_sqz_weights")
        self.squeeze = Linear(
            num_channels,
            reduced,
            act="relu",
            param_attr=sqz_weights,
            bias_attr=ParamAttr(name=name + '_sqz_offset'))

        exc_bound = 1.0 / math.sqrt(reduced * 1.0)
        exc_weights = ParamAttr(
            initializer=fluid.initializer.Uniform(-exc_bound, exc_bound),
            name=name + "_exc_weights")
        self.excitation = Linear(
            reduced,
            num_filters,
            act="sigmoid",
            param_attr=exc_weights,
            bias_attr=ParamAttr(name=name + '_exc_offset'))

    def forward(self, input):
        """Return ``input`` multiplied by its per-channel gate."""
        pooled = self.pool2d_gap(input)
        flat = fluid.layers.reshape(pooled, shape=[-1, self._num_channels])
        gate = self.excitation(self.squeeze(flat))
        # Shape the gate so it multiplies every spatial position of a channel.
        gate = fluid.layers.reshape(
            gate, shape=[-1, self._num_channels, 1, 1])
        return input * gate
class Stage(fluid.dygraph.Layer):
    """Sequence of ``num_modules`` ``HighResolutionModule`` layers.

    ``multi_scale_output=False`` is forwarded to the last module only; every
    other module is built with the module's own default for that argument.
    """

    def __init__(self,
                 num_channels,
                 num_modules,
                 num_blocks,
                 num_filters,
                 has_se=False,
                 multi_scale_output=True,
                 name=None):
        super(Stage, self).__init__()

        self._num_modules = num_modules

        self.stage_func_list = []
        for module_idx in range(num_modules):
            module_args = dict(
                num_channels=num_channels,
                num_blocks=num_blocks,
                num_filters=num_filters,
                has_se=has_se,
                name=name + '_' + str(module_idx + 1))
            is_last = module_idx == num_modules - 1
            if is_last and not multi_scale_output:
                module_args['multi_scale_output'] = False

            module = self.add_sublayer(
                "stage_{}_{}".format(name, module_idx + 1),
                HighResolutionModule(**module_args))
            self.stage_func_list.append(module)

    def forward(self, input):
        """Pass the list of branch features through every module in order."""
        features = input
        for module in self.stage_func_list:
            features = module(features)
        return features
class HighResolutionModule(fluid.dygraph.Layer):
    """One HRNet module: per-branch blocks followed by cross-branch fusion.

    Args:
        num_channels: Input channels of each branch.
        num_blocks: Number of blocks in each branch.
        num_filters: Output channels of each branch.
        has_se: Forwarded to the branch blocks.
        multi_scale_output: Forwarded to the fusion layer.
        name: Prefix for sublayer and parameter names.
    """

    def __init__(self,
                 num_channels,
                 num_blocks,
                 num_filters,
                 has_se=False,
                 multi_scale_output=True,
                 name=None):
        super(HighResolutionModule, self).__init__()

        branches = Branches(
            num_blocks=num_blocks,
            in_channels=num_channels,
            out_channels=num_filters,
            has_se=has_se,
            name=name)
        self.branches_func = branches

        # Fusion operates on branch outputs, so both widths are num_filters.
        fuse = FuseLayers(
            in_channels=num_filters,
            out_channels=num_filters,
            multi_scale_output=multi_scale_output,
            name=name)
        self.fuse_func = fuse

    def forward(self, input):
        """Run the branches on ``input`` and fuse their outputs."""
        return self.fuse_func(self.branches_func(input))
class FuseLayers(fluid.dygraph.Layer):
    """Fuse a list of branch feature maps into one map per output branch.

    Output branch ``i`` is the sum of input ``i`` and every other input ``j``
    after conversion: inputs with ``j > i`` go through a 1x1 conv and are
    enlarged by ``2 ** (j - i)``; inputs with ``j < i`` go through a chain of
    ``i - j`` stride-2 3x3 convs. A relu is applied to each sum.

    All conversion layers are stored in one flat list
    (``residual_func_list``); ``forward`` walks that list with a running
    index, so construction order and consumption order must stay in sync.

    Args:
        in_channels: Channels of each input branch.
        out_channels: Channels of each output branch.
        multi_scale_output: When False only output branch 0 is produced.
        name: Prefix for sublayer and parameter names.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 multi_scale_output=True,
                 name=None):
        super(FuseLayers, self).__init__()

        # Number of output branches actually produced.
        self._actual_ch = len(in_channels) if multi_scale_output else 1
        self._in_channels = in_channels

        self.residual_func_list = []
        for i in range(self._actual_ch):
            for j in range(len(in_channels)):
                residual_func = None
                if j > i:
                    # Input j is converted with a 1x1 conv to the width of
                    # output i; forward() resizes the result afterwards.
                    residual_func = self.add_sublayer(
                        "residual_{}_layer_{}_{}".format(name, i + 1, j + 1),
                        ConvBNLayer(
                            num_channels=in_channels[j],
                            num_filters=out_channels[i],
                            filter_size=1,
                            stride=1,
                            act=None,
                            name=name + '_layer_' + str(i + 1) + '_' +
                            str(j + 1)))
                    self.residual_func_list.append(residual_func)
                elif j < i:
                    # Input j is reduced by a chain of (i - j) stride-2 convs.
                    pre_num_filters = in_channels[j]
                    for k in range(i - j):
                        if k == i - j - 1:
                            # Last conv of the chain: switch to the width of
                            # output i, no activation before the sum.
                            residual_func = self.add_sublayer(
                                "residual_{}_layer_{}_{}_{}".format(
                                    name, i + 1, j + 1, k + 1),
                                ConvBNLayer(
                                    num_channels=pre_num_filters,
                                    num_filters=out_channels[i],
                                    filter_size=3,
                                    stride=2,
                                    act=None,
                                    name=name + '_layer_' + str(i + 1) + '_' +
                                    str(j + 1) + '_' + str(k + 1)))
                            pre_num_filters = out_channels[i]
                        else:
                            # Intermediate conv: keep the width of branch j
                            # and apply relu.
                            residual_func = self.add_sublayer(
                                "residual_{}_layer_{}_{}_{}".format(
                                    name, i + 1, j + 1, k + 1),
                                ConvBNLayer(
                                    num_channels=pre_num_filters,
                                    num_filters=out_channels[j],
                                    filter_size=3,
                                    stride=2,
                                    act="relu",
                                    name=name + '_layer_' + str(i + 1) + '_' +
                                    str(j + 1) + '_' + str(k + 1)))
                            pre_num_filters = out_channels[j]
                        self.residual_func_list.append(residual_func)

    def forward(self, input):
        """Return the fused feature maps, one per output branch.

        Args:
            input: List of feature maps, one per input branch.
        """
        outs = []
        # Position in the flat layer list; advanced in construction order.
        residual_func_idx = 0
        for i in range(self._actual_ch):
            residual = input[i]
            for j in range(len(self._in_channels)):
                if j > i:
                    y = self.residual_func_list[residual_func_idx](input[j])
                    residual_func_idx += 1

                    # Enlarge the converted map by 2 ** (j - i) before adding.
                    y = fluid.layers.resize_nearest(input=y, scale=2**(j - i))
                    residual = fluid.layers.elementwise_add(
                        x=residual, y=y, act=None)
                elif j < i:
                    y = input[j]
                    for k in range(i - j):
                        y = self.residual_func_list[residual_func_idx](y)
                        residual_func_idx += 1

                    residual = fluid.layers.elementwise_add(
                        x=residual, y=y, act=None)

            # Apply relu to the accumulated sum.
            layer_helper = LayerHelper(self.full_name(), act='relu')
            residual = layer_helper.append_activation(residual)
            outs.append(residual)

        return outs
class LastClsOut(fluid.dygraph.Layer):
    """Apply one projecting ``BottleneckBlock`` to each input branch.

    Args:
        num_channel_list: Input channels of each branch.
        has_se: Forwarded to every bottleneck block.
        num_filters_list: ``num_filters`` of the block for each branch.
        name: Prefix for sublayer and parameter names (must be a string).
    """

    def __init__(self,
                 num_channel_list,
                 has_se,
                 num_filters_list=[32, 64, 128, 256],
                 name=None):
        super(LastClsOut, self).__init__()

        self.func_list = []
        for branch_idx, branch_channels in enumerate(num_channel_list):
            block = BottleneckBlock(
                num_channels=branch_channels,
                num_filters=num_filters_list[branch_idx],
                has_se=has_se,
                downsample=True,
                name=name + 'conv_' + str(branch_idx + 1))
            self.func_list.append(
                self.add_sublayer(
                    "conv_{}_conv_{}".format(name, branch_idx + 1), block))

    def forward(self, inputs):
        """Return the block outputs, one per entry of ``inputs``."""
        return [
            self.func_list[branch_idx](feature)
            for branch_idx, feature in enumerate(inputs)
        ]
def HRNet_W18_Small_V1(num_classes):
    """Build the small V1 HRNet: widths 16/32/64/128, one module per stage.

    Args:
        num_classes: Forwarded to ``HRNet`` as ``num_classes``.

    Returns:
        The configured ``HRNet`` instance.
    """
    widths = [16, 32, 64, 128]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[1],
        stage1_num_channels=[32],
        stage2_num_modules=1,
        stage2_num_blocks=[2] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=1,
        stage3_num_blocks=[2] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=1,
        stage4_num_blocks=[2] * 4,
        stage4_num_channels=widths)
def HRNet_W18_Small_V2(num_classes):
    """Build the small V2 HRNet: widths 18/36/72/144, one module per stage.

    Args:
        num_classes: Forwarded to ``HRNet`` as ``num_classes``.

    Returns:
        The configured ``HRNet`` instance.
    """
    widths = [18, 36, 72, 144]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[2],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[2] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=1,
        stage3_num_blocks=[2] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=1,
        stage4_num_blocks=[2] * 4,
        stage4_num_channels=widths)
def HRNet_W18(num_classes):
    """Build HRNet-W18: branch widths 18/36/72/144, four blocks per branch.

    Args:
        num_classes: Forwarded to ``HRNet`` as ``num_classes``.

    Returns:
        The configured ``HRNet`` instance.
    """
    widths = [18, 36, 72, 144]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths)
def HRNet_W30(num_classes):
    """Build HRNet-W30: branch widths 30/60/120/240, four blocks per branch.

    Args:
        num_classes: Forwarded to ``HRNet`` as ``num_classes``.

    Returns:
        The configured ``HRNet`` instance.
    """
    widths = [30, 60, 120, 240]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths)
def HRNet_W32(num_classes):
    """Build HRNet-W32: branch widths 32/64/128/256, four blocks per branch.

    Args:
        num_classes: Forwarded to ``HRNet`` as ``num_classes``.

    Returns:
        The configured ``HRNet`` instance.
    """
    widths = [32, 64, 128, 256]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths)
def HRNet_W40(num_classes):
    """Build HRNet-W40: branch widths 40/80/160/320, four blocks per branch.

    Args:
        num_classes: Forwarded to ``HRNet`` as ``num_classes``.

    Returns:
        The configured ``HRNet`` instance.
    """
    widths = [40, 80, 160, 320]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths)
def HRNet_W44(num_classes):
    """Build HRNet-W44: branch widths 44/88/176/352, four blocks per branch.

    Args:
        num_classes: Forwarded to ``HRNet`` as ``num_classes``.

    Returns:
        The configured ``HRNet`` instance.
    """
    widths = [44, 88, 176, 352]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths)
def HRNet_W48(num_classes):
    """Build HRNet-W48: branch widths 48/96/192/384, four blocks per branch.

    Args:
        num_classes: Forwarded to ``HRNet`` as ``num_classes``.

    Returns:
        The configured ``HRNet`` instance.
    """
    widths = [48, 96, 192, 384]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths)
def HRNet_W60(num_classes):
    """Build HRNet-W60: branch widths 60/120/240/480, four blocks per branch.

    Args:
        num_classes: Forwarded to ``HRNet`` as ``num_classes``.

    Returns:
        The configured ``HRNet`` instance.
    """
    widths = [60, 120, 240, 480]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths)
def HRNet_W64(num_classes):
    """Build HRNet-W64: branch widths 64/128/256/512, four blocks per branch.

    Args:
        num_classes: Forwarded to ``HRNet`` as ``num_classes``.

    Returns:
        The configured ``HRNet`` instance.
    """
    widths = [64, 128, 256, 512]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths)
def SE_HRNet_W18_Small_V1(num_classes):
    """Build the small V1 HRNet (widths 16/32/64/128) with ``has_se=True``.

    Args:
        num_classes: Forwarded to ``HRNet`` as ``num_classes``.

    Returns:
        The configured ``HRNet`` instance.
    """
    widths = [16, 32, 64, 128]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[1],
        stage1_num_channels=[32],
        stage2_num_modules=1,
        stage2_num_blocks=[2] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=1,
        stage3_num_blocks=[2] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=1,
        stage4_num_blocks=[2] * 4,
        stage4_num_channels=widths,
        has_se=True)
def SE_HRNet_W18_Small_V2(num_classes):
    """Build the small V2 HRNet (widths 18/36/72/144) with ``has_se=True``.

    Args:
        num_classes: Forwarded to ``HRNet`` as ``num_classes``.

    Returns:
        The configured ``HRNet`` instance.
    """
    widths = [18, 36, 72, 144]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[2],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[2] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=1,
        stage3_num_blocks=[2] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=1,
        stage4_num_blocks=[2] * 4,
        stage4_num_channels=widths,
        has_se=True)
def SE_HRNet_W18(num_classes):
    """Construct an ``HRNet`` using the W18 configuration with ``has_se=True``.

    Args:
        num_classes: Forwarded unchanged to ``HRNet`` as ``num_classes``.

    Returns:
        The ``HRNet`` instance built with the settings below.
    """
    # Channel lists for stages 2-4 start at 18 and double per entry:
    # [18, 36, 72, 144]; each stage takes a prefix of this list.
    widths = [18 * 2 ** i for i in range(4)]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths,
        # NOTE(review): presumably switches on squeeze-and-excitation inside
        # HRNet -- confirm against the HRNet class definition.
        has_se=True)
def SE_HRNet_W30(num_classes):
    """Construct an ``HRNet`` using the W30 configuration with ``has_se=True``.

    Args:
        num_classes: Forwarded unchanged to ``HRNet`` as ``num_classes``.

    Returns:
        The ``HRNet`` instance built with the settings below.
    """
    # Channel lists for stages 2-4 start at 30 and double per entry:
    # [30, 60, 120, 240]; each stage takes a prefix of this list.
    widths = [30 * 2 ** i for i in range(4)]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths,
        # NOTE(review): presumably switches on squeeze-and-excitation inside
        # HRNet -- confirm against the HRNet class definition.
        has_se=True)
def SE_HRNet_W32(num_classes):
    """Construct an ``HRNet`` using the W32 configuration with ``has_se=True``.

    Args:
        num_classes: Forwarded unchanged to ``HRNet`` as ``num_classes``.

    Returns:
        The ``HRNet`` instance built with the settings below.
    """
    # Channel lists for stages 2-4 start at 32 and double per entry:
    # [32, 64, 128, 256]; each stage takes a prefix of this list.
    widths = [32 * 2 ** i for i in range(4)]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths,
        # NOTE(review): presumably switches on squeeze-and-excitation inside
        # HRNet -- confirm against the HRNet class definition.
        has_se=True)
def SE_HRNet_W40(num_classes):
    """Construct an ``HRNet`` using the W40 configuration with ``has_se=True``.

    Args:
        num_classes: Forwarded unchanged to ``HRNet`` as ``num_classes``.

    Returns:
        The ``HRNet`` instance built with the settings below.
    """
    # Channel lists for stages 2-4 start at 40 and double per entry:
    # [40, 80, 160, 320]; each stage takes a prefix of this list.
    widths = [40 * 2 ** i for i in range(4)]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths,
        # NOTE(review): presumably switches on squeeze-and-excitation inside
        # HRNet -- confirm against the HRNet class definition.
        has_se=True)
def SE_HRNet_W44(num_classes):
    """Construct an ``HRNet`` using the W44 configuration with ``has_se=True``.

    Args:
        num_classes: Forwarded unchanged to ``HRNet`` as ``num_classes``.

    Returns:
        The ``HRNet`` instance built with the settings below.
    """
    # Channel lists for stages 2-4 start at 44 and double per entry:
    # [44, 88, 176, 352]; each stage takes a prefix of this list.
    widths = [44 * 2 ** i for i in range(4)]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths,
        # NOTE(review): presumably switches on squeeze-and-excitation inside
        # HRNet -- confirm against the HRNet class definition.
        has_se=True)
def SE_HRNet_W48(num_classes):
    """Construct an ``HRNet`` using the W48 configuration with ``has_se=True``.

    Args:
        num_classes: Forwarded unchanged to ``HRNet`` as ``num_classes``.

    Returns:
        The ``HRNet`` instance built with the settings below.
    """
    # Channel lists for stages 2-4 start at 48 and double per entry:
    # [48, 96, 192, 384]; each stage takes a prefix of this list.
    widths = [48 * 2 ** i for i in range(4)]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths,
        # NOTE(review): presumably switches on squeeze-and-excitation inside
        # HRNet -- confirm against the HRNet class definition.
        has_se=True)
def SE_HRNet_W60(num_classes):
    """Construct an ``HRNet`` using the W60 configuration with ``has_se=True``.

    Args:
        num_classes: Forwarded unchanged to ``HRNet`` as ``num_classes``.

    Returns:
        The ``HRNet`` instance built with the settings below.
    """
    # Channel lists for stages 2-4 start at 60 and double per entry:
    # [60, 120, 240, 480]; each stage takes a prefix of this list.
    widths = [60 * 2 ** i for i in range(4)]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths,
        # NOTE(review): presumably switches on squeeze-and-excitation inside
        # HRNet -- confirm against the HRNet class definition.
        has_se=True)
def SE_HRNet_W64(num_classes):
    """Construct an ``HRNet`` using the W64 configuration with ``has_se=True``.

    Args:
        num_classes: Forwarded unchanged to ``HRNet`` as ``num_classes``.

    Returns:
        The ``HRNet`` instance built with the settings below.
    """
    # Channel lists for stages 2-4 start at 64 and double per entry:
    # [64, 128, 256, 512]; each stage takes a prefix of this list.
    widths = [64 * 2 ** i for i in range(4)]
    return HRNet(
        num_classes=num_classes,
        stage1_num_modules=1,
        stage1_num_blocks=[4],
        stage1_num_channels=[64],
        stage2_num_modules=1,
        stage2_num_blocks=[4] * 2,
        stage2_num_channels=widths[:2],
        stage3_num_modules=4,
        stage3_num_blocks=[4] * 3,
        stage3_num_channels=widths[:3],
        stage4_num_modules=3,
        stage4_num_blocks=[4] * 4,
        stage4_num_channels=widths,
        # NOTE(review): presumably switches on squeeze-and-excitation inside
        # HRNet -- confirm against the HRNet class definition.
        has_se=True)
...@@ -13,7 +13,11 @@ ...@@ -13,7 +13,11 @@
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph import Conv2D, BatchNorm, Pool2D from paddle.fluid.dygraph import Conv2D, Pool2D
try:
from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm
except:
from paddle.fluid.dygraph import BatchNorm
class UNet(fluid.dygraph.Layer): class UNet(fluid.dygraph.Layer):
...@@ -39,6 +43,8 @@ class UNet(fluid.dygraph.Layer): ...@@ -39,6 +43,8 @@ class UNet(fluid.dygraph.Layer):
return pred, score_map return pred, score_map
def _get_loss(self, logit, label): def _get_loss(self, logit, label):
logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
label = fluid.layers.transpose(label, [0, 2, 3, 1])
mask = label != self.ignore_index mask = label != self.ignore_index
mask = fluid.layers.cast(mask, 'float32') mask = fluid.layers.cast(mask, 'float32')
loss, probs = fluid.layers.softmax_with_cross_entropy( loss, probs = fluid.layers.softmax_with_cross_entropy(
......
...@@ -22,7 +22,7 @@ from paddle.incubate.hapi.distributed import DistributedBatchSampler ...@@ -22,7 +22,7 @@ from paddle.incubate.hapi.distributed import DistributedBatchSampler
from datasets import OpticDiscSeg, Cityscapes from datasets import OpticDiscSeg, Cityscapes
import transforms as T import transforms as T
import models from models import MODELS
import utils.logging as logging import utils.logging as logging
from utils import get_environ_info from utils import get_environ_info
from utils import load_pretrained_model from utils import load_pretrained_model
...@@ -38,7 +38,8 @@ def parse_args(): ...@@ -38,7 +38,8 @@ def parse_args():
parser.add_argument( parser.add_argument(
'--model_name', '--model_name',
dest='model_name', dest='model_name',
help="Model type for traing, which is one of ('UNet')", help='Model type for training, which is one of {}'.format(
str(list(MODELS.keys()))),
type=str, type=str,
default='UNet') default='UNet')
...@@ -181,7 +182,7 @@ def train(model, ...@@ -181,7 +182,7 @@ def train(model,
total_steps = steps_per_epoch * (num_epochs - start_epoch) total_steps = steps_per_epoch * (num_epochs - start_epoch)
num_steps = 0 num_steps = 0
best_mean_iou = -1.0 best_mean_iou = -1.0
best_model_epoch = 1 best_model_epoch = -1
for epoch in range(start_epoch, num_epochs): for epoch in range(start_epoch, num_epochs):
for step, data in enumerate(loader): for step, data in enumerate(loader):
images = data[0] images = data[0]
...@@ -229,10 +230,8 @@ def train(model, ...@@ -229,10 +230,8 @@ def train(model,
mean_iou, mean_acc = evaluate( mean_iou, mean_acc = evaluate(
model, model,
eval_dataset, eval_dataset,
places=places,
model_dir=current_save_dir, model_dir=current_save_dir,
num_classes=num_classes, num_classes=num_classes,
batch_size=batch_size,
ignore_index=ignore_index, ignore_index=ignore_index,
epoch_id=epoch + 1) epoch_id=epoch + 1)
if mean_iou > best_mean_iou: if mean_iou > best_mean_iou:
...@@ -241,9 +240,9 @@ def train(model, ...@@ -241,9 +240,9 @@ def train(model,
best_model_dir = os.path.join(save_dir, "best_model") best_model_dir = os.path.join(save_dir, "best_model")
fluid.save_dygraph(model.state_dict(), fluid.save_dygraph(model.state_dict(),
os.path.join(best_model_dir, 'model')) os.path.join(best_model_dir, 'model'))
logging.info( logging.info(
'Current evaluated best model in eval_dataset is epoch_{}, miou={:4f}' 'Current evaluated best model in eval_dataset is epoch_{}, miou={:4f}'
.format(best_model_epoch, best_mean_iou)) .format(best_model_epoch, best_mean_iou))
if use_vdl: if use_vdl:
log_writer.add_scalar('Evaluate/mean_iou', mean_iou, log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
...@@ -286,9 +285,11 @@ def main(args): ...@@ -286,9 +285,11 @@ def main(args):
T.Normalize()]) T.Normalize()])
eval_dataset = dataset(transforms=eval_transforms, mode='eval') eval_dataset = dataset(transforms=eval_transforms, mode='eval')
if args.model_name == 'UNet': if args.model_name not in MODELS:
model = models.UNet( raise Exception(
num_classes=train_dataset.num_classes, ignore_index=255) '--model_name is invalid. it should be one of {}'.format(
str(list(MODELS.keys()))))
model = MODELS[args.model_name](num_classes=train_dataset.num_classes)
# Creat optimizer # Creat optimizer
# todo, may less one than len(loader) # todo, may less one than len(loader)
......
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
...@@ -12,27 +13,17 @@ ...@@ -12,27 +13,17 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .functional import *
import random import random
from collections import OrderedDict
import numpy as np import numpy as np
from PIL import Image from PIL import Image
import cv2 import cv2
from collections import OrderedDict
from .functional import *
class Compose:
"""根据数据预处理/增强算子对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强算子。
to_rgb (bool): 是否转化为rgb通道格式
Raises:
TypeError: transforms不是list对象
ValueError: transforms元素个数小于1。
""" class Compose:
def __init__(self, transforms, to_rgb=True): def __init__(self, transforms, to_rgb=True):
if not isinstance(transforms, list): if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!') raise TypeError('The transforms must be a list!')
...@@ -43,20 +34,8 @@ class Compose: ...@@ -43,20 +34,8 @@ class Compose:
self.to_rgb = to_rgb self.to_rgb = to_rgb
def __call__(self, im, im_info=None, label=None): def __call__(self, im, im_info=None, label=None):
"""
Args:
im (str/np.ndarray): 图像路径/图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息,dict中的字段如下:
- shape_before_resize (tuple): 图像resize之前的大小(h, w)。
- shape_before_padding (tuple): 图像padding之前的大小(h, w)。
label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。
Returns:
tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。
"""
if im_info is None: if im_info is None:
im_info = dict() im_info = list()
if isinstance(im, str): if isinstance(im, str):
im = cv2.imread(im).astype('float32') im = cv2.imread(im).astype('float32')
if isinstance(label, str): if isinstance(label, str):
...@@ -80,27 +59,10 @@ class Compose: ...@@ -80,27 +59,10 @@ class Compose:
class RandomHorizontalFlip: class RandomHorizontalFlip:
"""以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。
Args:
prob (float): 随机水平翻转的概率。默认值为0.5。
"""
def __init__(self, prob=0.5): def __init__(self, prob=0.5):
self.prob = prob self.prob = prob
def __call__(self, im, im_info=None, label=None): def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if random.random() < self.prob: if random.random() < self.prob:
im = horizontal_flip(im) im = horizontal_flip(im)
if label is not None: if label is not None:
...@@ -112,26 +74,10 @@ class RandomHorizontalFlip: ...@@ -112,26 +74,10 @@ class RandomHorizontalFlip:
class RandomVerticalFlip: class RandomVerticalFlip:
"""以一定的概率对图像进行垂直翻转。当存在标注图像时,则同步进行翻转。
Args:
prob (float): 随机垂直翻转的概率。默认值为0.1。
"""
def __init__(self, prob=0.1): def __init__(self, prob=0.1):
self.prob = prob self.prob = prob
def __call__(self, im, im_info=None, label=None): def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if random.random() < self.prob: if random.random() < self.prob:
im = vertical_flip(im) im = vertical_flip(im)
if label is not None: if label is not None:
...@@ -143,25 +89,6 @@ class RandomVerticalFlip: ...@@ -143,25 +89,6 @@ class RandomVerticalFlip:
class Resize: class Resize:
"""调整图像大小(resize)。
- 当目标大小(target_size)类型为int时,根据插值方式,
将图像resize为[target_size, target_size]。
- 当目标大小(target_size)类型为list或tuple时,根据插值方式,
将图像resize为target_size。
注意:当插值方式为“RANDOM”时,则随机选取一种插值方式进行resize。
Args:
target_size (int/list/tuple): 短边目标长度。默认为608。
interp (str): resize的插值方式,与opencv的插值方式对应,取值范围为
['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']。默认为"LINEAR"。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 插值方式不在['NEAREST', 'LINEAR', 'CUBIC',
'AREA', 'LANCZOS4', 'RANDOM']中。
"""
# The interpolation mode # The interpolation mode
interp_dict = { interp_dict = {
'NEAREST': cv2.INTER_NEAREST, 'NEAREST': cv2.INTER_NEAREST,
...@@ -189,26 +116,9 @@ class Resize: ...@@ -189,26 +116,9 @@ class Resize:
self.target_size = target_size self.target_size = target_size
def __call__(self, im, im_info=None, label=None): def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict, 可选): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info跟新字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
"""
if im_info is None: if im_info is None:
im_info = OrderedDict() im_info = list()
im_info['shape_before_resize'] = im.shape[:2] im_info.append(('resize', im.shape[:2]))
if not isinstance(im, np.ndarray): if not isinstance(im, np.ndarray):
raise TypeError("Resize: image type is not numpy.") raise TypeError("Resize: image type is not numpy.")
if len(im.shape) != 3: if len(im.shape) != 3:
...@@ -228,32 +138,14 @@ class Resize: ...@@ -228,32 +138,14 @@ class Resize:
class ResizeByLong: class ResizeByLong:
"""对图像长边resize到固定值,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
Args:
long_size (int): resize后图像的长边大小。
"""
def __init__(self, long_size): def __init__(self, long_size):
self.long_size = long_size self.long_size = long_size
def __call__(self, im, im_info=None, label=None): def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info新增字段为:
-shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。
"""
if im_info is None: if im_info is None:
im_info = OrderedDict() im_info = list()
im_info['shape_before_resize'] = im.shape[:2] im_info.append(('resize', im.shape[:2]))
im = resize_long(im, self.long_size) im = resize_long(im, self.long_size)
if label is not None: if label is not None:
label = resize_long(label, self.long_size, cv2.INTER_NEAREST) label = resize_long(label, self.long_size, cv2.INTER_NEAREST)
...@@ -265,15 +157,6 @@ class ResizeByLong: ...@@ -265,15 +157,6 @@ class ResizeByLong:
class ResizeRangeScaling: class ResizeRangeScaling:
"""对图像长边随机resize到指定范围内,短边按比例进行缩放。当存在标注图像时,则同步进行处理。
Args:
min_value (int): 图像长边resize后的最小值。默认值400。
max_value (int): 图像长边resize后的最大值。默认值600。
Raises:
ValueError: min_value大于max_value
"""
def __init__(self, min_value=400, max_value=600): def __init__(self, min_value=400, max_value=600):
if min_value > max_value: if min_value > max_value:
raise ValueError('min_value must be less than max_value, ' raise ValueError('min_value must be less than max_value, '
...@@ -283,17 +166,6 @@ class ResizeRangeScaling: ...@@ -283,17 +166,6 @@ class ResizeRangeScaling:
self.max_value = max_value self.max_value = max_value
def __call__(self, im, im_info=None, label=None): def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_value == self.max_value: if self.min_value == self.max_value:
random_size = self.max_value random_size = self.max_value
else: else:
...@@ -310,17 +182,6 @@ class ResizeRangeScaling: ...@@ -310,17 +182,6 @@ class ResizeRangeScaling:
class ResizeStepScaling: class ResizeStepScaling:
"""对图像按照某一个比例resize,这个比例以scale_step_size为步长
在[min_scale_factor, max_scale_factor]随机变动。当存在标注图像时,则同步进行处理。
Args:
min_scale_factor(float), resize最小尺度。默认值0.75。
max_scale_factor (float), resize最大尺度。默认值1.25。
scale_step_size (float), resize尺度范围间隔。默认值0.25。
Raises:
ValueError: min_scale_factor大于max_scale_factor
"""
def __init__(self, def __init__(self,
min_scale_factor=0.75, min_scale_factor=0.75,
max_scale_factor=1.25, max_scale_factor=1.25,
...@@ -335,17 +196,6 @@ class ResizeStepScaling: ...@@ -335,17 +196,6 @@ class ResizeStepScaling:
self.scale_step_size = scale_step_size self.scale_step_size = scale_step_size
def __call__(self, im, im_info=None, label=None): def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_scale_factor == self.max_scale_factor: if self.min_scale_factor == self.max_scale_factor:
scale_factor = self.min_scale_factor scale_factor = self.min_scale_factor
...@@ -375,17 +225,6 @@ class ResizeStepScaling: ...@@ -375,17 +225,6 @@ class ResizeStepScaling:
class Normalize: class Normalize:
"""对图像进行标准化。
1.尺度缩放到 [0,1]。
2.对图像进行减均值除以标准差操作。
Args:
mean (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]。
std (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。
Raises:
ValueError: mean或std不是list对象。std包含0。
"""
def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]): def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
self.mean = mean self.mean = mean
self.std = std self.std = std
...@@ -396,18 +235,6 @@ class Normalize: ...@@ -396,18 +235,6 @@ class Normalize:
raise ValueError('{}: std is invalid!'.format(self)) raise ValueError('{}: std is invalid!'.format(self))
def __call__(self, im, im_info=None, label=None): def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
mean = np.array(self.mean)[np.newaxis, np.newaxis, :] mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :] std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std) im = normalize(im, mean, std)
...@@ -419,18 +246,6 @@ class Normalize: ...@@ -419,18 +246,6 @@ class Normalize:
class Padding: class Padding:
"""对图像或标注图像进行padding,padding方向为右和下。
根据提供的值对图像或标注图像进行padding操作。
Args:
target_size (int|list|tuple): padding后图像的大小。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认值为255。
Raises:
TypeError: target_size不是int|list|tuple。
ValueError: target_size为list|tuple时元素个数不等于2。
"""
def __init__(self, def __init__(self,
target_size, target_size,
im_padding_value=[127.5, 127.5, 127.5], im_padding_value=[127.5, 127.5, 127.5],
...@@ -449,25 +264,9 @@ class Padding: ...@@ -449,25 +264,9 @@ class Padding:
self.label_padding_value = label_padding_value self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None): def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
其中,im_info新增字段为:
-shape_before_padding (tuple): 保存padding之前图像的形状(h, w)。
Raises:
ValueError: 输入图像im或label的形状大于目标值
"""
if im_info is None: if im_info is None:
im_info = OrderedDict() im_info = list()
im_info['shape_before_padding'] = im.shape[:2] im_info.append(('padding', im.shape[:2]))
im_height, im_width = im.shape[0], im.shape[1] im_height, im_width = im.shape[0], im.shape[1]
if isinstance(self.target_size, int): if isinstance(self.target_size, int):
...@@ -483,21 +282,23 @@ class Padding: ...@@ -483,21 +282,23 @@ class Padding:
'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})' 'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})'
.format(im_width, im_height, target_width, target_height)) .format(im_width, im_height, target_width, target_height))
else: else:
im = cv2.copyMakeBorder(im, im = cv2.copyMakeBorder(
0, im,
pad_height, 0,
0, pad_height,
pad_width, 0,
cv2.BORDER_CONSTANT, pad_width,
value=self.im_padding_value) cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None: if label is not None:
label = cv2.copyMakeBorder(label, label = cv2.copyMakeBorder(
0, label,
pad_height, 0,
0, pad_height,
pad_width, 0,
cv2.BORDER_CONSTANT, pad_width,
value=self.label_padding_value) cv2.BORDER_CONSTANT,
value=self.label_padding_value)
if label is None: if label is None:
return (im, im_info) return (im, im_info)
else: else:
...@@ -505,17 +306,6 @@ class Padding: ...@@ -505,17 +306,6 @@ class Padding:
class RandomPaddingCrop: class RandomPaddingCrop:
"""对图像和标注图进行随机裁剪,当所需要的裁剪尺寸大于原图时,则进行padding操作。
Args:
crop_size (int|list|tuple): 裁剪图像大小。默认为512。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认值为255。
Raises:
TypeError: crop_size不是int/list/tuple。
ValueError: target_size为list/tuple时元素个数不等于2。
"""
def __init__(self, def __init__(self,
crop_size=512, crop_size=512,
im_padding_value=[127.5, 127.5, 127.5], im_padding_value=[127.5, 127.5, 127.5],
...@@ -534,17 +324,6 @@ class RandomPaddingCrop: ...@@ -534,17 +324,6 @@ class RandomPaddingCrop:
self.label_padding_value = label_padding_value self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None): def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if isinstance(self.crop_size, int): if isinstance(self.crop_size, int):
crop_width = self.crop_size crop_width = self.crop_size
crop_height = self.crop_size crop_height = self.crop_size
...@@ -564,21 +343,23 @@ class RandomPaddingCrop: ...@@ -564,21 +343,23 @@ class RandomPaddingCrop:
pad_height = max(crop_height - img_height, 0) pad_height = max(crop_height - img_height, 0)
pad_width = max(crop_width - img_width, 0) pad_width = max(crop_width - img_width, 0)
if (pad_height > 0 or pad_width > 0): if (pad_height > 0 or pad_width > 0):
im = cv2.copyMakeBorder(im, im = cv2.copyMakeBorder(
0, im,
pad_height, 0,
0, pad_height,
pad_width, 0,
cv2.BORDER_CONSTANT, pad_width,
value=self.im_padding_value) cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None: if label is not None:
label = cv2.copyMakeBorder(label, label = cv2.copyMakeBorder(
0, label,
pad_height, 0,
0, pad_height,
pad_width, 0,
cv2.BORDER_CONSTANT, pad_width,
value=self.label_padding_value) cv2.BORDER_CONSTANT,
value=self.label_padding_value)
img_height = im.shape[0] img_height = im.shape[0]
img_width = im.shape[1] img_width = im.shape[1]
...@@ -586,11 +367,11 @@ class RandomPaddingCrop: ...@@ -586,11 +367,11 @@ class RandomPaddingCrop:
h_off = np.random.randint(img_height - crop_height + 1) h_off = np.random.randint(img_height - crop_height + 1)
w_off = np.random.randint(img_width - crop_width + 1) w_off = np.random.randint(img_width - crop_width + 1)
im = im[h_off:(crop_height + h_off), w_off:(w_off + im = im[h_off:(crop_height + h_off), w_off:(
crop_width), :] w_off + crop_width), :]
if label is not None: if label is not None:
label = label[h_off:(crop_height + label = label[h_off:(crop_height + h_off), w_off:(
h_off), w_off:(w_off + crop_width)] w_off + crop_width)]
if label is None: if label is None:
return (im, im_info) return (im, im_info)
else: else:
...@@ -598,26 +379,10 @@ class RandomPaddingCrop: ...@@ -598,26 +379,10 @@ class RandomPaddingCrop:
class RandomBlur: class RandomBlur:
"""以一定的概率对图像进行高斯模糊。
Args:
prob (float): 图像模糊概率。默认为0.1。
"""
def __init__(self, prob=0.1): def __init__(self, prob=0.1):
self.prob = prob self.prob = prob
def __call__(self, im, im_info=None, label=None): def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.prob <= 0: if self.prob <= 0:
n = 0 n = 0
elif self.prob >= 1: elif self.prob >= 1:
...@@ -640,16 +405,6 @@ class RandomBlur: ...@@ -640,16 +405,6 @@ class RandomBlur:
class RandomRotation: class RandomRotation:
"""对图像进行随机旋转。
在不超过最大旋转角度的情况下,图像进行随机旋转,当存在标注图像时,同步进行,
并对旋转后的图像和标注图像进行相应的padding。
Args:
max_rotation (float): 最大旋转角度。默认为15度。
im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。
label_padding_value (int): 标注图像padding的值。默认为255。
"""
def __init__(self, def __init__(self,
max_rotation=15, max_rotation=15,
im_padding_value=[127.5, 127.5, 127.5], im_padding_value=[127.5, 127.5, 127.5],
...@@ -659,17 +414,6 @@ class RandomRotation: ...@@ -659,17 +414,6 @@ class RandomRotation:
self.label_padding_value = label_padding_value self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None): def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.max_rotation > 0: if self.max_rotation > 0:
(h, w) = im.shape[:2] (h, w) = im.shape[:2]
do_rotation = np.random.uniform(-self.max_rotation, do_rotation = np.random.uniform(-self.max_rotation,
...@@ -686,18 +430,20 @@ class RandomRotation: ...@@ -686,18 +430,20 @@ class RandomRotation:
r[0, 2] += (nw / 2) - cx r[0, 2] += (nw / 2) - cx
r[1, 2] += (nh / 2) - cy r[1, 2] += (nh / 2) - cy
dsize = (nw, nh) dsize = (nw, nh)
im = cv2.warpAffine(im, im = cv2.warpAffine(
r, im,
dsize=dsize, r,
flags=cv2.INTER_LINEAR, dsize=dsize,
borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_LINEAR,
borderValue=self.im_padding_value) borderMode=cv2.BORDER_CONSTANT,
label = cv2.warpAffine(label, borderValue=self.im_padding_value)
r, label = cv2.warpAffine(
dsize=dsize, label,
flags=cv2.INTER_NEAREST, r,
borderMode=cv2.BORDER_CONSTANT, dsize=dsize,
borderValue=self.label_padding_value) flags=cv2.INTER_NEAREST,
borderMode=cv2.BORDER_CONSTANT,
borderValue=self.label_padding_value)
if label is None: if label is None:
return (im, im_info) return (im, im_info)
...@@ -706,29 +452,11 @@ class RandomRotation: ...@@ -706,29 +452,11 @@ class RandomRotation:
class RandomScaleAspect: class RandomScaleAspect:
"""裁剪并resize回原始尺寸的图像和标注图像。
按照一定的面积比和宽高比对图像进行裁剪,并reszie回原始图像的图像,当存在标注图时,同步进行。
Args:
min_scale (float):裁取图像占原始图像的面积比,取值[0,1],为0时则返回原图。默认为0.5。
aspect_ratio (float): 裁取图像的宽高比范围,非负值,为0时返回原图。默认为0.33。
"""
def __init__(self, min_scale=0.5, aspect_ratio=0.33): def __init__(self, min_scale=0.5, aspect_ratio=0.33):
self.min_scale = min_scale self.min_scale = min_scale
self.aspect_ratio = aspect_ratio self.aspect_ratio = aspect_ratio
def __call__(self, im, im_info=None, label=None): def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
if self.min_scale != 0 and self.aspect_ratio != 0: if self.min_scale != 0 and self.aspect_ratio != 0:
img_height = im.shape[0] img_height = im.shape[0]
img_width = im.shape[1] img_width = im.shape[1]
...@@ -751,10 +479,12 @@ class RandomScaleAspect: ...@@ -751,10 +479,12 @@ class RandomScaleAspect:
im = im[h1:(h1 + dh), w1:(w1 + dw), :] im = im[h1:(h1 + dh), w1:(w1 + dw), :]
label = label[h1:(h1 + dh), w1:(w1 + dw)] label = label[h1:(h1 + dh), w1:(w1 + dw)]
im = cv2.resize(im, (img_width, img_height), im = cv2.resize(
interpolation=cv2.INTER_LINEAR) im, (img_width, img_height),
label = cv2.resize(label, (img_width, img_height), interpolation=cv2.INTER_LINEAR)
interpolation=cv2.INTER_NEAREST) label = cv2.resize(
label, (img_width, img_height),
interpolation=cv2.INTER_NEAREST)
break break
if label is None: if label is None:
return (im, im_info) return (im, im_info)
...@@ -763,21 +493,6 @@ class RandomScaleAspect: ...@@ -763,21 +493,6 @@ class RandomScaleAspect:
class RandomDistort: class RandomDistort:
"""对图像进行随机失真。
1. 对变换的操作顺序进行随机化操作。
2. 按照1中的顺序以一定的概率对图像进行随机像素内容变换。
Args:
brightness_range (float): 明亮度因子的范围。默认为0.5。
brightness_prob (float): 随机调整明亮度的概率。默认为0.5。
contrast_range (float): 对比度因子的范围。默认为0.5。
contrast_prob (float): 随机调整对比度的概率。默认为0.5。
saturation_range (float): 饱和度因子的范围。默认为0.5。
saturation_prob (float): 随机调整饱和度的概率。默认为0.5。
hue_range (int): 色调因子的范围。默认为18。
hue_prob (float): 随机调整色调的概率。默认为0.5。
"""
def __init__(self, def __init__(self,
brightness_range=0.5, brightness_range=0.5,
brightness_prob=0.5, brightness_prob=0.5,
...@@ -797,17 +512,6 @@ class RandomDistort: ...@@ -797,17 +512,6 @@ class RandomDistort:
self.hue_prob = hue_prob self.hue_prob = hue_prob
def __call__(self, im, im_info=None, label=None): def __call__(self, im, im_info=None, label=None):
"""
Args:
im (np.ndarray): 图像np.ndarray数据。
im_info (dict): 存储与图像相关的信息。
label (np.ndarray): 标注图像np.ndarray数据。
Returns:
tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;
当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、
存储与图像相关信息的字典和标注图像np.ndarray数据。
"""
brightness_lower = 1 - self.brightness_range brightness_lower = 1 - self.brightness_range
brightness_upper = 1 + self.brightness_range brightness_upper = 1 + self.brightness_range
contrast_lower = 1 - self.contrast_range contrast_lower = 1 - self.contrast_range
......
...@@ -52,7 +52,11 @@ def load_pretrained_model(model, pretrained_model): ...@@ -52,7 +52,11 @@ def load_pretrained_model(model, pretrained_model):
logging.info('Load pretrained model from {}'.format(pretrained_model)) logging.info('Load pretrained model from {}'.format(pretrained_model))
if os.path.exists(pretrained_model): if os.path.exists(pretrained_model):
ckpt_path = os.path.join(pretrained_model, 'model') ckpt_path = os.path.join(pretrained_model, 'model')
para_state_dict, _ = fluid.load_dygraph(ckpt_path) try:
para_state_dict, _ = fluid.load_dygraph(ckpt_path)
except:
para_state_dict = fluid.load_program_state(pretrained_model)
model_state_dict = model.state_dict() model_state_dict = model.state_dict()
keys = model_state_dict.keys() keys = model_state_dict.keys()
num_params_loaded = 0 num_params_loaded = 0
......
...@@ -16,8 +16,10 @@ import argparse ...@@ -16,8 +16,10 @@ import argparse
import os import os
import math import math
from paddle.fluid.dygraph.base import to_variable
import numpy as np import numpy as np
import tqdm
import cv2
from paddle.fluid.dygraph.base import to_variable
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.io import DataLoader from paddle.fluid.io import DataLoader
...@@ -25,7 +27,7 @@ from paddle.fluid.dataloader import BatchSampler ...@@ -25,7 +27,7 @@ from paddle.fluid.dataloader import BatchSampler
from datasets import OpticDiscSeg, Cityscapes from datasets import OpticDiscSeg, Cityscapes
import transforms as T import transforms as T
import models from models import MODELS
import utils.logging as logging import utils.logging as logging
from utils import get_environ_info from utils import get_environ_info
from utils import ConfusionMatrix from utils import ConfusionMatrix
...@@ -39,7 +41,8 @@ def parse_args(): ...@@ -39,7 +41,8 @@ def parse_args():
parser.add_argument( parser.add_argument(
'--model_name', '--model_name',
dest='model_name', dest='model_name',
help="Model type for evaluation, which is one of ('UNet')", help='Model type for evaluation, which is one of {}'.format(
str(list(MODELS.keys()))),
type=str, type=str,
default='UNet') default='UNet')
...@@ -60,12 +63,6 @@ def parse_args(): ...@@ -60,12 +63,6 @@ def parse_args():
nargs=2, nargs=2,
default=[512, 512], default=[512, 512],
type=int) type=int)
parser.add_argument(
'--batch_size',
dest='batch_size',
help='Mini batch size',
type=int,
default=2)
parser.add_argument( parser.add_argument(
'--model_dir', '--model_dir',
dest='model_dir', dest='model_dir',
...@@ -78,10 +75,8 @@ def parse_args(): ...@@ -78,10 +75,8 @@ def parse_args():
def evaluate(model, def evaluate(model,
eval_dataset=None, eval_dataset=None,
places=None,
model_dir=None, model_dir=None,
num_classes=None, num_classes=None,
batch_size=2,
ignore_index=255, ignore_index=255,
epoch_id=None): epoch_id=None):
ckpt_path = os.path.join(model_dir, 'model') ckpt_path = os.path.join(model_dir, 'model')
...@@ -89,15 +84,7 @@ def evaluate(model, ...@@ -89,15 +84,7 @@ def evaluate(model,
model.set_dict(para_state_dict) model.set_dict(para_state_dict)
model.eval() model.eval()
batch_sampler = BatchSampler( total_steps = len(eval_dataset)
eval_dataset, batch_size=batch_size, shuffle=False, drop_last=False)
loader = DataLoader(
eval_dataset,
batch_sampler=batch_sampler,
places=places,
return_list=True,
)
total_steps = len(batch_sampler)
conf_mat = ConfusionMatrix(num_classes, streaming=True) conf_mat = ConfusionMatrix(num_classes, streaming=True)
logging.info( logging.info(
...@@ -105,15 +92,26 @@ def evaluate(model, ...@@ -105,15 +92,26 @@ def evaluate(model,
len(eval_dataset), total_steps)) len(eval_dataset), total_steps))
timer = Timer() timer = Timer()
timer.start() timer.start()
for step, data in enumerate(loader): for step, (im, im_info, label) in enumerate(eval_dataset):
images = data[0] im = to_variable(im)
labels = data[1].astype('int64') pred, _ = model(im, mode='eval')
pred, _ = model(images, mode='eval') pred = pred.numpy().astype('float32')
pred = np.squeeze(pred)
pred = pred.numpy() for info in im_info[::-1]:
labels = labels.numpy() if info[0] == 'resize':
mask = labels != ignore_index h, w = info[1][0], info[1][1]
conf_mat.calculate(pred=pred, label=labels, ignore=mask) pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
elif info[0] == 'padding':
h, w = info[1][0], info[1][1]
pred = pred[0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(
info[0]))
pred = pred[np.newaxis, :, :, np.newaxis]
pred = pred.astype('int64')
mask = label != ignore_index
conf_mat.calculate(pred=pred, label=label, ignore=mask)
_, iou = conf_mat.mean_iou() _, iou = conf_mat.mean_iou()
time_step = timer.elapsed_time() time_step = timer.elapsed_time()
...@@ -153,16 +151,17 @@ def main(args): ...@@ -153,16 +151,17 @@ def main(args):
eval_transforms = T.Compose([T.Resize(args.input_size), T.Normalize()]) eval_transforms = T.Compose([T.Resize(args.input_size), T.Normalize()])
eval_dataset = dataset(transforms=eval_transforms, mode='eval') eval_dataset = dataset(transforms=eval_transforms, mode='eval')
if args.model_name == 'UNet': if args.model_name not in MODELS:
model = models.UNet(num_classes=eval_dataset.num_classes) raise Exception(
'--model_name is invalid. it should be one of {}'.format(
str(list(MODELS.keys()))))
model = MODELS[args.model_name](num_classes=eval_dataset.num_classes)
evaluate( evaluate(
model, model,
eval_dataset, eval_dataset,
places=places,
model_dir=args.model_dir, model_dir=args.model_dir,
num_classes=eval_dataset.num_classes, num_classes=eval_dataset.num_classes)
batch_size=args.batch_size)
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册