提交 4231b3f7 编写于 作者: C chenguowei01

Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleSeg into dygraph

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .train import train
from .val import evaluate
from .infer import infer
__all__ = ['train', 'evaluate', 'infer']
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from paddle.fluid.dygraph.base import to_variable
import numpy as np
import paddle.fluid as fluid
import cv2
import tqdm
from dygraph import utils
import dygraph.utils.logger as logger
def mkdir(path):
sub_dir = os.path.dirname(path)
if not os.path.exists(sub_dir):
os.makedirs(sub_dir)
def infer(model, test_dataset=None, model_dir=None, save_dir='output'):
ckpt_path = os.path.join(model_dir, 'model')
para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path)
model.set_dict(para_state_dict)
model.eval()
added_saved_dir = os.path.join(save_dir, 'added')
pred_saved_dir = os.path.join(save_dir, 'prediction')
logger.info("Start to predict...")
for im, im_info, im_path in tqdm.tqdm(test_dataset):
im = to_variable(im)
pred, _ = model(im)
pred = pred.numpy()
pred = np.squeeze(pred).astype('uint8')
for info in im_info[::-1]:
if info[0] == 'resize':
h, w = info[1][0], info[1][1]
pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
elif info[0] == 'padding':
h, w = info[1][0], info[1][1]
pred = pred[0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(
info[0]))
im_file = im_path.replace(test_dataset.dataset_root, '')
if im_file[0] == '/':
im_file = im_file[1:]
# save added image
added_image = utils.visualize(im_path, pred, weight=0.6)
added_image_path = os.path.join(added_saved_dir, im_file)
mkdir(added_image_path)
cv2.imwrite(added_image_path, added_image)
# save prediction
pred_im = utils.visualize(im_path, pred, weight=0.0)
pred_saved_path = os.path.join(pred_saved_dir, im_file)
mkdir(pred_saved_path)
cv2.imwrite(pred_saved_path, pred_im)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.io import DataLoader
from paddle.io import DistributedBatchSampler
import paddle.nn.functional as F
import dygraph.utils.logger as logger
from dygraph.utils import load_pretrained_model
from dygraph.utils import resume
from dygraph.utils import Timer, calculate_eta
from .val import evaluate
def check_logits_losses(logits, losses):
len_logits = len(logits)
len_losses = len(losses['types'])
if len_logits != len_losses:
raise RuntimeError(
'The length of logits should equal to the types of loss config: {} != {}.'
.format(len_logits, len_losses))
def loss_computation(logits, label, losses):
check_logits_losses(logits, losses)
loss = 0
for i in range(len(logits)):
logit = logits[i]
if logit.shape[-2:] != label.shape[-2:]:
logit = F.resize_bilinear(logit, label.shape[-2:])
loss_i = losses['types'][i](logit, label)
loss += losses['coef'][i] * loss_i
return loss
def train(model,
train_dataset,
places=None,
eval_dataset=None,
optimizer=None,
save_dir='output',
iters=10000,
batch_size=2,
resume_model=None,
save_interval_iters=1000,
log_iters=10,
num_classes=None,
num_workers=8,
use_vdl=False,
losses=None,
ignore_index=255):
nranks = ParallelEnv().nranks
start_iter = 0
if resume_model is not None:
start_iter = resume(model, optimizer, resume_model)
if not os.path.isdir(save_dir):
if os.path.exists(save_dir):
os.remove(save_dir)
os.makedirs(save_dir)
if nranks > 1:
strategy = fluid.dygraph.prepare_context()
ddp_model = fluid.dygraph.DataParallel(model, strategy)
batch_sampler = DistributedBatchSampler(
train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
loader = DataLoader(
train_dataset,
batch_sampler=batch_sampler,
places=places,
num_workers=num_workers,
return_list=True,
)
if use_vdl:
from visualdl import LogWriter
log_writer = LogWriter(save_dir)
timer = Timer()
avg_loss = 0.0
iters_per_epoch = len(batch_sampler)
best_mean_iou = -1.0
best_model_iter = -1
train_reader_cost = 0.0
train_batch_cost = 0.0
timer.start()
iter = start_iter
while iter < iters:
for data in loader:
iter += 1
if iter > iters:
break
train_reader_cost += timer.elapsed_time()
images = data[0]
labels = data[1].astype('int64')
if nranks > 1:
logits = ddp_model(images)
loss = loss_computation(logits, labels, losses)
# loss = ddp_model(images, labels)
# apply_collective_grads sum grads over multiple gpus.
loss = ddp_model.scale_loss(loss)
loss.backward()
ddp_model.apply_collective_grads()
else:
logits = model(images)
loss = loss_computation(logits, labels, losses)
# loss = model(images, labels)
loss.backward()
optimizer.minimize(loss)
model.clear_gradients()
avg_loss += loss.numpy()[0]
lr = optimizer.current_step_lr()
train_batch_cost += timer.elapsed_time()
if (iter) % log_iters == 0 and ParallelEnv().local_rank == 0:
avg_loss /= log_iters
avg_train_reader_cost = train_reader_cost / log_iters
avg_train_batch_cost = train_batch_cost / log_iters
train_reader_cost = 0.0
train_batch_cost = 0.0
remain_iters = iters - iter
eta = calculate_eta(remain_iters, avg_train_batch_cost)
logger.info(
"[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.4f} | ETA {}"
.format((iter - 1) // iters_per_epoch + 1, iter, iters,
avg_loss * nranks, lr, avg_train_batch_cost,
avg_train_reader_cost, eta))
if use_vdl:
log_writer.add_scalar('Train/loss', avg_loss * nranks, iter)
log_writer.add_scalar('Train/lr', lr, iter)
log_writer.add_scalar('Train/batch_cost',
avg_train_batch_cost, iter)
log_writer.add_scalar('Train/reader_cost',
avg_train_reader_cost, iter)
avg_loss = 0.0
if (iter % save_interval_iters == 0
or iter == iters) and ParallelEnv().local_rank == 0:
current_save_dir = os.path.join(save_dir,
"iter_{}".format(iter))
if not os.path.isdir(current_save_dir):
os.makedirs(current_save_dir)
fluid.save_dygraph(model.state_dict(),
os.path.join(current_save_dir, 'model'))
fluid.save_dygraph(optimizer.state_dict(),
os.path.join(current_save_dir, 'model'))
if eval_dataset is not None:
mean_iou, avg_acc = evaluate(
model,
eval_dataset,
model_dir=current_save_dir,
num_classes=num_classes,
ignore_index=ignore_index,
iter_id=iter)
if mean_iou > best_mean_iou:
best_mean_iou = mean_iou
best_model_iter = iter
best_model_dir = os.path.join(save_dir, "best_model")
fluid.save_dygraph(
model.state_dict(),
os.path.join(best_model_dir, 'model'))
logger.info(
'Current evaluated best model in eval_dataset is iter_{}, miou={:4f}'
.format(best_model_iter, best_mean_iou))
if use_vdl:
log_writer.add_scalar('Evaluate/mIoU', mean_iou, iter)
log_writer.add_scalar('Evaluate/aAcc', avg_acc, iter)
model.train()
timer.restart()
if use_vdl:
log_writer.close()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import tqdm
import cv2
from paddle.fluid.dygraph.base import to_variable
import paddle.fluid as fluid
import paddle.nn.functional as F
import paddle
import dygraph.utils.logger as logger
from dygraph.utils import ConfusionMatrix
from dygraph.utils import Timer, calculate_eta
def evaluate(model,
eval_dataset=None,
model_dir=None,
num_classes=None,
ignore_index=255,
iter_id=None):
ckpt_path = os.path.join(model_dir, 'model')
para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path)
model.set_dict(para_state_dict)
model.eval()
total_iters = len(eval_dataset)
conf_mat = ConfusionMatrix(num_classes, streaming=True)
logger.info(
"Start to evaluating(total_samples={}, total_iters={})...".format(
len(eval_dataset), total_iters))
timer = Timer()
timer.start()
for iter, (im, im_info, label) in tqdm.tqdm(
enumerate(eval_dataset), total=total_iters):
im = to_variable(im)
# pred, _ = model(im)
logits = model(im)
pred = paddle.argmax(logits[0], axis=1)
pred = pred.numpy().astype('float32')
pred = np.squeeze(pred)
for info in im_info[::-1]:
if info[0] == 'resize':
h, w = info[1][0], info[1][1]
pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
elif info[0] == 'padding':
h, w = info[1][0], info[1][1]
pred = pred[0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(
info[0]))
pred = pred[np.newaxis, :, :, np.newaxis]
pred = pred.astype('int64')
mask = label != ignore_index
conf_mat.calculate(pred=pred, label=label, ignore=mask)
_, iou = conf_mat.mean_iou()
time_iter = timer.elapsed_time()
remain_iter = total_iters - iter - 1
logger.debug(
"[EVAL] iter_id={}, iter={}/{}, iou={:4f}, sec/iter={:.4f} | ETA {}"
.format(iter_id, iter + 1, total_iters, iou, time_iter,
calculate_eta(remain_iter, time_iter)))
timer.restart()
category_iou, miou = conf_mat.mean_iou()
category_acc, macc = conf_mat.accuracy()
logger.info("[EVAL] #Images={} mAcc={:.4f} mIoU={:.4f}".format(
len(eval_dataset), macc, miou))
logger.info("[EVAL] Category IoU: " + str(category_iou))
logger.info("[EVAL] Category Acc: " + str(category_acc))
logger.info("[EVAL] Kappa:{:.4f} ".format(conf_mat.kappa()))
return miou, macc
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import manager
from . import param_init
# -*- encoding: utf-8 -*-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections.abc import Sequence
import inspect
class ComponentManager:
"""
Implement a manager class to add the new component properly.
The component can be added as either class or function type.
For example:
>>> model_manager = ComponentManager()
>>> class AlexNet: ...
>>> class ResNet: ...
>>> model_manager.add_component(AlexNet)
>>> model_manager.add_component(ResNet)
or pass a sequence alliteratively:
>>> model_manager.add_component([AlexNet, ResNet])
>>> print(model_manager.components_dict)
output: {'AlexNet': <class '__main__.AlexNet'>, 'ResNet': <class '__main__.ResNet'>}
Or an easier way, using it as a Python decorator, while just add it above the class declaration.
>>> model_manager = ComponentManager()
>>> @model_manager.add_component
>>> class AlexNet: ...
>>> @model_manager.add_component
>>> class ResNet: ...
>>> print(model_manager.components_dict)
output: {'AlexNet': <class '__main__.AlexNet'>, 'ResNet': <class '__main__.ResNet'>}
"""
def __init__(self):
self._components_dict = dict()
def __len__(self):
return len(self._components_dict)
def __repr__(self):
return "{}:{}".format(self.__class__.__name__,
list(self._components_dict.keys()))
def __getitem__(self, item):
if item not in self._components_dict.keys():
raise KeyError("{} does not exist in the current {}".format(
item, self))
return self._components_dict[item]
@property
def components_dict(self):
return self._components_dict
def _add_single_component(self, component):
"""
Add a single component into the corresponding manager
Args:
component (function | class): a new component
Returns:
None
"""
# Currently only support class or function type
if not (inspect.isclass(component) or inspect.isfunction(component)):
raise TypeError(
"Expect class/function type, but received {}".format(
type(component)))
# Obtain the internal name of the component
component_name = component.__name__
# Check whether the component was added already
if component_name in self._components_dict.keys():
raise KeyError("{} exists already!".format(component_name))
else:
# Take the internal name of the component as its key
self._components_dict[component_name] = component
def add_component(self, components):
"""
Add component(s) into the corresponding manager
Args:
components (function | class | list | tuple): support three types of components
Returns:
None
"""
# Check whether the type is a sequence
if isinstance(components, Sequence):
for component in components:
self._add_single_component(component)
else:
component = components
self._add_single_component(component)
return components
MODELS = ComponentManager()
BACKBONES = ComponentManager()
DATASETS = ComponentManager()
TRANSFORMS = ComponentManager()
LOSSES = ComponentManager()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
def constant_init(param, **kwargs):
initializer = fluid.initializer.Constant(**kwargs)
initializer(param, param.block)
def normal_init(param, **kwargs):
initializer = fluid.initializer.Normal(**kwargs)
initializer(param, param.block)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .dataset import Dataset
from .optic_disc_seg import OpticDiscSeg
from .cityscapes import Cityscapes
from .voc import PascalVOC
from .ade import ADE20K
DATASETS = {
"OpticDiscSeg": OpticDiscSeg,
"Cityscapes": Cityscapes,
"PascalVOC": PascalVOC,
"ADE20K": ADE20K
}
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from PIL import Image
from .dataset import Dataset
from dygraph.utils.download import download_file_and_uncompress
from dygraph.cvlibs import manager
from dygraph.transforms import Compose
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
URL = "http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip"
@manager.DATASETS.add_component
class ADE20K(Dataset):
"""ADE20K dataset `http://sceneparsing.csail.mit.edu/`.
Args:
dataset_root: The dataset directory.
mode: Which part of dataset to use.. it is one of ('train', 'val'). Default: 'train'.
transforms: Transforms for image.
download: Whether to download dataset if `dataset_root` is None.
"""
def __init__(self,
dataset_root=None,
mode='train',
transforms=None,
download=True):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.mode = mode
self.file_list = list()
self.num_classes = 150
if mode.lower() not in ['train', 'val']:
raise Exception(
"`mode` should be one of ('train', 'val') in ADE20K dataset, but got {}."
.format(mode))
if self.transforms is None:
raise Exception("`transforms` is necessary, but it is None.")
if self.dataset_root is None:
if not download:
raise Exception(
"`dataset_root` not set and auto download disabled.")
self.dataset_root = download_file_and_uncompress(
url=URL,
savepath=DATA_HOME,
extrapath=DATA_HOME,
extraname='ADEChallengeData2016')
elif not os.path.exists(self.dataset_root):
raise Exception('there is not `dataset_root`: {}.'.format(
self.dataset_root))
if mode == 'train':
img_dir = os.path.join(self.dataset_root, 'images/training')
grt_dir = os.path.join(self.dataset_root, 'annotations/training')
elif mode == 'val':
img_dir = os.path.join(self.dataset_root, 'images/validation')
grt_dir = os.path.join(self.dataset_root, 'annotations/validation')
img_files = os.listdir(img_dir)
grt_files = [i.replace('.jpg', '.png') for i in img_files]
for i in range(len(img_files)):
img_path = os.path.join(img_dir, img_files[i])
grt_path = os.path.join(grt_dir, grt_files[i])
self.file_list.append([img_path, grt_path])
def __getitem__(self, idx):
image_path, grt_path = self.file_list[idx]
if self.mode == 'test':
im, im_info, _ = self.transforms(im=image_path)
im = im[np.newaxis, ...]
return im, im_info, image_path
elif self.mode == 'val':
im, im_info, _ = self.transforms(im=image_path)
im = im[np.newaxis, ...]
label = np.asarray(Image.open(grt_path))
label = label - 1
label = label[np.newaxis, np.newaxis, :, :]
return im, im_info, label
else:
im, im_info, label = self.transforms(im=image_path, label=grt_path)
label = label - 1
return im, label
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import glob
from .dataset import Dataset
from dygraph.cvlibs import manager
from dygraph.transforms import Compose
@manager.DATASETS.add_component
class Cityscapes(Dataset):
"""Cityscapes dataset `https://www.cityscapes-dataset.com/`.
The folder structure is as follow:
cityscapes
|
|--leftImg8bit
| |--train
| |--val
| |--test
|
|--gtFine
| |--train
| |--val
| |--test
Make sure there are **labelTrainIds.png in gtFine directory. If not, please run the conver_cityscapes.py in tools.
Args:
dataset_root: Cityscapes dataset directory.
mode: Which part of dataset to use. it is one of ('train', 'val', 'test'). Default: 'train'.
transforms: Transforms for image.
"""
def __init__(self, dataset_root, transforms=None, mode='train'):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.file_list = list()
self.mode = mode
self.num_classes = 19
if mode.lower() not in ['train', 'val', 'test']:
raise Exception(
"mode should be 'train', 'val' or 'test', but got {}.".format(
mode))
if self.transforms is None:
raise Exception("`transforms` is necessary, but it is None.")
img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
grt_dir = os.path.join(self.dataset_root, 'gtFine')
if self.dataset_root is None or not os.path.isdir(
self.dataset_root) or not os.path.isdir(
img_dir) or not os.path.isdir(grt_dir):
raise Exception(
"The dataset is not Found or the folder structure is nonconfoumance."
)
grt_files = sorted(
glob.glob(
os.path.join(grt_dir, mode, '*', '*_gtFine_labelTrainIds.png')))
img_files = sorted(
glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.png')))
self.file_list = [[img_path, grt_path]
for img_path, grt_path in zip(img_files, grt_files)]
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle.fluid as fluid
import numpy as np
from PIL import Image
from dygraph.cvlibs import manager
from dygraph.transforms import Compose
@manager.DATASETS.add_component
class Dataset(fluid.io.Dataset):
"""Pass in a custom dataset that conforms to the format.
Args:
dataset_root: The dataset directory.
num_classes: Number of classes.
mode: which part of dataset to use. it is one of ('train', 'val', 'test'). Default: 'train'.
train_list: The train dataset file. When image_set is 'train', train_list is necessary.
The contents of train_list file are as follow:
image1.jpg ground_truth1.png
image2.jpg ground_truth2.png
val_list: The evaluation dataset file. When image_set is 'val', val_list is necessary.
The contents is the same as train_list
test_list: The test dataset file. When image_set is 'test', test_list is necessary.
The annotation file is not necessary in test_list file.
separator: The separator of dataset list. Default: ' '.
transforms: Transforms for image.
Examples:
todo
"""
def __init__(self,
dataset_root,
num_classes,
mode='train',
train_list=None,
val_list=None,
test_list=None,
separator=' ',
transforms=None):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.file_list = list()
self.mode = mode
self.num_classes = num_classes
if mode.lower() not in ['train', 'val', 'test']:
raise Exception(
"mode should be 'train', 'val' or 'test', but got {}.".format(
mode))
if self.transforms is None:
raise Exception("`transforms` is necessary, but it is None.")
self.dataset_root = dataset_root
if not os.path.exists(self.dataset_root):
raise Exception('there is not `dataset_root`: {}.'.format(
self.dataset_root))
if mode == 'train':
if train_list is None:
raise Exception(
'When `mode` is "train", `train_list` is necessary, but it is None.'
)
elif not os.path.exists(train_list):
raise Exception(
'`train_list` is not found: {}'.format(train_list))
else:
file_list = train_list
elif mode == 'val':
if val_list is None:
raise Exception(
'When `mode` is "val", `val_list` is necessary, but it is None.'
)
elif not os.path.exists(val_list):
raise Exception('`val_list` is not found: {}'.format(val_list))
else:
file_list = val_list
else:
if test_list is None:
raise Exception(
'When `mode` is "test", `test_list` is necessary, but it is None.'
)
elif not os.path.exists(test_list):
raise Exception(
'`test_list` is not found: {}'.format(test_list))
else:
file_list = test_list
with open(file_list, 'r') as f:
for line in f:
items = line.strip().split(separator)
if len(items) != 2:
if mode == 'train' or mode == 'val':
raise Exception(
"File list format incorrect! In training or evaluation task it should be"
" image_name{}label_name\\n".format(separator))
image_path = os.path.join(self.dataset_root, items[0])
grt_path = None
else:
image_path = os.path.join(self.dataset_root, items[0])
grt_path = os.path.join(self.dataset_root, items[1])
self.file_list.append([image_path, grt_path])
def __getitem__(self, idx):
image_path, grt_path = self.file_list[idx]
if self.mode == 'test':
im, im_info, _ = self.transforms(im=image_path)
im = im[np.newaxis, ...]
return im, im_info, image_path
elif self.mode == 'val':
im, im_info, _ = self.transforms(im=image_path)
im = im[np.newaxis, ...]
label = np.asarray(Image.open(grt_path))
label = label[np.newaxis, np.newaxis, :, :]
return im, im_info, label
else:
im, im_info, label = self.transforms(im=image_path, label=grt_path)
return im, label
def __len__(self):
return len(self.file_list)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from .dataset import Dataset
from dygraph.utils.download import download_file_and_uncompress
from dygraph.cvlibs import manager
from dygraph.transforms import Compose
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip"
@manager.DATASETS.add_component
class OpticDiscSeg(Dataset):
def __init__(self,
dataset_root=None,
transforms=None,
mode='train',
download=True):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.file_list = list()
self.mode = mode
self.num_classes = 2
if mode.lower() not in ['train', 'val', 'test']:
raise Exception(
"`mode` should be 'train', 'val' or 'test', but got {}.".format(
mode))
if self.transforms is None:
raise Exception("`transforms` is necessary, but it is None.")
if self.dataset_root is None:
if not download:
raise Exception(
"`data_root` not set and auto download disabled.")
self.dataset_root = download_file_and_uncompress(
url=URL, savepath=DATA_HOME, extrapath=DATA_HOME)
elif not os.path.exists(self.dataset_root):
raise Exception('there is not `dataset_root`: {}.'.format(
self.dataset_root))
if mode == 'train':
file_list = os.path.join(self.dataset_root, 'train_list.txt')
elif mode == 'val':
file_list = os.path.join(self.dataset_root, 'val_list.txt')
else:
file_list = os.path.join(self.dataset_root, 'test_list.txt')
with open(file_list, 'r') as f:
for line in f:
items = line.strip().split()
if len(items) != 2:
if mode == 'train' or mode == 'val':
raise Exception(
"File list format incorrect! It should be"
" image_name label_name\\n")
image_path = os.path.join(self.dataset_root, items[0])
grt_path = None
else:
image_path = os.path.join(self.dataset_root, items[0])
grt_path = os.path.join(self.dataset_root, items[1])
self.file_list.append([image_path, grt_path])
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from .dataset import Dataset
from dygraph.utils.download import download_file_and_uncompress
from dygraph.cvlibs import manager
from dygraph.transforms import Compose
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"
@manager.DATASETS.add_component
class PascalVOC(Dataset):
"""Pascal VOC dataset `http://host.robots.ox.ac.uk/pascal/VOC/`. If you want to augment the dataset,
please run the voc_augment.py in tools.
Args:
dataset_root: The dataset directory.
mode: Which part of dataset to use.. it is one of ('train', 'val', 'test'). Default: 'train'.
transforms: Transforms for image.
download: Whether to download dataset if dataset_root is None.
"""
def __init__(self,
dataset_root=None,
mode='train',
transforms=None,
download=True):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.mode = mode
self.file_list = list()
self.num_classes = 21
if mode.lower() not in ['train', 'trainval', 'trainaug', 'val']:
raise Exception(
"`mode` should be one of ('train', 'trainval', 'trainaug', 'val') in PascalVOC dataset, but got {}."
.format(mode))
if self.transforms is None:
raise Exception("`transforms` is necessary, but it is None.")
if self.dataset_root is None:
if not download:
raise Exception(
"`dataset_root` not set and auto download disabled.")
self.dataset_root = download_file_and_uncompress(
url=URL,
savepath=DATA_HOME,
extrapath=DATA_HOME,
extraname='VOCdevkit')
elif not os.path.exists(self.dataset_root):
raise Exception('there is not `dataset_root`: {}.'.format(
self.dataset_root))
image_set_dir = os.path.join(self.dataset_root, 'VOC2012', 'ImageSets',
'Segmentation')
if mode == 'train':
file_list = os.path.join(image_set_dir, 'train.txt')
elif mode == 'val':
file_list = os.path.join(image_set_dir, 'val.txt')
elif mode == 'trainval':
file_list = os.path.join(image_set_dir, 'trainval.txt')
elif mode == 'trainaug':
file_list = os.path.join(image_set_dir, 'train.txt')
file_list_aug = os.path.join(image_set_dir, 'aug.txt')
if not os.path.exists(file_list_aug):
raise Exception(
"When `mode` is 'trainaug', Pascal Voc dataset should be augmented, "
"Please make sure voc_augment.py has been properly run when using this mode."
)
img_dir = os.path.join(self.dataset_root, 'VOC2012', 'JPEGImages')
grt_dir = os.path.join(self.dataset_root, 'VOC2012',
'SegmentationClass')
grt_dir_aug = os.path.join(self.dataset_root, 'VOC2012',
'SegmentationClassAug')
with open(file_list, 'r') as f:
for line in f:
line = line.strip()
image_path = os.path.join(img_dir, ''.join([line, '.jpg']))
grt_path = os.path.join(grt_dir, ''.join([line, '.png']))
self.file_list.append([image_path, grt_path])
if mode == 'trainaug':
with open(file_list_aug, 'r') as f:
for line in f:
line = line.strip()
image_path = os.path.join(img_dir, ''.join([line, '.jpg']))
grt_path = os.path.join(grt_dir_aug, ''.join([line,
'.png']))
self.file_list.append([image_path, grt_path])
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .architectures import *
from .losses import *
from .unet import UNet
from .deeplab import *
from .fcn import *
from .pspnet import *
from .ocrnet import *
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import layer_utils
from .hrnet import *
from .resnet_vd import *
from .xception_deeplab import *
from .mobilenetv3 import *
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from paddle.fluid.initializer import Normal
from paddle.nn import SyncBatchNorm as BatchNorm
from dygraph.cvlibs import manager
from dygraph.utils import utils
from dygraph.cvlibs import param_init
__all__ = [
"HRNet_W18_Small_V1", "HRNet_W18_Small_V2", "HRNet_W18", "HRNet_W30",
"HRNet_W32", "HRNet_W40", "HRNet_W44", "HRNet_W48", "HRNet_W60", "HRNet_W64"
]
class HRNet(fluid.dygraph.Layer):
"""
HRNet:Deep High-Resolution Representation Learning for Visual Recognition
https://arxiv.org/pdf/1908.07919.pdf.
Args:
backbone_pretrained (str): the path of pretrained model.
stage1_num_modules (int): number of modules for stage1. Default 1.
stage1_num_blocks (list): number of blocks per module for stage1. Default [4].
stage1_num_channels (list): number of channels per branch for stage1. Default [64].
stage2_num_modules (int): number of modules for stage2. Default 1.
stage2_num_blocks (list): number of blocks per module for stage2. Default [4, 4]
stage2_num_channels (list): number of channels per branch for stage2. Default [18, 36].
stage3_num_modules (int): number of modules for stage3. Default 4.
stage3_num_blocks (list): number of blocks per module for stage3. Default [4, 4, 4]
stage3_num_channels (list): number of channels per branch for stage3. Default [18, 36, 72].
stage4_num_modules (int): number of modules for stage4. Default 3.
stage4_num_blocks (list): number of blocks per module for stage4. Default [4, 4, 4, 4]
stage4_num_channels (list): number of channels per branch for stage4. Default [18, 36, 72. 144].
has_se (bool): whether to use Squeeze-and-Excitation module. Default False.
"""
def __init__(self,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[18, 36],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[18, 36, 72],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[18, 36, 72, 144],
has_se=False):
super(HRNet, self).__init__()
self.stage1_num_modules = stage1_num_modules
self.stage1_num_blocks = stage1_num_blocks
self.stage1_num_channels = stage1_num_channels
self.stage2_num_modules = stage2_num_modules
self.stage2_num_blocks = stage2_num_blocks
self.stage2_num_channels = stage2_num_channels
self.stage3_num_modules = stage3_num_modules
self.stage3_num_blocks = stage3_num_blocks
self.stage3_num_channels = stage3_num_channels
self.stage4_num_modules = stage4_num_modules
self.stage4_num_blocks = stage4_num_blocks
self.stage4_num_channels = stage4_num_channels
self.has_se = has_se
self.conv_layer1_1 = ConvBNLayer(
num_channels=3,
num_filters=64,
filter_size=3,
stride=2,
act='relu',
name="layer1_1")
self.conv_layer1_2 = ConvBNLayer(
num_channels=64,
num_filters=64,
filter_size=3,
stride=2,
act='relu',
name="layer1_2")
self.la1 = Layer1(
num_channels=64,
num_blocks=self.stage1_num_blocks[0],
num_filters=self.stage1_num_channels[0],
has_se=has_se,
name="layer2")
self.tr1 = TransitionLayer(
in_channels=[self.stage1_num_channels[0] * 4],
out_channels=self.stage2_num_channels,
name="tr1")
self.st2 = Stage(
num_channels=self.stage2_num_channels,
num_modules=self.stage2_num_modules,
num_blocks=self.stage2_num_blocks,
num_filters=self.stage2_num_channels,
has_se=self.has_se,
name="st2")
self.tr2 = TransitionLayer(
in_channels=self.stage2_num_channels,
out_channels=self.stage3_num_channels,
name="tr2")
self.st3 = Stage(
num_channels=self.stage3_num_channels,
num_modules=self.stage3_num_modules,
num_blocks=self.stage3_num_blocks,
num_filters=self.stage3_num_channels,
has_se=self.has_se,
name="st3")
self.tr3 = TransitionLayer(
in_channels=self.stage3_num_channels,
out_channels=self.stage4_num_channels,
name="tr3")
self.st4 = Stage(
num_channels=self.stage4_num_channels,
num_modules=self.stage4_num_modules,
num_blocks=self.stage4_num_blocks,
num_filters=self.stage4_num_channels,
has_se=self.has_se,
name="st4")
def forward(self, x, label=None, mode='train'):
input_shape = x.shape[2:]
conv1 = self.conv_layer1_1(x)
conv2 = self.conv_layer1_2(conv1)
la1 = self.la1(conv2)
tr1 = self.tr1([la1])
st2 = self.st2(tr1)
tr2 = self.tr2(st2)
st3 = self.st3(tr2)
tr3 = self.tr3(st3)
st4 = self.st4(tr3)
x0_h, x0_w = st4[0].shape[2:]
x1 = fluid.layers.resize_bilinear(st4[1], out_shape=(x0_h, x0_w))
x2 = fluid.layers.resize_bilinear(st4[2], out_shape=(x0_h, x0_w))
x3 = fluid.layers.resize_bilinear(st4[3], out_shape=(x0_h, x0_w))
x = fluid.layers.concat([st4[0], x1, x2, x3], axis=1)
return [x]
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act="relu",
name=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
bias_attr=False)
self._batch_norm = BatchNorm(num_filters)
self.act = act
def forward(self, input):
y = self._conv(input)
y = self._batch_norm(y)
if self.act == 'relu':
y = fluid.layers.relu(y)
return y
class Layer1(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
num_blocks,
has_se=False,
name=None):
super(Layer1, self).__init__()
self.bottleneck_block_list = []
for i in range(num_blocks):
bottleneck_block = self.add_sublayer(
"bb_{}_{}".format(name, i + 1),
BottleneckBlock(
num_channels=num_channels if i == 0 else num_filters * 4,
num_filters=num_filters,
has_se=has_se,
stride=1,
downsample=True if i == 0 else False,
name=name + '_' + str(i + 1)))
self.bottleneck_block_list.append(bottleneck_block)
def forward(self, input):
conv = input
for block_func in self.bottleneck_block_list:
conv = block_func(conv)
return conv
class TransitionLayer(fluid.dygraph.Layer):
def __init__(self, in_channels, out_channels, name=None):
super(TransitionLayer, self).__init__()
num_in = len(in_channels)
num_out = len(out_channels)
self.conv_bn_func_list = []
for i in range(num_out):
residual = None
if i < num_in:
if in_channels[i] != out_channels[i]:
residual = self.add_sublayer(
"transition_{}_layer_{}".format(name, i + 1),
ConvBNLayer(
num_channels=in_channels[i],
num_filters=out_channels[i],
filter_size=3,
name=name + '_layer_' + str(i + 1)))
else:
residual = self.add_sublayer(
"transition_{}_layer_{}".format(name, i + 1),
ConvBNLayer(
num_channels=in_channels[-1],
num_filters=out_channels[i],
filter_size=3,
stride=2,
name=name + '_layer_' + str(i + 1)))
self.conv_bn_func_list.append(residual)
def forward(self, input):
outs = []
for idx, conv_bn_func in enumerate(self.conv_bn_func_list):
if conv_bn_func is None:
outs.append(input[idx])
else:
if idx < len(input):
outs.append(conv_bn_func(input[idx]))
else:
outs.append(conv_bn_func(input[-1]))
return outs
class Branches(fluid.dygraph.Layer):
def __init__(self,
num_blocks,
in_channels,
out_channels,
has_se=False,
name=None):
super(Branches, self).__init__()
self.basic_block_list = []
for i in range(len(out_channels)):
self.basic_block_list.append([])
for j in range(num_blocks[i]):
in_ch = in_channels[i] if j == 0 else out_channels[i]
basic_block_func = self.add_sublayer(
"bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1),
BasicBlock(
num_channels=in_ch,
num_filters=out_channels[i],
has_se=has_se,
name=name + '_branch_layer_' + str(i + 1) + '_' +
str(j + 1)))
self.basic_block_list[i].append(basic_block_func)
def forward(self, inputs):
outs = []
for idx, input in enumerate(inputs):
conv = input
for basic_block_func in self.basic_block_list[idx]:
conv = basic_block_func(conv)
outs.append(conv)
return outs
class BottleneckBlock(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
has_se,
stride=1,
downsample=False,
name=None):
super(BottleneckBlock, self).__init__()
self.has_se = has_se
self.downsample = downsample
self.conv1 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act="relu",
name=name + "_conv1",
)
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
act="relu",
name=name + "_conv2")
self.conv3 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
act=None,
name=name + "_conv3")
if self.downsample:
self.conv_down = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
act=None,
name=name + "_downsample")
if self.has_se:
self.se = SELayer(
num_channels=num_filters * 4,
num_filters=num_filters * 4,
reduction_ratio=16,
name=name + '_fc')
def forward(self, input):
residual = input
conv1 = self.conv1(input)
conv2 = self.conv2(conv1)
conv3 = self.conv3(conv2)
if self.downsample:
residual = self.conv_down(input)
if self.has_se:
conv3 = self.se(conv3)
y = fluid.layers.elementwise_add(x=conv3, y=residual, act="relu")
return y
class BasicBlock(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
stride=1,
has_se=False,
downsample=False,
name=None):
super(BasicBlock, self).__init__()
self.has_se = has_se
self.downsample = downsample
self.conv1 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=3,
stride=stride,
act="relu",
name=name + "_conv1")
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=1,
act=None,
name=name + "_conv2")
if self.downsample:
self.conv_down = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
act="relu",
name=name + "_downsample")
if self.has_se:
self.se = SELayer(
num_channels=num_filters,
num_filters=num_filters,
reduction_ratio=16,
name=name + '_fc')
def forward(self, input):
residual = input
conv1 = self.conv1(input)
conv2 = self.conv2(conv1)
if self.downsample:
residual = self.conv_down(input)
if self.has_se:
conv2 = self.se(conv2)
y = fluid.layers.elementwise_add(x=conv2, y=residual, act="relu")
return y
class SELayer(fluid.dygraph.Layer):
def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
super(SELayer, self).__init__()
self.pool2d_gap = Pool2D(pool_type='avg', global_pooling=True)
self._num_channels = num_channels
med_ch = int(num_channels / reduction_ratio)
stdv = 1.0 / math.sqrt(num_channels * 1.0)
self.squeeze = Linear(
num_channels,
med_ch,
act="relu",
param_attr=ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + "_sqz_weights"),
bias_attr=ParamAttr(name=name + '_sqz_offset'))
stdv = 1.0 / math.sqrt(med_ch * 1.0)
self.excitation = Linear(
med_ch,
num_filters,
act="sigmoid",
param_attr=ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + "_exc_weights"),
bias_attr=ParamAttr(name=name + '_exc_offset'))
def forward(self, input):
pool = self.pool2d_gap(input)
pool = fluid.layers.reshape(pool, shape=[-1, self._num_channels])
squeeze = self.squeeze(pool)
excitation = self.excitation(squeeze)
excitation = fluid.layers.reshape(
excitation, shape=[-1, self._num_channels, 1, 1])
out = input * excitation
return out
class Stage(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_modules,
num_blocks,
num_filters,
has_se=False,
multi_scale_output=True,
name=None):
super(Stage, self).__init__()
self._num_modules = num_modules
self.stage_func_list = []
for i in range(num_modules):
if i == num_modules - 1 and not multi_scale_output:
stage_func = self.add_sublayer(
"stage_{}_{}".format(name, i + 1),
HighResolutionModule(
num_channels=num_channels,
num_blocks=num_blocks,
num_filters=num_filters,
has_se=has_se,
multi_scale_output=False,
name=name + '_' + str(i + 1)))
else:
stage_func = self.add_sublayer(
"stage_{}_{}".format(name, i + 1),
HighResolutionModule(
num_channels=num_channels,
num_blocks=num_blocks,
num_filters=num_filters,
has_se=has_se,
name=name + '_' + str(i + 1)))
self.stage_func_list.append(stage_func)
def forward(self, input):
out = input
for idx in range(self._num_modules):
out = self.stage_func_list[idx](out)
return out
class HighResolutionModule(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_blocks,
num_filters,
has_se=False,
multi_scale_output=True,
name=None):
super(HighResolutionModule, self).__init__()
self.branches_func = Branches(
num_blocks=num_blocks,
in_channels=num_channels,
out_channels=num_filters,
has_se=has_se,
name=name)
self.fuse_func = FuseLayers(
in_channels=num_filters,
out_channels=num_filters,
multi_scale_output=multi_scale_output,
name=name)
def forward(self, input):
out = self.branches_func(input)
out = self.fuse_func(out)
return out
class FuseLayers(fluid.dygraph.Layer):
def __init__(self,
in_channels,
out_channels,
multi_scale_output=True,
name=None):
super(FuseLayers, self).__init__()
self._actual_ch = len(in_channels) if multi_scale_output else 1
self._in_channels = in_channels
self.residual_func_list = []
for i in range(self._actual_ch):
for j in range(len(in_channels)):
residual_func = None
if j > i:
residual_func = self.add_sublayer(
"residual_{}_layer_{}_{}".format(name, i + 1, j + 1),
ConvBNLayer(
num_channels=in_channels[j],
num_filters=out_channels[i],
filter_size=1,
stride=1,
act=None,
name=name + '_layer_' + str(i + 1) + '_' +
str(j + 1)))
self.residual_func_list.append(residual_func)
elif j < i:
pre_num_filters = in_channels[j]
for k in range(i - j):
if k == i - j - 1:
residual_func = self.add_sublayer(
"residual_{}_layer_{}_{}_{}".format(
name, i + 1, j + 1, k + 1),
ConvBNLayer(
num_channels=pre_num_filters,
num_filters=out_channels[i],
filter_size=3,
stride=2,
act=None,
name=name + '_layer_' + str(i + 1) + '_' +
str(j + 1) + '_' + str(k + 1)))
pre_num_filters = out_channels[i]
else:
residual_func = self.add_sublayer(
"residual_{}_layer_{}_{}_{}".format(
name, i + 1, j + 1, k + 1),
ConvBNLayer(
num_channels=pre_num_filters,
num_filters=out_channels[j],
filter_size=3,
stride=2,
act="relu",
name=name + '_layer_' + str(i + 1) + '_' +
str(j + 1) + '_' + str(k + 1)))
pre_num_filters = out_channels[j]
self.residual_func_list.append(residual_func)
def forward(self, input):
outs = []
residual_func_idx = 0
for i in range(self._actual_ch):
residual = input[i]
residual_shape = residual.shape[-2:]
for j in range(len(self._in_channels)):
if j > i:
y = self.residual_func_list[residual_func_idx](input[j])
residual_func_idx += 1
y = fluid.layers.resize_bilinear(
input=y, out_shape=residual_shape)
residual = fluid.layers.elementwise_add(
x=residual, y=y, act=None)
elif j < i:
y = input[j]
for k in range(i - j):
y = self.residual_func_list[residual_func_idx](y)
residual_func_idx += 1
residual = fluid.layers.elementwise_add(
x=residual, y=y, act=None)
layer_helper = LayerHelper(self.full_name(), act='relu')
residual = layer_helper.append_activation(residual)
outs.append(residual)
return outs
class LastClsOut(fluid.dygraph.Layer):
def __init__(self,
num_channel_list,
has_se,
num_filters_list=[32, 64, 128, 256],
name=None):
super(LastClsOut, self).__init__()
self.func_list = []
for idx in range(len(num_channel_list)):
func = self.add_sublayer(
"conv_{}_conv_{}".format(name, idx + 1),
BottleneckBlock(
num_channels=num_channel_list[idx],
num_filters=num_filters_list[idx],
has_se=has_se,
downsample=True,
name=name + 'conv_' + str(idx + 1)))
self.func_list.append(func)
def forward(self, inputs):
outs = []
for idx, input in enumerate(inputs):
out = self.func_list[idx](input)
outs.append(out)
return outs
@manager.BACKBONES.add_component
def HRNet_W18_Small_V1(**kwargs):
model = HRNet(
stage1_num_modules=1,
stage1_num_blocks=[1],
stage1_num_channels=[32],
stage2_num_modules=1,
stage2_num_blocks=[2, 2],
stage2_num_channels=[16, 32],
stage3_num_modules=1,
stage3_num_blocks=[2, 2, 2],
stage3_num_channels=[16, 32, 64],
stage4_num_modules=1,
stage4_num_blocks=[2, 2, 2, 2],
stage4_num_channels=[16, 32, 64, 128],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W18_Small_V2(**kwargs):
model = HRNet(
stage1_num_modules=1,
stage1_num_blocks=[2],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[2, 2],
stage2_num_channels=[18, 36],
stage3_num_modules=1,
stage3_num_blocks=[2, 2, 2],
stage3_num_channels=[18, 36, 72],
stage4_num_modules=1,
stage4_num_blocks=[2, 2, 2, 2],
stage4_num_channels=[18, 36, 72, 144],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W18(**kwargs):
model = HRNet(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[18, 36],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[18, 36, 72],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[18, 36, 72, 144],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W30(**kwargs):
model = HRNet(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[30, 60],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[30, 60, 120],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[30, 60, 120, 240],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W32(**kwargs):
model = HRNet(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[32, 64],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[32, 64, 128],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[32, 64, 128, 256],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W40(**kwargs):
model = HRNet(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[40, 80],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[40, 80, 160],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[40, 80, 160, 320],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W44(**kwargs):
model = HRNet(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[44, 88],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[44, 88, 176],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[44, 88, 176, 352],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W48(**kwargs):
model = HRNet(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[48, 96],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[48, 96, 192],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[48, 96, 192, 384],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W60(**kwargs):
model = HRNet(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[60, 120],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[60, 120, 240],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[60, 120, 240, 480],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W64(**kwargs):
model = HRNet(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[64, 128],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[64, 128, 256],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[64, 128, 256, 512],
**kwargs)
return model
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import os
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
from paddle.nn import SyncBatchNorm as BatchNorm
from dygraph.models.architectures import layer_utils
from dygraph.cvlibs import manager
from dygraph.utils import utils
__all__ = [
"MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5",
"MobileNetV3_small_x0_75", "MobileNetV3_small_x1_0",
"MobileNetV3_small_x1_25", "MobileNetV3_large_x0_35",
"MobileNetV3_large_x0_5", "MobileNetV3_large_x0_75",
"MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25"
]
def make_divisible(v, divisor=8, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
def get_padding_same(kernel_size, dilation_rate):
"""
SAME padding implementation given kernel_size and dilation_rate.
The calculation formula as following:
(F-(k+(k -1)*(r-1))+2*p)/s + 1 = F_new
where F: a feature map
k: kernel size, r: dilation rate, p: padding value, s: stride
F_new: new feature map
Args:
kernel_size (int)
dilation_rate (int)
Returns:
padding_same (int): padding value
"""
k = kernel_size
r = dilation_rate
padding_same = (k + (k - 1) * (r - 1) - 1) // 2
return padding_same
class MobileNetV3(fluid.dygraph.Layer):
def __init__(self,
backbone_pretrained=None,
scale=1.0,
model_name="small",
class_dim=1000,
output_stride=None,
**kwargs):
super(MobileNetV3, self).__init__()
inplanes = 16
if model_name == "large":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, False, "relu", 1],
[3, 64, 24, False, "relu", 2],
[3, 72, 24, False, "relu", 1], # output 1 -> out_index=2
[5, 72, 40, True, "relu", 2],
[5, 120, 40, True, "relu", 1],
[5, 120, 40, True, "relu", 1], # output 2 -> out_index=5
[3, 240, 80, False, "hard_swish", 2],
[3, 200, 80, False, "hard_swish", 1],
[3, 184, 80, False, "hard_swish", 1],
[3, 184, 80, False, "hard_swish", 1],
[3, 480, 112, True, "hard_swish", 1],
[3, 672, 112, True, "hard_swish",
1], # output 3 -> out_index=11
[5, 672, 160, True, "hard_swish", 2],
[5, 960, 160, True, "hard_swish", 1],
[5, 960, 160, True, "hard_swish",
1], # output 3 -> out_index=14
]
self.out_indices = [2, 5, 11, 14]
self.cls_ch_squeeze = 960
self.cls_ch_expand = 1280
elif model_name == "small":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, True, "relu", 2], # output 1 -> out_index=0
[3, 72, 24, False, "relu", 2],
[3, 88, 24, False, "relu", 1], # output 2 -> out_index=3
[5, 96, 40, True, "hard_swish", 2],
[5, 240, 40, True, "hard_swish", 1],
[5, 240, 40, True, "hard_swish", 1],
[5, 120, 48, True, "hard_swish", 1],
[5, 144, 48, True, "hard_swish", 1], # output 3 -> out_index=7
[5, 288, 96, True, "hard_swish", 2],
[5, 576, 96, True, "hard_swish", 1],
[5, 576, 96, True, "hard_swish", 1], # output 4 -> out_index=10
]
self.out_indices = [0, 3, 7, 10]
self.cls_ch_squeeze = 576
self.cls_ch_expand = 1280
else:
raise NotImplementedError(
"mode[{}_model] is not implemented!".format(model_name))
###################################################
# modify stride and dilation based on output_stride
self.dilation_cfg = [1] * len(self.cfg)
self.modify_bottle_params(output_stride=output_stride)
###################################################
self.conv1 = ConvBNLayer(
in_c=3,
out_c=make_divisible(inplanes * scale),
filter_size=3,
stride=2,
padding=1,
num_groups=1,
if_act=True,
act="hard_swish",
name="conv1")
self.block_list = []
inplanes = make_divisible(inplanes * scale)
for i, (k, exp, c, se, nl, s) in enumerate(self.cfg):
######################################
# add dilation rate
dilation_rate = self.dilation_cfg[i]
######################################
self.block_list.append(
ResidualUnit(
in_c=inplanes,
mid_c=make_divisible(scale * exp),
out_c=make_divisible(scale * c),
filter_size=k,
stride=s,
dilation=dilation_rate,
use_se=se,
act=nl,
name="conv" + str(i + 2)))
self.add_sublayer(
sublayer=self.block_list[-1], name="conv" + str(i + 2))
inplanes = make_divisible(scale * c)
self.last_second_conv = ConvBNLayer(
in_c=inplanes,
out_c=make_divisible(scale * self.cls_ch_squeeze),
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True,
act="hard_swish",
name="conv_last")
self.pool = Pool2D(
pool_type="avg", global_pooling=True, use_cudnn=False)
self.last_conv = Conv2D(
num_channels=make_divisible(scale * self.cls_ch_squeeze),
num_filters=self.cls_ch_expand,
filter_size=1,
stride=1,
padding=0,
act=None,
param_attr=ParamAttr(name="last_1x1_conv_weights"),
bias_attr=False)
self.out = Linear(
input_dim=self.cls_ch_expand,
output_dim=class_dim,
param_attr=ParamAttr("fc_weights"),
bias_attr=ParamAttr(name="fc_offset"))
self.init_weight(backbone_pretrained)
def modify_bottle_params(self, output_stride=None):
if output_stride is not None and output_stride % 2 != 0:
raise Exception("output stride must to be even number")
if output_stride is not None:
stride = 2
rate = 1
for i, _cfg in enumerate(self.cfg):
stride = stride * _cfg[-1]
if stride > output_stride:
rate = rate * _cfg[-1]
self.cfg[i][-1] = 1
self.dilation_cfg[i] = rate
def forward(self, inputs, label=None, dropout_prob=0.2):
x = self.conv1(inputs)
# A feature list saves each downsampling feature.
feat_list = []
for i, block in enumerate(self.block_list):
x = block(x)
if i in self.out_indices:
feat_list.append(x)
#print("block {}:".format(i),x.shape, self.dilation_cfg[i])
x = self.last_second_conv(x)
x = self.pool(x)
x = self.last_conv(x)
x = fluid.layers.hard_swish(x)
x = fluid.layers.dropout(x=x, dropout_prob=dropout_prob)
x = fluid.layers.reshape(x, shape=[x.shape[0], x.shape[1]])
x = self.out(x)
return x, feat_list
def init_weight(self, pretrained_model=None):
"""
Initialize the parameters of model parts.
Args:
pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
"""
if pretrained_model is not None:
if os.path.exists(pretrained_model):
utils.load_pretrained_model(self, pretrained_model)
else:
raise Exception('Pretrained model is not found: {}'.format(
pretrained_model))
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
in_c,
out_c,
filter_size,
stride,
padding,
dilation=1,
num_groups=1,
if_act=True,
act=None,
use_cudnn=True,
name=""):
super(ConvBNLayer, self).__init__()
self.if_act = if_act
self.act = act
self.conv = fluid.dygraph.Conv2D(
num_channels=in_c,
num_filters=out_c,
filter_size=filter_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=num_groups,
param_attr=ParamAttr(name=name + "_weights"),
bias_attr=False,
use_cudnn=use_cudnn,
act=None)
self.bn = BatchNorm(
num_features=out_c,
weight_attr=ParamAttr(
name=name + "_bn_scale",
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0)),
bias_attr=ParamAttr(
name=name + "_bn_offset",
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0)))
self._act_op = layer_utils.Activation(act=None)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
if self.if_act:
if self.act == "relu":
x = fluid.layers.relu(x)
elif self.act == "hard_swish":
x = fluid.layers.hard_swish(x)
else:
print("The activation function is selected incorrectly.")
exit()
return x
class ResidualUnit(fluid.dygraph.Layer):
def __init__(self,
in_c,
mid_c,
out_c,
filter_size,
stride,
use_se,
dilation=1,
act=None,
name=''):
super(ResidualUnit, self).__init__()
self.if_shortcut = stride == 1 and in_c == out_c
self.if_se = use_se
self.expand_conv = ConvBNLayer(
in_c=in_c,
out_c=mid_c,
filter_size=1,
stride=1,
padding=0,
if_act=True,
act=act,
name=name + "_expand")
self.bottleneck_conv = ConvBNLayer(
in_c=mid_c,
out_c=mid_c,
filter_size=filter_size,
stride=stride,
padding=get_padding_same(
filter_size,
dilation), #int((filter_size - 1) // 2) + (dilation - 1),
dilation=dilation,
num_groups=mid_c,
if_act=True,
act=act,
name=name + "_depthwise")
if self.if_se:
self.mid_se = SEModule(mid_c, name=name + "_se")
self.linear_conv = ConvBNLayer(
in_c=mid_c,
out_c=out_c,
filter_size=1,
stride=1,
padding=0,
if_act=False,
act=None,
name=name + "_linear")
self.dilation = dilation
def forward(self, inputs):
x = self.expand_conv(inputs)
x = self.bottleneck_conv(x)
if self.if_se:
x = self.mid_se(x)
x = self.linear_conv(x)
if self.if_shortcut:
x = fluid.layers.elementwise_add(inputs, x)
return x
class SEModule(fluid.dygraph.Layer):
def __init__(self, channel, reduction=4, name=""):
super(SEModule, self).__init__()
self.avg_pool = fluid.dygraph.Pool2D(
pool_type="avg", global_pooling=True, use_cudnn=False)
self.conv1 = fluid.dygraph.Conv2D(
num_channels=channel,
num_filters=channel // reduction,
filter_size=1,
stride=1,
padding=0,
act="relu",
param_attr=ParamAttr(name=name + "_1_weights"),
bias_attr=ParamAttr(name=name + "_1_offset"))
self.conv2 = fluid.dygraph.Conv2D(
num_channels=channel // reduction,
num_filters=channel,
filter_size=1,
stride=1,
padding=0,
act=None,
param_attr=ParamAttr(name + "_2_weights"),
bias_attr=ParamAttr(name=name + "_2_offset"))
def forward(self, inputs):
outputs = self.avg_pool(inputs)
outputs = self.conv1(outputs)
outputs = self.conv2(outputs)
outputs = fluid.layers.hard_sigmoid(outputs)
return fluid.layers.elementwise_mul(x=inputs, y=outputs, axis=0)
def MobileNetV3_small_x0_35(**kwargs):
model = MobileNetV3(model_name="small", scale=0.35, **kwargs)
return model
def MobileNetV3_small_x0_5(**kwargs):
model = MobileNetV3(model_name="small", scale=0.5, **kwargs)
return model
def MobileNetV3_small_x0_75(**kwargs):
model = MobileNetV3(model_name="small", scale=0.75, **kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_small_x1_0(**kwargs):
model = MobileNetV3(model_name="small", scale=1.0, **kwargs)
return model
def MobileNetV3_small_x1_25(**kwargs):
model = MobileNetV3(model_name="small", scale=1.25, **kwargs)
return model
def MobileNetV3_large_x0_35(**kwargs):
model = MobileNetV3(model_name="large", scale=0.35, **kwargs)
return model
def MobileNetV3_large_x0_5(**kwargs):
model = MobileNetV3(model_name="large", scale=0.5, **kwargs)
return model
def MobileNetV3_large_x0_75(**kwargs):
model = MobileNetV3(model_name="large", scale=0.75, **kwargs)
return model
@manager.BACKBONES.add_component
def MobileNetV3_large_x1_0(**kwargs):
model = MobileNetV3(model_name="large", scale=1.0, **kwargs)
return model
def MobileNetV3_large_x1_25(**kwargs):
model = MobileNetV3(model_name="large", scale=1.25, **kwargs)
return model
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
from paddle.nn import SyncBatchNorm as BatchNorm
from dygraph.utils import utils
from dygraph.models.architectures import layer_utils
from dygraph.cvlibs import manager
from dygraph.utils import utils
__all__ = [
"ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd"
]
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(
self,
num_channels,
num_filters,
filter_size,
stride=1,
dilation=1,
groups=1,
is_vd_mode=False,
act=None,
name=None,
):
super(ConvBNLayer, self).__init__()
self.is_vd_mode = is_vd_mode
self._pool2d_avg = Pool2D(
pool_size=2,
pool_stride=2,
pool_padding=0,
pool_type='avg',
ceil_mode=True)
self._conv = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2 if dilation == 1 else 0,
dilation=dilation,
groups=groups,
act=None,
param_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
self._batch_norm = BatchNorm(
num_filters,
weight_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'))
self._act_op = layer_utils.Activation(act=act)
def forward(self, inputs):
if self.is_vd_mode:
inputs = self._pool2d_avg(inputs)
y = self._conv(inputs)
y = self._batch_norm(y)
y = self._act_op(y)
return y
class BottleneckBlock(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True,
if_first=False,
dilation=1,
name=None):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act='relu',
name=name + "_branch2a")
self.dilation = dilation
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu',
dilation=dilation,
name=name + "_branch2b")
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
act=None,
name=name + "_branch2c")
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
stride=1,
is_vd_mode=False if if_first or stride == 1 else True,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
####################################################################
# If given dilation rate > 1, using corresponding padding
if self.dilation > 1:
padding = self.dilation
y = fluid.layers.pad(
y, [0, 0, 0, 0, padding, padding, padding, padding])
#####################################################################
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = fluid.layers.elementwise_add(x=short, y=conv2)
layer_helper = LayerHelper(self.full_name(), act='relu')
return layer_helper.append_activation(y)
class BasicBlock(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True,
if_first=False,
name=None):
super(BasicBlock, self).__init__()
self.stride = stride
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
act=None,
name=name + "_branch2b")
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
stride=1,
is_vd_mode=False if if_first else True,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = fluid.layers.elementwise_add(x=short, y=conv1)
layer_helper = LayerHelper(self.full_name(), act='relu')
return layer_helper.append_activation(y)
class ResNet_vd(fluid.dygraph.Layer):
def __init__(self,
backbone_pretrained=None,
layers=50,
class_dim=1000,
output_stride=None,
multi_grid=(1, 2, 4),
**kwargs):
super(ResNet_vd, self).__init__()
self.layers = layers
supported_layers = [18, 34, 50, 101, 152, 200]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(
supported_layers, layers)
if layers == 18:
depth = [2, 2, 2, 2]
elif layers == 34 or layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
elif layers == 200:
depth = [3, 12, 48, 3]
num_channels = [64, 256, 512, 1024
] if layers >= 50 else [64, 64, 128, 256]
num_filters = [64, 128, 256, 512]
dilation_dict = None
if output_stride == 8:
dilation_dict = {2: 2, 3: 4}
elif output_stride == 16:
dilation_dict = {3: 2}
self.conv1_1 = ConvBNLayer(
num_channels=3,
num_filters=32,
filter_size=3,
stride=2,
act='relu',
name="conv1_1")
self.conv1_2 = ConvBNLayer(
num_channels=32,
num_filters=32,
filter_size=3,
stride=1,
act='relu',
name="conv1_2")
self.conv1_3 = ConvBNLayer(
num_channels=32,
num_filters=64,
filter_size=3,
stride=1,
act='relu',
name="conv1_3")
self.pool2d_max = Pool2D(
pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
# self.block_list = []
self.stage_list = []
if layers >= 50:
for block in range(len(depth)):
shortcut = False
block_list = []
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
###############################################################################
# Add dilation rate for some segmentation tasks, if dilation_dict is not None.
dilation_rate = dilation_dict[
block] if dilation_dict and block in dilation_dict else 1
# Actually block here is 'stage', and i is 'block' in 'stage'
# At the stage 4, expand the the dilation_rate using multi_grid, default (1, 2, 4)
if block == 3:
dilation_rate = dilation_rate * multi_grid[i]
#print("stage {}, block {}: dilation rate".format(block, i), dilation_rate)
###############################################################################
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block] * 4,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0
and dilation_rate == 1 else 1,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name,
dilation=dilation_rate))
block_list.append(bottleneck_block)
shortcut = True
self.stage_list.append(block_list)
else:
for block in range(len(depth)):
shortcut = False
block_list = []
for i in range(depth[block]):
conv_name = "res" + str(block + 2) + chr(97 + i)
basic_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BasicBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block],
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name))
block_list.append(basic_block)
shortcut = True
self.stage_list.append(block_list)
self.pool2d_avg = Pool2D(
pool_size=7, pool_type='avg', global_pooling=True)
self.pool2d_avg_channels = num_channels[-1] * 2
stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
self.out = Linear(
self.pool2d_avg_channels,
class_dim,
param_attr=ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name="fc_0.w_0"),
bias_attr=ParamAttr(name="fc_0.b_0"))
self.init_weight(backbone_pretrained)
def forward(self, inputs):
y = self.conv1_1(inputs)
y = self.conv1_2(y)
y = self.conv1_3(y)
y = self.pool2d_max(y)
# A feature list saves the output feature map of each stage.
feat_list = []
for i, stage in enumerate(self.stage_list):
for j, block in enumerate(stage):
y = block(y)
#print("stage {} block {}".format(i+1, j+1), y.shape)
feat_list.append(y)
y = self.pool2d_avg(y)
y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels])
y = self.out(y)
return feat_list
# def init_weight(self, pretrained_model=None):
# if pretrained_model is not None:
# if os.path.exists(pretrained_model):
# utils.load_pretrained_model(self, pretrained_model)
def init_weight(self, pretrained_model=None):
"""
Initialize the parameters of model parts.
Args:
pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
"""
if pretrained_model is not None:
if os.path.exists(pretrained_model):
utils.load_pretrained_model(self, pretrained_model)
else:
raise Exception('Pretrained model is not found: {}'.format(
pretrained_model))
@manager.BACKBONES.add_component
def ResNet18_vd(**args):
model = ResNet_vd(layers=18, **args)
return model
def ResNet34_vd(**args):
model = ResNet_vd(layers=34, **args)
return model
@manager.BACKBONES.add_component
def ResNet50_vd(**args):
model = ResNet_vd(layers=50, **args)
return model
@manager.BACKBONES.add_component
def ResNet101_vd(**args):
model = ResNet_vd(layers=101, **args)
return model
def ResNet152_vd(**args):
model = ResNet_vd(layers=152, **args)
return model
def ResNet200_vd(**args):
model = ResNet_vd(layers=200, **args)
return model
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
from paddle.nn import SyncBatchNorm as BatchNorm
from dygraph.models.architectures import layer_utils
from dygraph.cvlibs import manager
from dygraph.utils import utils
__all__ = ["Xception41_deeplab", "Xception65_deeplab", "Xception71_deeplab"]
def check_data(data, number):
if type(data) == int:
return [data] * number
assert len(data) == number
return data
def check_stride(s, os):
if s <= os:
return True
else:
return False
def check_points(count, points):
if points is None:
return False
else:
if isinstance(points, list):
return (True if count in points else False)
else:
return (True if count == points else False)
def gen_bottleneck_params(backbone='xception_65'):
if backbone == 'xception_65':
bottleneck_params = {
"entry_flow": (3, [2, 2, 2], [128, 256, 728]),
"middle_flow": (16, 1, 728),
"exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
}
elif backbone == 'xception_41':
bottleneck_params = {
"entry_flow": (3, [2, 2, 2], [128, 256, 728]),
"middle_flow": (8, 1, 728),
"exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
}
elif backbone == 'xception_71':
bottleneck_params = {
"entry_flow": (5, [2, 1, 2, 1, 2], [128, 256, 256, 728, 728]),
"middle_flow": (16, 1, 728),
"exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
}
else:
raise Exception(
"xception backbont only support xception_41/xception_65/xception_71"
)
return bottleneck_params
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
input_channels,
output_channels,
filter_size,
stride=1,
padding=0,
act=None,
name=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
num_channels=input_channels,
num_filters=output_channels,
filter_size=filter_size,
stride=stride,
padding=padding,
param_attr=ParamAttr(name=name + "/weights"),
bias_attr=False)
self._bn = BatchNorm(
num_features=output_channels,
epsilon=1e-3,
momentum=0.99,
weight_attr=ParamAttr(name=name + "/BatchNorm/gamma"),
bias_attr=ParamAttr(name=name + "/BatchNorm/beta"))
self._act_op = layer_utils.Activation(act=act)
def forward(self, inputs):
return self._act_op(self._bn(self._conv(inputs)))
class Seperate_Conv(fluid.dygraph.Layer):
def __init__(self,
input_channels,
output_channels,
stride,
filter,
dilation=1,
act=None,
name=None):
super(Seperate_Conv, self).__init__()
self._conv1 = Conv2D(
num_channels=input_channels,
num_filters=input_channels,
filter_size=filter,
stride=stride,
groups=input_channels,
padding=(filter) // 2 * dilation,
dilation=dilation,
param_attr=ParamAttr(name=name + "/depthwise/weights"),
bias_attr=False)
self._bn1 = BatchNorm(
input_channels,
epsilon=1e-3,
momentum=0.99,
weight_attr=ParamAttr(name=name + "/depthwise/BatchNorm/gamma"),
bias_attr=ParamAttr(name=name + "/depthwise/BatchNorm/beta"))
self._act_op1 = layer_utils.Activation(act=act)
self._conv2 = Conv2D(
input_channels,
output_channels,
1,
stride=1,
groups=1,
padding=0,
param_attr=ParamAttr(name=name + "/pointwise/weights"),
bias_attr=False)
self._bn2 = BatchNorm(
output_channels,
epsilon=1e-3,
momentum=0.99,
weight_attr=ParamAttr(name=name + "/pointwise/BatchNorm/gamma"),
bias_attr=ParamAttr(name=name + "/pointwise/BatchNorm/beta"))
self._act_op2 = layer_utils.Activation(act=act)
def forward(self, inputs):
x = self._conv1(inputs)
x = self._bn1(x)
x = self._act_op1(x)
x = self._conv2(x)
x = self._bn2(x)
x = self._act_op2(x)
return x
class Xception_Block(fluid.dygraph.Layer):
def __init__(self,
input_channels,
output_channels,
strides=1,
filter_size=3,
dilation=1,
skip_conv=True,
has_skip=True,
activation_fn_in_separable_conv=False,
name=None):
super(Xception_Block, self).__init__()
repeat_number = 3
output_channels = check_data(output_channels, repeat_number)
filter_size = check_data(filter_size, repeat_number)
strides = check_data(strides, repeat_number)
self.has_skip = has_skip
self.skip_conv = skip_conv
self.activation_fn_in_separable_conv = activation_fn_in_separable_conv
if not activation_fn_in_separable_conv:
self._conv1 = Seperate_Conv(
input_channels,
output_channels[0],
stride=strides[0],
filter=filter_size[0],
dilation=dilation,
name=name + "/separable_conv1")
self._conv2 = Seperate_Conv(
output_channels[0],
output_channels[1],
stride=strides[1],
filter=filter_size[1],
dilation=dilation,
name=name + "/separable_conv2")
self._conv3 = Seperate_Conv(
output_channels[1],
output_channels[2],
stride=strides[2],
filter=filter_size[2],
dilation=dilation,
name=name + "/separable_conv3")
else:
self._conv1 = Seperate_Conv(
input_channels,
output_channels[0],
stride=strides[0],
filter=filter_size[0],
act="relu",
dilation=dilation,
name=name + "/separable_conv1")
self._conv2 = Seperate_Conv(
output_channels[0],
output_channels[1],
stride=strides[1],
filter=filter_size[1],
act="relu",
dilation=dilation,
name=name + "/separable_conv2")
self._conv3 = Seperate_Conv(
output_channels[1],
output_channels[2],
stride=strides[2],
filter=filter_size[2],
act="relu",
dilation=dilation,
name=name + "/separable_conv3")
if has_skip and skip_conv:
self._short = ConvBNLayer(
input_channels,
output_channels[-1],
1,
stride=strides[-1],
padding=0,
name=name + "/shortcut")
def forward(self, inputs):
layer_helper = LayerHelper(self.full_name(), act='relu')
if not self.activation_fn_in_separable_conv:
x = layer_helper.append_activation(inputs)
x = self._conv1(x)
x = layer_helper.append_activation(x)
x = self._conv2(x)
x = layer_helper.append_activation(x)
x = self._conv3(x)
else:
x = self._conv1(inputs)
x = self._conv2(x)
x = self._conv3(x)
if self.has_skip is False:
return x
if self.skip_conv:
skip = self._short(inputs)
else:
skip = inputs
return fluid.layers.elementwise_add(x, skip)
class XceptionDeeplab(fluid.dygraph.Layer):
#def __init__(self, backbone, class_dim=1000):
# add output_stride
def __init__(self,
backbone,
backbone_pretrained=None,
output_stride=16,
class_dim=1000,
**kwargs):
super(XceptionDeeplab, self).__init__()
bottleneck_params = gen_bottleneck_params(backbone)
self.backbone = backbone
self._conv1 = ConvBNLayer(
3,
32,
3,
stride=2,
padding=1,
act="relu",
name=self.backbone + "/entry_flow/conv1")
self._conv2 = ConvBNLayer(
32,
64,
3,
stride=1,
padding=1,
act="relu",
name=self.backbone + "/entry_flow/conv2")
"""
bottleneck_params = {
"entry_flow": (3, [2, 2, 2], [128, 256, 728]),
"middle_flow": (16, 1, 728),
"exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
}
if output_stride == 16:
entry_block3_stride = 2
middle_block_dilation = 1
exit_block_dilations = (1, 2)
elif output_stride == 8:
entry_block3_stride = 1
middle_block_dilation = 2
exit_block_dilations = (2, 4)
"""
self.block_num = bottleneck_params["entry_flow"][0]
self.strides = bottleneck_params["entry_flow"][1]
self.chns = bottleneck_params["entry_flow"][2]
self.strides = check_data(self.strides, self.block_num)
self.chns = check_data(self.chns, self.block_num)
self.entry_flow = []
self.middle_flow = []
self.stride = 2
self.output_stride = output_stride
s = self.stride
for i in range(self.block_num):
stride = self.strides[i] if check_stride(s * self.strides[i],
self.output_stride) else 1
xception_block = self.add_sublayer(
self.backbone + "/entry_flow/block" + str(i + 1),
Xception_Block(
input_channels=64 if i == 0 else self.chns[i - 1],
output_channels=self.chns[i],
strides=[1, 1, self.stride],
name=self.backbone + "/entry_flow/block" + str(i + 1)))
self.entry_flow.append(xception_block)
s = s * stride
self.stride = s
self.block_num = bottleneck_params["middle_flow"][0]
self.strides = bottleneck_params["middle_flow"][1]
self.chns = bottleneck_params["middle_flow"][2]
self.strides = check_data(self.strides, self.block_num)
self.chns = check_data(self.chns, self.block_num)
s = self.stride
for i in range(self.block_num):
stride = self.strides[i] if check_stride(s * self.strides[i],
self.output_stride) else 1
xception_block = self.add_sublayer(
self.backbone + "/middle_flow/block" + str(i + 1),
Xception_Block(
input_channels=728,
output_channels=728,
strides=[1, 1, self.strides[i]],
skip_conv=False,
name=self.backbone + "/middle_flow/block" + str(i + 1)))
self.middle_flow.append(xception_block)
s = s * stride
self.stride = s
self.block_num = bottleneck_params["exit_flow"][0]
self.strides = bottleneck_params["exit_flow"][1]
self.chns = bottleneck_params["exit_flow"][2]
self.strides = check_data(self.strides, self.block_num)
self.chns = check_data(self.chns, self.block_num)
s = self.stride
stride = self.strides[0] if check_stride(s * self.strides[0],
self.output_stride) else 1
self._exit_flow_1 = Xception_Block(
728,
self.chns[0], [1, 1, stride],
name=self.backbone + "/exit_flow/block1")
s = s * stride
stride = self.strides[1] if check_stride(s * self.strides[1],
self.output_stride) else 1
self._exit_flow_2 = Xception_Block(
self.chns[0][-1],
self.chns[1], [1, 1, stride],
dilation=2,
has_skip=False,
activation_fn_in_separable_conv=True,
name=self.backbone + "/exit_flow/block2")
s = s * stride
self.stride = s
self._drop = Dropout(p=0.5)
self._pool = Pool2D(pool_type="avg", global_pooling=True)
self._fc = Linear(
self.chns[1][-1],
class_dim,
param_attr=ParamAttr(name="fc_weights"),
bias_attr=ParamAttr(name="fc_bias"))
self.init_weight(backbone_pretrained)
def forward(self, inputs):
x = self._conv1(inputs)
x = self._conv2(x)
feat_list = []
for i, ef in enumerate(self.entry_flow):
x = ef(x)
if i == 0:
feat_list.append(x)
for mf in self.middle_flow:
x = mf(x)
x = self._exit_flow_1(x)
x = self._exit_flow_2(x)
feat_list.append(x)
x = self._drop(x)
x = self._pool(x)
x = fluid.layers.squeeze(x, axes=[2, 3])
x = self._fc(x)
return x, feat_list
def init_weight(self, pretrained_model=None):
"""
Initialize the parameters of model parts.
Args:
pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
"""
if pretrained_model is not None:
if os.path.exists(pretrained_model):
utils.load_pretrained_model(self, pretrained_model)
else:
raise Exception('Pretrained model is not found: {}'.format(
pretrained_model))
def Xception41_deeplab(**args):
model = XceptionDeeplab('xception_41', **args)
return model
@manager.BACKBONES.add_component
def Xception65_deeplab(**args):
model = XceptionDeeplab("xception_65", **args)
return model
def Xception71_deeplab(**args):
model = XceptionDeeplab("xception_71", **args)
return model
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from dygraph.cvlibs import manager
from dygraph.models.architectures import layer_utils
from paddle import fluid
from paddle.fluid import dygraph
from paddle.fluid.dygraph import Conv2D
from dygraph.utils import utils
__all__ = [
'DeepLabV3P', "deeplabv3p_resnet101_vd", "deeplabv3p_resnet101_vd_os8",
"deeplabv3p_resnet50_vd", "deeplabv3p_resnet50_vd_os8",
"deeplabv3p_xception65_deeplab", "deeplabv3p_mobilenetv3_large",
"deeplabv3p_mobilenetv3_small"
]
class ImageAverage(dygraph.Layer):
"""
Global average pooling
Args:
num_channels (int): the number of input channels.
"""
def __init__(self, num_channels):
super(ImageAverage, self).__init__()
self.conv_bn_relu = layer_utils.ConvBnRelu(
num_channels, num_filters=256, filter_size=1)
def forward(self, input):
x = fluid.layers.reduce_mean(input, dim=[2, 3], keep_dim=True)
x = self.conv_bn_relu(x)
x = fluid.layers.resize_bilinear(x, out_shape=input.shape[2:])
return x
class ASPP(dygraph.Layer):
"""
Decoder module of DeepLabV3P model
Args:
output_stride (int): the ratio of input size and final feature size. Support 16 or 8.
in_channels (int): the number of input channels in decoder module.
using_sep_conv (bool): whether use separable conv or not. Default to True.
"""
def __init__(self, output_stride, in_channels, using_sep_conv=True):
super(ASPP, self).__init__()
if output_stride == 16:
aspp_ratios = (6, 12, 18)
elif output_stride == 8:
aspp_ratios = (12, 24, 36)
else:
raise NotImplementedError(
"Only support output_stride is 8 or 16, but received{}".format(
output_stride))
self.image_average = ImageAverage(num_channels=in_channels)
# The first aspp using 1*1 conv
self.aspp1 = layer_utils.ConvBnRelu(
num_channels=in_channels,
num_filters=256,
filter_size=1,
using_sep_conv=False)
# The second aspp using 3*3 (separable) conv at dilated rate aspp_ratios[0]
self.aspp2 = layer_utils.ConvBnRelu(
num_channels=in_channels,
num_filters=256,
filter_size=3,
using_sep_conv=using_sep_conv,
dilation=aspp_ratios[0],
padding=aspp_ratios[0])
# The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[1]
self.aspp3 = layer_utils.ConvBnRelu(
num_channels=in_channels,
num_filters=256,
filter_size=3,
using_sep_conv=using_sep_conv,
dilation=aspp_ratios[1],
padding=aspp_ratios[1])
# The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[2]
self.aspp4 = layer_utils.ConvBnRelu(
num_channels=in_channels,
num_filters=256,
filter_size=3,
using_sep_conv=using_sep_conv,
dilation=aspp_ratios[2],
padding=aspp_ratios[2])
# After concat op, using 1*1 conv
self.conv_bn_relu = layer_utils.ConvBnRelu(
num_channels=1280, num_filters=256, filter_size=1)
def forward(self, x):
x1 = self.image_average(x)
x2 = self.aspp1(x)
x3 = self.aspp2(x)
x4 = self.aspp3(x)
x5 = self.aspp4(x)
x = fluid.layers.concat([x1, x2, x3, x4, x5], axis=1)
x = self.conv_bn_relu(x)
x = fluid.layers.dropout(x, dropout_prob=0.1)
return x
class Decoder(dygraph.Layer):
"""
Decoder module of DeepLabV3P model
Args:
num_classes (int): the number of classes.
in_channels (int): the number of input channels in decoder module.
using_sep_conv (bool): whether use separable conv or not. Default to True.
"""
def __init__(self, num_classes, in_channels, using_sep_conv=True):
super(Decoder, self).__init__()
self.conv_bn_relu1 = layer_utils.ConvBnRelu(
num_channels=in_channels, num_filters=48, filter_size=1)
self.conv_bn_relu2 = layer_utils.ConvBnRelu(
num_channels=304,
num_filters=256,
filter_size=3,
using_sep_conv=using_sep_conv,
padding=1)
self.conv_bn_relu3 = layer_utils.ConvBnRelu(
num_channels=256,
num_filters=256,
filter_size=3,
using_sep_conv=using_sep_conv,
padding=1)
self.conv = Conv2D(
num_channels=256, num_filters=num_classes, filter_size=1)
def forward(self, x, low_level_feat):
low_level_feat = self.conv_bn_relu1(low_level_feat)
x = fluid.layers.resize_bilinear(x, low_level_feat.shape[2:])
x = fluid.layers.concat([x, low_level_feat], axis=1)
x = self.conv_bn_relu2(x)
x = self.conv_bn_relu3(x)
x = self.conv(x)
return x
@manager.MODELS.add_component
class DeepLabV3P(dygraph.Layer):
"""
The DeepLabV3P consists of three main components, Backbone, ASPP and Decoder
The orginal artile refers to
"Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation"
Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, Hartwig Adam.
(https://arxiv.org/abs/1802.02611)
Args:
num_classes (int): the unique number of target classes.
backbone (paddle.nn.Layer): backbone networks, currently support Xception65, Resnet101_vd. Default Resnet101_vd.
model_pretrained (str): the path of pretrained model.
output_stride (int): the ratio of input size and final feature size. Default 16.
backbone_indices (tuple): two values in the tuple indicte the indices of output of backbone.
the first index will be taken as a low-level feature in Deconder component;
the second one will be taken as input of ASPP component.
Usually backbone consists of four downsampling stage, and return an output of
each stage, so we set default (0, 3), which means taking feature map of the first
stage in backbone as low-level feature used in Decoder, and feature map of the fourth
stage as input of ASPP.
backbone_channels (tuple): the same length with "backbone_indices". It indicates the channels of corresponding index.
ignore_index (int): the value of ground-truth mask would be ignored while doing evaluation. Default 255.
using_sep_conv (bool): a bool value indicates whether using separable convolutions
in ASPP and Decoder components. Default True.
"""
def __init__(self,
num_classes,
backbone,
model_pretrained=None,
output_stride=16,
backbone_indices=(0, 3),
backbone_channels=(256, 2048),
ignore_index=255,
using_sep_conv=True):
super(DeepLabV3P, self).__init__()
# self.backbone = manager.BACKBONES[backbone](output_stride=output_stride)
self.backbone = backbone
self.aspp = ASPP(output_stride, backbone_channels[1], using_sep_conv)
self.decoder = Decoder(num_classes, backbone_channels[0],
using_sep_conv)
self.ignore_index = ignore_index
self.EPS = 1e-5
self.backbone_indices = backbone_indices
self.init_weight(model_pretrained)
def forward(self, input, label=None):
_, feat_list = self.backbone(input)
low_level_feat = feat_list[self.backbone_indices[0]]
x = feat_list[self.backbone_indices[1]]
x = self.aspp(x)
logit = self.decoder(x, low_level_feat)
logit = fluid.layers.resize_bilinear(logit, input.shape[2:])
if self.training:
return self._get_loss(logit, label)
else:
score_map = fluid.layers.softmax(logit, axis=1)
score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1])
pred = fluid.layers.argmax(score_map, axis=3)
pred = fluid.layers.unsqueeze(pred, axes=[3])
return pred, score_map
def init_weight(self, pretrained_model=None):
"""
Initialize the parameters of model parts.
Args:
pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
"""
if pretrained_model is not None:
if os.path.exists(pretrained_model):
utils.load_pretrained_model(self, pretrained_model)
else:
raise Exception('Pretrained model is not found: {}'.format(
pretrained_model))
def _get_loss(self, logit, label):
"""
compute forward loss of the model
Args:
logit (tensor): the logit of model output
label (tensor): ground truth
Returns:
avg_loss (tensor): forward loss
"""
logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
label = fluid.layers.transpose(label, [0, 2, 3, 1])
mask = label != self.ignore_index
mask = fluid.layers.cast(mask, 'float32')
loss, probs = fluid.layers.softmax_with_cross_entropy(
logit,
label,
ignore_index=self.ignore_index,
return_softmax=True,
axis=-1)
loss = loss * mask
avg_loss = fluid.layers.mean(loss) / (
fluid.layers.mean(mask) + self.EPS)
label.stop_gradient = True
mask.stop_gradient = True
return avg_loss
def build_aspp(output_stride, using_sep_conv):
return ASPP(output_stride=output_stride, using_sep_conv=using_sep_conv)
def build_decoder(num_classes, using_sep_conv):
return Decoder(num_classes, using_sep_conv=using_sep_conv)
@manager.MODELS.add_component
def deeplabv3p_resnet101_vd(*args, **kwargs):
pretrained_model = None
return DeepLabV3P(
backbone='ResNet101_vd', pretrained_model=pretrained_model, **kwargs)
@manager.MODELS.add_component
def deeplabv3p_resnet101_vd_os8(*args, **kwargs):
pretrained_model = None
return DeepLabV3P(
backbone='ResNet101_vd',
output_stride=8,
pretrained_model=pretrained_model,
**kwargs)
@manager.MODELS.add_component
def deeplabv3p_resnet50_vd(*args, **kwargs):
pretrained_model = None
return DeepLabV3P(
backbone='ResNet50_vd', pretrained_model=pretrained_model, **kwargs)
@manager.MODELS.add_component
def deeplabv3p_resnet50_vd_os8(*args, **kwargs):
pretrained_model = None
return DeepLabV3P(
backbone='ResNet50_vd',
output_stride=8,
pretrained_model=pretrained_model,
**kwargs)
@manager.MODELS.add_component
def deeplabv3p_xception65_deeplab(*args, **kwargs):
pretrained_model = None
return DeepLabV3P(
backbone='Xception65_deeplab',
pretrained_model=pretrained_model,
backbone_indices=(0, 1),
backbone_channels=(128, 2048),
**kwargs)
@manager.MODELS.add_component
def deeplabv3p_mobilenetv3_large(*args, **kwargs):
pretrained_model = None
return DeepLabV3P(
backbone='MobileNetV3_large_x1_0',
pretrained_model=pretrained_model,
backbone_indices=(0, 3),
backbone_channels=(24, 160),
**kwargs)
@manager.MODELS.add_component
def deeplabv3p_mobilenetv3_small(*args, **kwargs):
pretrained_model = None
return DeepLabV3P(
backbone='MobileNetV3_small_x1_0',
pretrained_model=pretrained_model,
backbone_indices=(0, 3),
backbone_channels=(16, 96),
**kwargs)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle import fluid, nn
from dygraph.cvlibs import manager
from dygraph.models import model_utils, pspnet
from dygraph.models.architectures import layer_utils
@manager.MODELS.add_component
class FastSCNN(fluid.dygraph.Layer):
"""
The FastSCNN implementation.
As mentioned in original paper, FastSCNN is a real-time segmentation algorithm (123.5fps)
even for high resolution images (1024x2048).
The orginal artile refers to
Poudel, Rudra PK, et al. "Fast-scnn: Fast semantic segmentation network."
(https://arxiv.org/pdf/1902.04502.pdf)
Args:
num_classes (int): the unique number of target classes. Default to 2.
enable_auxiliary_loss (bool): a bool values indictes whether adding auxiliary loss.
if true, auxiliary loss will be added after LearningToDownsample module, where the weight is 0.4. Default to False.
ignore_index (int): the value of ground-truth mask would be ignored while doing evaluation. Default to 255.
"""
def __init__(self,
num_classes=2,
enable_auxiliary_loss=False,
ignore_index=255):
super(FastSCNN, self).__init__()
self.learning_to_downsample = LearningToDownsample(32, 48, 64)
self.global_feature_extractor = GlobalFeatureExtractor(64, [64, 96, 128], 128, 6, [3, 3, 3])
self.feature_fusion = FeatureFusionModule(64, 128, 128)
self.classifier = Classifier(128, num_classes)
if enable_auxiliary_loss:
self.auxlayer = model_utils.AuxLayer(64, 32, num_classes)
self.enable_auxiliary_loss = enable_auxiliary_loss
self.ignore_index = ignore_index
def forward(self, input, label=None):
higher_res_features = self.learning_to_downsample(input)
x = self.global_feature_extractor(higher_res_features)
x = self.feature_fusion(higher_res_features, x)
logit = self.classifier(x)
logit = fluid.layers.resize_bilinear(logit, input.shape[2:])
if self.enable_auxiliary_loss:
auxiliary_logit = self.auxlayer(higher_res_features)
auxiliary_logit = fluid.layers.resize_bilinear(auxiliary_logit, input.shape[2:])
if self.training:
loss = model_utils.get_loss(logit, label)
if self.enable_auxiliary_loss:
auxiliary_loss = model_utils.get_loss(auxiliary_logit, label)
loss += (0.4 * auxiliary_loss)
return loss
else:
pred, score_map = model_utils.get_pred_score_map(logit)
return pred, score_map
class LearningToDownsample(fluid.dygraph.Layer):
"""
Learning to downsample module.
This module consists of three downsampling blocks (one Conv and two separable Conv)
Args:
dw_channels1 (int): the input channels of the first sep conv. Default to 32.
dw_channels2 (int): the input channels of the second sep conv. Default to 48.
out_channels (int): the output channels of LearningToDownsample module. Default to 64.
"""
def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64):
super(LearningToDownsample, self).__init__()
self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=3,
num_filters=dw_channels1,
filter_size=3,
stride=2)
self.dsconv_bn_relu1 = layer_utils.ConvBnRelu(num_channels=dw_channels1,
num_filters=dw_channels2,
filter_size=3,
using_sep_conv=True, # using sep conv
stride=2,
padding=1)
self.dsconv_bn_relu2 = layer_utils.ConvBnRelu(num_channels=dw_channels2,
num_filters=out_channels,
filter_size=3,
using_sep_conv=True, # using sep conv
stride=2,
padding=1)
def forward(self, x):
x = self.conv_bn_relu(x)
x = self.dsconv_bn_relu1(x)
x = self.dsconv_bn_relu2(x)
return x
class GlobalFeatureExtractor(fluid.dygraph.Layer):
"""
Global feature extractor module
This module consists of three LinearBottleneck blocks (like inverted residual introduced by MobileNetV2) and
a PPModule (introduced by PSPNet).
Args:
in_channels (int): the number of input channels to the module. Default to 64.
block_channels (tuple): a tuple represents output channels of each bottleneck block. Default to (64, 96, 128).
out_channels (int): the number of output channels of the module. Default to 128.
expansion (int): the expansion factor in bottleneck. Default to 6.
num_blocks (tuple): it indicates the repeat time of each bottleneck. Default to (3, 3, 3).
"""
def __init__(self, in_channels=64, block_channels=(64, 96, 128),
out_channels=128, expansion=6, num_blocks=(3, 3, 3)):
super(GlobalFeatureExtractor, self).__init__()
self.bottleneck1 = self._make_layer(LinearBottleneck, in_channels, block_channels[0], num_blocks[0], expansion,
2)
self.bottleneck2 = self._make_layer(LinearBottleneck, block_channels[0], block_channels[1], num_blocks[1],
expansion, 2)
self.bottleneck3 = self._make_layer(LinearBottleneck, block_channels[1], block_channels[2], num_blocks[2],
expansion, 1)
self.ppm = pspnet.PPModule(block_channels[2], out_channels, dim_reduction=True)
def _make_layer(self, block, in_channels, out_channels, blocks, expansion=6, stride=1):
layers = []
layers.append(block(in_channels, out_channels, expansion, stride))
for i in range(1, blocks):
layers.append(block(out_channels, out_channels, expansion, 1))
return nn.Sequential(*layers)
def forward(self, x):
x = self.bottleneck1(x)
x = self.bottleneck2(x)
x = self.bottleneck3(x)
x = self.ppm(x)
return x
class LinearBottleneck(fluid.dygraph.Layer):
"""
Single bottleneck implementation.
Args:
in_channels (int): the number of input channels to bottleneck block.
out_channels (int): the number of output channels of bottleneck block.
expansion (int). the expansion factor in bottleneck. Default to 6.
stride (int). the stride used in depth-wise conv.
"""
def __init__(self, in_channels, out_channels, expansion=6, stride=2, **kwargs):
super(LinearBottleneck, self).__init__()
self.use_shortcut = stride == 1 and in_channels == out_channels
expand_channels = in_channels * expansion
self.block = nn.Sequential(
# pw
layer_utils.ConvBnRelu(num_channels=in_channels,
num_filters=expand_channels,
filter_size=1,
bias_attr=False),
# dw
layer_utils.ConvBnRelu(num_channels=expand_channels,
num_filters=expand_channels,
filter_size=3,
stride=stride,
padding=1,
groups=expand_channels,
bias_attr=False),
# pw-linear
nn.Conv2D(num_channels=expand_channels,
num_filters=out_channels,
filter_size=1,
bias_attr=False),
nn.BatchNorm(out_channels)
)
def forward(self, x):
out = self.block(x)
if self.use_shortcut:
out = x + out
return out
class FeatureFusionModule(fluid.dygraph.Layer):
"""
Feature Fusion Module Implememtation.
This module fuses high-resolution feature and low-resolution feature.
Args:
high_in_channels (int): the channels of high-resolution feature (output of LearningToDownsample).
low_in_channels (int). the channels of low-resolution feature (output of GlobalFeatureExtractor).
out_channels (int). the output channels of this module.
"""
def __init__(self, high_in_channels, low_in_channels, out_channels):
super(FeatureFusionModule, self).__init__()
# There only depth-wise conv is used WITHOUT point-sied conv
self.dwconv = layer_utils.ConvBnRelu(num_channels=low_in_channels,
num_filters=out_channels,
filter_size=3,
padding=1,
groups=128)
self.conv_low_res = nn.Sequential(
nn.Conv2D(num_channels=out_channels, num_filters=out_channels, filter_size=1),
nn.BatchNorm(out_channels))
self.conv_high_res = nn.Sequential(
nn.Conv2D(num_channels=high_in_channels, num_filters=out_channels, filter_size=1),
nn.BatchNorm(out_channels))
self.relu = nn.ReLU(True)
def forward(self, high_res_input, low_res_input):
low_res_input = fluid.layers.resize_bilinear(input=low_res_input, scale=4)
low_res_input = self.dwconv(low_res_input)
low_res_input = self.conv_low_res(low_res_input)
high_res_input = self.conv_high_res(high_res_input)
x = high_res_input + low_res_input
return self.relu(x)
class Classifier(fluid.dygraph.Layer):
"""
The Classifier module implemetation.
This module consists of two depth-wsie conv and one conv.
Args:
input_channels (int): the input channels to this module.
num_classes (int). the unique number of target classes.
"""
def __init__(self, input_channels, num_classes):
super(Classifier, self).__init__()
self.dsconv1 = layer_utils.ConvBnRelu(num_channels=input_channels,
num_filters=input_channels,
filter_size=3,
using_sep_conv=True # using sep conv
)
self.dsconv2 = layer_utils.ConvBnRelu(num_channels=input_channels,
num_filters=input_channels,
filter_size=3,
using_sep_conv=True # using sep conv
)
self.conv = nn.Conv2D(num_channels=input_channels,
num_filters=num_classes,
filter_size=1)
def forward(self, x):
x = self.dsconv1(x)
x = self.dsconv2(x)
x = fluid.layers.dropout(x, dropout_prob=0.1)
x = self.conv(x)
return x
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from paddle.fluid.initializer import Normal
from paddle.nn import SyncBatchNorm as BatchNorm
from dygraph.cvlibs import manager
from dygraph import utils
from dygraph.cvlibs import param_init
from dygraph.utils import logger
__all__ = [
"fcn_hrnet_w18_small_v1", "fcn_hrnet_w18_small_v2", "fcn_hrnet_w18",
"fcn_hrnet_w30", "fcn_hrnet_w32", "fcn_hrnet_w40", "fcn_hrnet_w44",
"fcn_hrnet_w48", "fcn_hrnet_w60", "fcn_hrnet_w64"
]
@manager.MODELS.add_component
class FCN(fluid.dygraph.Layer):
"""
Fully Convolutional Networks for Semantic Segmentation.
https://arxiv.org/abs/1411.4038
Args:
num_classes (int): the unique number of target classes.
backbone (paddle.nn.Layer): backbone networks.
model_pretrained (str): the path of pretrained model.
backbone_indices (tuple): one values in the tuple indicte the indices of output of backbone.Default -1.
backbone_channels (tuple): the same length with "backbone_indices". It indicates the channels of corresponding index.
channels (int): channels after conv layer before the last one.
"""
def __init__(self,
num_classes,
backbone,
backbone_pretrained=None,
model_pretrained=None,
backbone_indices=(-1, ),
backbone_channels=(270, ),
channels=None):
super(FCN, self).__init__()
self.num_classes = num_classes
self.backbone_pretrained = backbone_pretrained
self.model_pretrained = model_pretrained
self.backbone_indices = backbone_indices
if channels is None:
channels = backbone_channels[0]
self.backbone = backbone
self.conv_last_2 = ConvBNLayer(
num_channels=backbone_channels[0],
num_filters=channels,
filter_size=1,
stride=1)
self.conv_last_1 = Conv2D(
num_channels=channels,
num_filters=self.num_classes,
filter_size=1,
stride=1,
padding=0)
if self.training:
self.init_weight()
def forward(self, x):
input_shape = x.shape[2:]
fea_list = self.backbone(x)
x = fea_list[self.backbone_indices[0]]
x = self.conv_last_2(x)
logit = self.conv_last_1(x)
logit = fluid.layers.resize_bilinear(logit, input_shape)
return [logit]
def init_weight(self):
params = self.parameters()
for param in params:
param_name = param.name
if 'batch_norm' in param_name:
if 'w_0' in param_name:
param_init.constant_init(param, value=1.0)
elif 'b_0' in param_name:
param_init.constant_init(param, value=0.0)
if 'conv' in param_name and 'w_0' in param_name:
param_init.normal_init(param, scale=0.001)
if self.model_pretrained is not None:
if os.path.exists(self.model_pretrained):
utils.load_pretrained_model(self, self.model_pretrained)
else:
raise Exception('Pretrained model is not found: {}'.format(
self.model_pretrained))
elif self.backbone_pretrained is not None:
if os.path.exists(self.backbone_pretrained):
utils.load_pretrained_model(self.backbone,
self.backbone_pretrained)
else:
raise Exception('Pretrained model is not found: {}'.format(
self.backbone_pretrained))
else:
logger.warning('No pretrained model to load, train from scratch')
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act="relu"):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
bias_attr=False)
self._batch_norm = BatchNorm(num_filters)
self.act = act
def forward(self, input):
y = self._conv(input)
y = self._batch_norm(y)
if self.act == 'relu':
y = fluid.layers.relu(y)
return y
@manager.MODELS.add_component
def fcn_hrnet_w18_small_v1(*args, **kwargs):
return FCN(backbone='HRNet_W18_Small_V1', backbone_channels=(240), **kwargs)
@manager.MODELS.add_component
def fcn_hrnet_w18_small_v2(*args, **kwargs):
return FCN(backbone='HRNet_W18_Small_V2', backbone_channels=(270), **kwargs)
@manager.MODELS.add_component
def fcn_hrnet_w18(*args, **kwargs):
return FCN(backbone='HRNet_W18', backbone_channels=(270), **kwargs)
@manager.MODELS.add_component
def fcn_hrnet_w30(*args, **kwargs):
return FCN(backbone='HRNet_W30', backbone_channels=(450), **kwargs)
@manager.MODELS.add_component
def fcn_hrnet_w32(*args, **kwargs):
return FCN(backbone='HRNet_W32', backbone_channels=(480), **kwargs)
@manager.MODELS.add_component
def fcn_hrnet_w40(*args, **kwargs):
return FCN(backbone='HRNet_W40', backbone_channels=(600), **kwargs)
@manager.MODELS.add_component
def fcn_hrnet_w44(*args, **kwargs):
return FCN(backbone='HRNet_W44', backbone_channels=(660), **kwargs)
@manager.MODELS.add_component
def fcn_hrnet_w48(*args, **kwargs):
return FCN(backbone='HRNet_W48', backbone_channels=(720), **kwargs)
@manager.MODELS.add_component
def fcn_hrnet_w60(*args, **kwargs):
return FCN(backbone='HRNet_W60', backbone_channels=(900), **kwargs)
@manager.MODELS.add_component
def fcn_hrnet_w64(*args, **kwargs):
return FCN(backbone='HRNet_W64', backbone_channels=(960), **kwargs)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .cross_entroy_loss import CrossEntropyLoss
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import nn
import paddle.nn.functional as F
import paddle.fluid as fluid
from dygraph.cvlibs import manager
'''
@manager.LOSSES.add_component
class CrossEntropyLoss(nn.CrossEntropyLoss):
"""
Implements the cross entropy loss function.
Args:
weight (Tensor): Weight tensor, a manual rescaling weight given
to each class and the shape is (C). It has the same dimensions as class
number and the data type is float32, float64. Default ``'None'``.
ignore_index (int64): Specifies a target value that is ignored
and does not contribute to the input gradient. Default ``255``.
reduction (str): Indicate how to average the loss by batch_size,
the candicates are ``'none'`` | ``'mean'`` | ``'sum'``.
If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
If :attr:`size_average` is ``'sum'``, the reduced sum loss is returned.
If :attr:`reduction` is ``'none'``, the unreduced loss is returned.
Default ``'mean'``.
"""
def __init__(self, weight=None, ignore_index=255, reduction='mean'):
self.weight = weight
self.ignore_index = ignore_index
self.reduction = reduction
self.EPS = 1e-5
if self.reduction not in ['sum', 'mean', 'none']:
raise ValueError(
"The value of 'reduction' in cross_entropy_loss should be 'sum', 'mean' or"
" 'none', but received %s, which is not allowed." %
self.reduction)
def forward(self, logit, label):
"""
Forward computation.
Args:
logit (Tensor): logit tensor, the data type is float32, float64. Shape is
(N, C), where C is number of classes, and if shape is more than 2D, this
is (N, C, D1, D2,..., Dk), k >= 1.
label (Variable): label tensor, the data type is int64. Shape is (N), where each
value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is
(N, D1, D2,..., Dk), k >= 1.
"""
loss = paddle.nn.functional.cross_entropy(
logit,
label,
weight=self.weight,
ignore_index=self.ignore_index,
reduction=self.reduction)
mask = label != self.ignore_index
mask = paddle.cast(mask, 'float32')
avg_loss = loss / (paddle.mean(mask) + self.EPS)
label.stop_gradient = True
mask.stop_gradient = True
return avg_loss
'''
@manager.LOSSES.add_component
class CrossEntropyLoss(nn.Layer):
"""
Implements the cross entropy loss function.
Args:
ignore_index (int64): Specifies a target value that is ignored
and does not contribute to the input gradient. Default ``255``.
"""
def __init__(self, ignore_index=255):
super(CrossEntropyLoss, self).__init__()
self.ignore_index = ignore_index
self.EPS = 1e-5
def forward(self, logit, label):
"""
Forward computation.
Args:
logit (Tensor): logit tensor, the data type is float32, float64. Shape is
(N, C), where C is number of classes, and if shape is more than 2D, this
is (N, C, D1, D2,..., Dk), k >= 1.
label (Variable): label tensor, the data type is int64. Shape is (N), where each
value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is
(N, D1, D2,..., Dk), k >= 1.
"""
if len(label.shape) != len(logit.shape):
label = paddle.unsqueeze(label, 1)
logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
label = fluid.layers.transpose(label, [0, 2, 3, 1])
mask = label != self.ignore_index
mask = fluid.layers.cast(mask, 'float32')
loss = fluid.layers.softmax_with_cross_entropy(
logit, label, ignore_index=self.ignore_index, axis=-1)
# if has not this operation, loss can not converge when label has ignore_index.
loss = loss * mask
loss = fluid.layers.mean(loss)
avg_loss = loss / (fluid.layers.mean(mask) + self.EPS)
label.stop_gradient = True
mask.stop_gradient = True
return avg_loss
# -*- encoding: utf-8 -*-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn.functional as F
from paddle import fluid
from paddle.fluid import dygraph
from paddle.fluid.dygraph import Conv2D
#from paddle.nn import SyncBatchNorm as BatchNorm
from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm
from dygraph.models.architectures import layer_utils
class FCNHead(fluid.dygraph.Layer):
"""
The FCNHead implementation used in auxilary layer
Args:
in_channels (int): the number of input channels
out_channels (int): the number of output channels
"""
def __init__(self, in_channels, out_channels):
super(FCNHead, self).__init__()
inter_channels = in_channels // 4
self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=in_channels,
num_filters=inter_channels,
filter_size=3,
padding=1)
self.conv = Conv2D(num_channels=inter_channels,
num_filters=out_channels,
filter_size=1)
def forward(self, x):
x = self.conv_bn_relu(x)
x = F.dropout(x, dropout_prob=0.1)
x = self.conv(x)
return x
class AuxLayer(fluid.dygraph.Layer):
"""
The auxilary layer implementation for auxilary loss
Args:
in_channels (int): the number of input channels.
inter_channels (int): intermediate channels.
out_channels (int): the number of output channels, which is usually num_classes.
"""
def __init__(self, in_channels, inter_channels, out_channels):
super(AuxLayer, self).__init__()
self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=in_channels,
num_filters=inter_channels,
filter_size=3,
padding=1)
self.conv = Conv2D(num_channels=inter_channels,
num_filters=out_channels,
filter_size=1)
def forward(self, x):
x = self.conv_bn_relu(x)
x = F.dropout(x, dropout_prob=0.1)
x = self.conv(x)
return x
def get_loss(logit, label, ignore_index=255, EPS=1e-5):
"""
compute forward loss of the model
Args:
logit (tensor): the logit of model output
label (tensor): ground truth
Returns:
avg_loss (tensor): forward loss
"""
logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
label = fluid.layers.transpose(label, [0, 2, 3, 1])
mask = label != ignore_index
mask = fluid.layers.cast(mask, 'float32')
loss, probs = fluid.layers.softmax_with_cross_entropy(
logit,
label,
ignore_index=ignore_index,
return_softmax=True,
axis=-1)
loss = loss * mask
avg_loss = paddle.mean(loss) / (paddle.mean(mask) + EPS)
label.stop_gradient = True
mask.stop_gradient = True
return avg_loss
def get_pred_score_map(logit):
"""
Get prediction and score map output in inference phase.
Args:
logit (tensor): output logit of network
Returns:
pred (tensor): predition map
score_map (tensor): score map
"""
score_map = F.softmax(logit, axis=1)
score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1])
pred = fluid.layers.argmax(score_map, axis=3)
pred = fluid.layers.unsqueeze(pred, axes=[3])
return pred, score_map
\ No newline at end of file
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle.fluid as fluid
from paddle.fluid.dygraph import Sequential, Conv2D
from dygraph.cvlibs import manager
from dygraph.models.architectures.layer_utils import ConvBnRelu
from dygraph import utils
class SpatialGatherBlock(fluid.dygraph.Layer):
def forward(self, pixels, regions):
n, c, h, w = pixels.shape
_, k, _, _ = regions.shape
# pixels: from (n, c, h, w) to (n, h*w, c)
pixels = fluid.layers.reshape(pixels, (n, c, h * w))
pixels = fluid.layers.transpose(pixels, (0, 2, 1))
# regions: from (n, k, h, w) to (n, k, h*w)
regions = fluid.layers.reshape(regions, (n, k, h * w))
regions = fluid.layers.softmax(regions, axis=2)
# feats: from (n, k, c) to (n, c, k, 1)
feats = fluid.layers.matmul(regions, pixels)
feats = fluid.layers.transpose(feats, (0, 2, 1))
feats = fluid.layers.unsqueeze(feats, axes=[-1])
return feats
class SpatialOCRModule(fluid.dygraph.Layer):
def __init__(self,
in_channels,
key_channels,
out_channels,
dropout_rate=0.1):
super(SpatialOCRModule, self).__init__()
self.attention_block = ObjectAttentionBlock(in_channels, key_channels)
self.dropout_rate = dropout_rate
self.conv1x1 = Conv2D(2 * in_channels, out_channels, 1)
def forward(self, pixels, regions):
context = self.attention_block(pixels, regions)
feats = fluid.layers.concat([context, pixels], axis=1)
feats = self.conv1x1(feats)
feats = fluid.layers.dropout(feats, self.dropout_rate)
return feats
class ObjectAttentionBlock(fluid.dygraph.Layer):
def __init__(self, in_channels, key_channels):
super(ObjectAttentionBlock, self).__init__()
self.in_channels = in_channels
self.key_channels = key_channels
self.f_pixel = Sequential(
ConvBnRelu(in_channels, key_channels, 1),
ConvBnRelu(key_channels, key_channels, 1))
self.f_object = Sequential(
ConvBnRelu(in_channels, key_channels, 1),
ConvBnRelu(key_channels, key_channels, 1))
self.f_down = ConvBnRelu(in_channels, key_channels, 1)
self.f_up = ConvBnRelu(key_channels, in_channels, 1)
def forward(self, x, proxy):
n, _, h, w = x.shape
# query : from (n, c1, h1, w1) to (n, h1*w1, key_channels)
query = self.f_pixel(x)
query = fluid.layers.reshape(query, (n, self.key_channels, -1))
query = fluid.layers.transpose(query, (0, 2, 1))
# key : from (n, c2, h2, w2) to (n, key_channels, h2*w2)
key = self.f_object(proxy)
key = fluid.layers.reshape(key, (n, self.key_channels, -1))
# value : from (n, c2, h2, w2) to (n, h2*w2, key_channels)
value = self.f_down(proxy)
value = fluid.layers.reshape(value, (n, self.key_channels, -1))
value = fluid.layers.transpose(value, (0, 2, 1))
# sim_map (n, h1*w1, h2*w2)
sim_map = fluid.layers.matmul(query, key)
sim_map = (self.key_channels**-.5) * sim_map
sim_map = fluid.layers.softmax(sim_map, axis=-1)
# context from (n, h1*w1, key_channels) to (n , out_channels, h1, w1)
context = fluid.layers.matmul(sim_map, value)
context = fluid.layers.transpose(context, (0, 2, 1))
context = fluid.layers.reshape(context, (n, self.key_channels, h, w))
context = self.f_up(context)
return context
@manager.MODELS.add_component
class OCRNet(fluid.dygraph.Layer):
def __init__(self,
num_classes,
backbone,
model_pretrained=None,
in_channels=None,
ocr_mid_channels=512,
ocr_key_channels=256,
ignore_index=255):
super(OCRNet, self).__init__()
self.ignore_index = ignore_index
self.num_classes = num_classes
self.EPS = 1e-5
self.backbone = backbone
self.spatial_gather = SpatialGatherBlock()
self.spatial_ocr = SpatialOCRModule(ocr_mid_channels, ocr_key_channels,
ocr_mid_channels)
self.conv3x3_ocr = ConvBnRelu(
in_channels, ocr_mid_channels, 3, padding=1)
self.cls_head = Conv2D(ocr_mid_channels, self.num_classes, 1)
self.aux_head = Sequential(
ConvBnRelu(in_channels, in_channels, 3, padding=1),
Conv2D(in_channels, self.num_classes, 1))
self.init_weight(model_pretrained)
def forward(self, x, label=None):
feats = self.backbone(x)
soft_regions = self.aux_head(feats)
pixels = self.conv3x3_ocr(feats)
object_regions = self.spatial_gather(pixels, soft_regions)
ocr = self.spatial_ocr(pixels, object_regions)
logit = self.cls_head(ocr)
logit = fluid.layers.resize_bilinear(logit, x.shape[2:])
if self.training:
soft_regions = fluid.layers.resize_bilinear(soft_regions,
x.shape[2:])
cls_loss = self._get_loss(logit, label)
aux_loss = self._get_loss(soft_regions, label)
return cls_loss + 0.4 * aux_loss
score_map = fluid.layers.softmax(logit, axis=1)
score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1])
pred = fluid.layers.argmax(score_map, axis=3)
pred = fluid.layers.unsqueeze(pred, axes=[3])
return pred, score_map
def init_weight(self, pretrained_model=None):
"""
Initialize the parameters of model parts.
Args:
pretrained_model ([str], optional): the path of pretrained model.. Defaults to None.
"""
if pretrained_model is not None:
if os.path.exists(pretrained_model):
utils.load_pretrained_model(self, pretrained_model)
else:
raise Exception('Pretrained model is not found: {}'.format(
pretrained_model))
def _get_loss(self, logit, label):
"""
compute forward loss of the model
Args:
logit (tensor): the logit of model output
label (tensor): ground truth
Returns:
avg_loss (tensor): forward loss
"""
logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
label = fluid.layers.transpose(label, [0, 2, 3, 1])
mask = label != self.ignore_index
mask = fluid.layers.cast(mask, 'float32')
loss, probs = fluid.layers.softmax_with_cross_entropy(
logit,
label,
ignore_index=self.ignore_index,
return_softmax=True,
axis=-1)
loss = loss * mask
avg_loss = fluid.layers.mean(loss) / (
fluid.layers.mean(mask) + self.EPS)
label.stop_gradient = True
mask.stop_gradient = True
return avg_loss
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle.nn.functional as F
from paddle import fluid
from paddle.fluid.dygraph import Conv2D
from dygraph.cvlibs import manager
from dygraph.models import model_utils
from dygraph.models.architectures import layer_utils
from dygraph.utils import utils
class PSPNet(fluid.dygraph.Layer):
"""
The PSPNet implementation
The orginal artile refers to
Zhao, Hengshuang, et al. "Pyramid scene parsing network."
Proceedings of the IEEE conference on computer vision and pattern recognition. 2017.
(https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf)
Args:
num_classes (int): the unique number of target classes.
backbone (Paddle.nn.Layer): backbone name, currently support Resnet50/101.
model_pretrained (str): the path of pretrained model.
output_stride (int): the ratio of input size and final feature size. Default 16.
backbone_indices (tuple): two values in the tuple indicte the indices of output of backbone.
the first index will be taken as a deep-supervision feature in auxiliary layer;
the second one will be taken as input of Pyramid Pooling Module (PPModule).
Usually backbone consists of four downsampling stage, and return an output of
each stage, so we set default (2, 3), which means taking feature map of the third
stage (res4b22) in backbone, and feature map of the fourth stage (res5c) as input of PPModule.
backbone_channels (tuple): the same length with "backbone_indices". It indicates the channels of corresponding index.
pp_out_channels (int): output channels after Pyramid Pooling Module. Default to 1024.
bin_sizes (tuple): the out size of pooled feature maps. Default to (1,2,3,6).
enable_auxiliary_loss (bool): a bool values indictes whether adding auxiliary loss. Default to True.
ignore_index (int): the value of ground-truth mask would be ignored while doing evaluation. Default to 255.
"""
def __init__(self,
num_classes,
backbone,
model_pretrained=None,
output_stride=16,
backbone_indices=(2, 3),
backbone_channels=(1024, 2048),
pp_out_channels=1024,
bin_sizes=(1, 2, 3, 6),
enable_auxiliary_loss=True,
ignore_index=255):
super(PSPNet, self).__init__()
# self.backbone = manager.BACKBONES[backbone](output_stride=output_stride,
# multi_grid=(1, 1, 1))
self.backbone = backbone
self.backbone_indices = backbone_indices
self.psp_module = PPModule(
in_channels=backbone_channels[1],
out_channels=pp_out_channels,
bin_sizes=bin_sizes)
self.conv = Conv2D(
num_channels=pp_out_channels,
num_filters=num_classes,
filter_size=1)
if enable_auxiliary_loss:
self.fcn_head = model_utils.FCNHead(
in_channels=backbone_channels[0], out_channels=num_classes)
self.enable_auxiliary_loss = enable_auxiliary_loss
self.ignore_index = ignore_index
self.init_weight(model_pretrained)
def forward(self, input, label=None):
_, feat_list = self.backbone(input)
x = feat_list[self.backbone_indices[1]]
x = self.psp_module(x)
x = F.dropout(x, dropout_prob=0.1)
logit = self.conv(x)
logit = fluid.layers.resize_bilinear(logit, input.shape[2:])
if self.enable_auxiliary_loss:
auxiliary_feat = feat_list[self.backbone_indices[0]]
auxiliary_logit = self.fcn_head(auxiliary_feat)
auxiliary_logit = fluid.layers.resize_bilinear(
auxiliary_logit, input.shape[2:])
if self.training:
loss = model_utils.get_loss(logit, label)
if self.enable_auxiliary_loss:
auxiliary_loss = model_utils.get_loss(auxiliary_logit, label)
loss += (0.4 * auxiliary_loss)
return loss
else:
pred, score_map = model_utils.get_pred_score_map(logit)
return pred, score_map
def init_weight(self, pretrained_model=None):
"""
Initialize the parameters of model parts.
Args:
pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
"""
if pretrained_model is not None:
if os.path.exists(pretrained_model):
utils.load_pretrained_model(self, pretrained_model)
else:
raise Exception('Pretrained model is not found: {}'.format(
pretrained_model))
class PPModule(fluid.dygraph.Layer):
"""
Pyramid pooling module
Args:
in_channels (int): the number of intput channels to pyramid pooling module.
out_channels (int): the number of output channels after pyramid pooling module.
bin_sizes (tuple): the out size of pooled feature maps. Default to (1,2,3,6).
dim_reduction (bool): a bool value represent if reduing dimention after pooling. Default to True.
"""
def __init__(self, in_channels, out_channels, bin_sizes=(1, 2, 3, 6), dim_reduction=True):
super(PPModule, self).__init__()
self.bin_sizes = bin_sizes
inter_channels = in_channels
if dim_reduction:
inter_channels = in_channels // len(bin_sizes)
# we use dimension reduction after pooling mentioned in original implementation.
self.stages = fluid.dygraph.LayerList([self._make_stage(in_channels, inter_channels, size) for size in bin_sizes])
self.conv_bn_relu2 = layer_utils.ConvBnRelu(num_channels=in_channels + inter_channels * len(bin_sizes),
num_filters=out_channels,
filter_size=3,
padding=1)
def _make_stage(self, in_channels, out_channels, size):
"""
Create one pooling layer.
In our implementation, we adopt the same dimention reduction as the original paper that might be
slightly different with other implementations.
After pooling, the channels are reduced to 1/len(bin_sizes) immediately, while some other implementations
keep the channels to be same.
Args:
in_channels (int): the number of intput channels to pyramid pooling module.
size (int): the out size of the pooled layer.
Returns:
conv (tensor): a tensor after Pyramid Pooling Module
"""
# this paddle version does not support AdaptiveAvgPool2d, so skip it here.
# prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
conv = layer_utils.ConvBnRelu(num_channels=in_channels,
num_filters=out_channels,
filter_size=1)
return conv
def forward(self, input):
cat_layers = []
for i, stage in enumerate(self.stages):
size = self.bin_sizes[i]
x = fluid.layers.adaptive_pool2d(
input, pool_size=(size, size), pool_type="max")
x = stage(x)
x = fluid.layers.resize_bilinear(x, out_shape=input.shape[2:])
cat_layers.append(x)
cat_layers = [input] + cat_layers[::-1]
cat = fluid.layers.concat(cat_layers, axis=1)
out = self.conv_bn_relu2(cat)
return out
@manager.MODELS.add_component
def pspnet_resnet101_vd(*args, **kwargs):
pretrained_model = None
return PSPNet(
backbone='ResNet101_vd', pretrained_model=pretrained_model, **kwargs)
@manager.MODELS.add_component
def pspnet_resnet101_vd_os8(*args, **kwargs):
pretrained_model = None
return PSPNet(
backbone='ResNet101_vd',
output_stride=8,
pretrained_model=pretrained_model,
**kwargs)
@manager.MODELS.add_component
def pspnet_resnet50_vd(*args, **kwargs):
pretrained_model = None
return PSPNet(
backbone='ResNet50_vd', pretrained_model=pretrained_model, **kwargs)
@manager.MODELS.add_component
def pspnet_resnet50_vd_os8(*args, **kwargs):
pretrained_model = None
return PSPNet(
backbone='ResNet50_vd',
output_stride=8,
pretrained_model=pretrained_model,
**kwargs)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle.fluid as fluid
from paddle.fluid.dygraph import Conv2D, Pool2D
from paddle.nn import SyncBatchNorm as BatchNorm
from dygraph.cvlibs import manager
from dygraph import utils
class UNet(fluid.dygraph.Layer):
"""
U-Net: Convolutional Networks for Biomedical Image Segmentation.
https://arxiv.org/abs/1505.04597
Args:
num_classes (int): the unique number of target classes.
pretrained_model (str): the path of pretrained model.
ignore_index (int): the value of ground-truth mask would be ignored while computing loss or doing evaluation. Default 255.
"""
def __init__(self, num_classes, model_pretrained=None, ignore_index=255):
super(UNet, self).__init__()
self.encode = UnetEncoder()
self.decode = UnetDecode()
self.get_logit = GetLogit(64, num_classes)
self.ignore_index = ignore_index
self.EPS = 1e-5
self.init_weight(model_pretrained)
def forward(self, x, label=None):
encode_data, short_cuts = self.encode(x)
decode_data = self.decode(encode_data, short_cuts)
logit = self.get_logit(decode_data)
if self.training:
return self._get_loss(logit, label)
else:
score_map = fluid.layers.softmax(logit, axis=1)
score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1])
pred = fluid.layers.argmax(score_map, axis=3)
pred = fluid.layers.unsqueeze(pred, axes=[3])
return pred, score_map
def init_weight(self, pretrained_model=None):
"""
Initialize the parameters of model parts.
Args:
pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
"""
if pretrained_model is not None:
if os.path.exists(pretrained_model):
utils.load_pretrained_model(self, pretrained_model)
else:
raise Exception('Pretrained model is not found: {}'.format(
pretrained_model))
def _get_loss(self, logit, label):
logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
label = fluid.layers.transpose(label, [0, 2, 3, 1])
mask = label != self.ignore_index
mask = fluid.layers.cast(mask, 'float32')
loss, probs = fluid.layers.softmax_with_cross_entropy(
logit,
label,
ignore_index=self.ignore_index,
return_softmax=True,
axis=-1)
loss = loss * mask
avg_loss = fluid.layers.mean(loss) / (
fluid.layers.mean(mask) + self.EPS)
label.stop_gradient = True
mask.stop_gradient = True
return avg_loss
class UnetEncoder(fluid.dygraph.Layer):
def __init__(self):
super(UnetEncoder, self).__init__()
self.double_conv = DoubleConv(3, 64)
self.down1 = Down(64, 128)
self.down2 = Down(128, 256)
self.down3 = Down(256, 512)
self.down4 = Down(512, 512)
def forward(self, x):
short_cuts = []
x = self.double_conv(x)
short_cuts.append(x)
x = self.down1(x)
short_cuts.append(x)
x = self.down2(x)
short_cuts.append(x)
x = self.down3(x)
short_cuts.append(x)
x = self.down4(x)
return x, short_cuts
class UnetDecode(fluid.dygraph.Layer):
def __init__(self):
super(UnetDecode, self).__init__()
self.up1 = Up(512, 256)
self.up2 = Up(256, 128)
self.up3 = Up(128, 64)
self.up4 = Up(64, 64)
def forward(self, x, short_cuts):
x = self.up1(x, short_cuts[3])
x = self.up2(x, short_cuts[2])
x = self.up3(x, short_cuts[1])
x = self.up4(x, short_cuts[0])
return x
class DoubleConv(fluid.dygraph.Layer):
def __init__(self, num_channels, num_filters):
super(DoubleConv, self).__init__()
self.conv0 = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=3,
stride=1,
padding=1)
self.bn0 = BatchNorm(num_filters)
self.conv1 = Conv2D(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=1,
padding=1)
self.bn1 = BatchNorm(num_filters)
def forward(self, x):
x = self.conv0(x)
x = self.bn0(x)
x = fluid.layers.relu(x)
x = self.conv1(x)
x = self.bn1(x)
x = fluid.layers.relu(x)
return x
class Down(fluid.dygraph.Layer):
def __init__(self, num_channels, num_filters):
super(Down, self).__init__()
self.max_pool = Pool2D(
pool_size=2, pool_type='max', pool_stride=2, pool_padding=0)
self.double_conv = DoubleConv(num_channels, num_filters)
def forward(self, x):
x = self.max_pool(x)
x = self.double_conv(x)
return x
class Up(fluid.dygraph.Layer):
def __init__(self, num_channels, num_filters):
super(Up, self).__init__()
self.double_conv = DoubleConv(2 * num_channels, num_filters)
def forward(self, x, short_cut):
short_cut_shape = fluid.layers.shape(short_cut)
x = fluid.layers.resize_bilinear(x, short_cut_shape[2:])
x = fluid.layers.concat([x, short_cut], axis=1)
x = self.double_conv(x)
return x
class GetLogit(fluid.dygraph.Layer):
def __init__(self, num_channels, num_classes):
super(GetLogit, self).__init__()
self.conv = Conv2D(
num_channels=num_channels,
num_filters=num_classes,
filter_size=3,
stride=1,
padding=1)
def forward(self, x):
x = self.conv(x)
return x
@manager.MODELS.add_component
def unet(*args, **kwargs):
return UNet(*args, **kwargs)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from .dataset import Dataset
class Rice(Dataset):
def __init__(self, transforms=None, mode='train', download=True):
self.data_dir = "/mnt/liuyi22/PaddlePaddle/POC/rice_dataset"
self.transforms = transforms
self.file_list = list()
self.mode = mode
self.num_classes = 2
if mode.lower() not in ['train', 'eval', 'test']:
raise Exception(
"mode should be 'train', 'eval' or 'test', but got {}.".format(
mode))
if self.transforms is None:
raise Exception("transform is necessary, but it is None.")
if mode == 'train':
file_list = os.path.join(self.data_dir, 'train_list.txt')
elif mode == 'eval':
file_list = os.path.join(self.data_dir, 'val_list.txt')
else:
file_list = os.path.join(self.data_dir, 'test_list.txt')
with open(file_list, 'r') as f:
for line in f:
items = line.strip().split()
if len(items) != 2:
if mode == 'train' or mode == 'eval':
raise Exception(
"File list format incorrect! It should be"
" image_name label_name\\n")
image_path = os.path.join(self.data_dir, items[0])
grt_path = None
else:
image_path = os.path.join(self.data_dir, items[0])
grt_path = os.path.join(self.data_dir, items[1])
self.file_list.append([image_path, grt_path])
......@@ -17,8 +17,9 @@ import os
import paddle
import paddle.nn.functional as F
from paddle import nn
from paddleseg.cvlibs import manager
from paddleseg.models.common import layer_utils, model_utils
from paddleseg.models.common import layer_libs
from paddleseg.utils import utils
......@@ -88,7 +89,7 @@ class ANN(nn.Layer):
psp_size=psp_size)
self.context = nn.Sequential(
layer_utils.ConvBnRelu(
layer_libs.ConvBnRelu(
in_channels=high_in_channels,
out_channels=inter_channels,
kernel_size=3,
......@@ -106,7 +107,7 @@ class ANN(nn.Layer):
in_channels=inter_channels,
out_channels=num_classes,
kernel_size=1)
self.auxlayer = model_utils.AuxLayer(
self.auxlayer = layer_libs.AuxLayer(
in_channels=low_in_channels,
inter_channels=low_in_channels // 2,
out_channels=num_classes,
......@@ -189,7 +190,7 @@ class AFNB(nn.Layer):
key_channels, value_channels, out_channels,
size) for size in sizes
])
self.conv_bn = layer_utils.ConvBn(
self.conv_bn = layer_libs.ConvBn(
in_channels=out_channels + high_in_channels,
out_channels=out_channels,
kernel_size=1)
......@@ -243,7 +244,7 @@ class APNB(nn.Layer):
SelfAttentionBlock_APNB(in_channels, out_channels, key_channels,
value_channels, size) for size in sizes
])
self.conv_bn = layer_utils.ConvBnRelu(
self.conv_bn = layer_libs.ConvBnRelu(
in_channels=in_channels * 2,
out_channels=out_channels,
kernel_size=1)
......@@ -310,11 +311,11 @@ class SelfAttentionBlock_AFNB(nn.Layer):
if out_channels == None:
self.out_channels = high_in_channels
self.pool = nn.Pool2D(pool_size=(scale, scale), pool_type="max")
self.f_key = layer_utils.ConvBnRelu(
self.f_key = layer_libs.ConvBnRelu(
in_channels=low_in_channels,
out_channels=key_channels,
kernel_size=1)
self.f_query = layer_utils.ConvBnRelu(
self.f_query = layer_libs.ConvBnRelu(
in_channels=high_in_channels,
out_channels=key_channels,
kernel_size=1)
......@@ -393,7 +394,7 @@ class SelfAttentionBlock_APNB(nn.Layer):
self.value_channels = value_channels
self.pool = nn.Pool2D(pool_size=(scale, scale), pool_type="max")
self.f_key = layer_utils.ConvBnRelu(
self.f_key = layer_libs.ConvBnRelu(
in_channels=self.in_channels,
out_channels=self.key_channels,
kernel_size=1)
......
......@@ -27,7 +27,7 @@ from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
from paddle.nn import SyncBatchNorm as BatchNorm
from paddleseg.models.common import layer_utils
from paddleseg.models.common import layer_libs
from paddleseg.cvlibs import manager
from paddleseg.utils import utils
......
......@@ -28,7 +28,7 @@ from paddle.nn import Conv2d, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d
from paddleseg.utils import utils
from paddleseg.models.common import layer_utils
from paddleseg.models.common import layer_libs, activation
from paddleseg.cvlibs import manager
__all__ = [
......@@ -68,7 +68,7 @@ class ConvBNLayer(nn.Layer):
else:
bn_name = "bn" + name[3:]
self._batch_norm = BatchNorm(out_channels)
self._act_op = layer_utils.Activation(act=act)
self._act_op = activation.Activation(act=act)
def forward(self, inputs):
if self.is_vd_mode:
......@@ -201,7 +201,7 @@ class ResNet_vd(nn.Layer):
layers=50,
class_dim=1000,
output_stride=None,
multi_grid=(1, 2, 4)):
multi_grid=(1, 1, 1)):
super(ResNet_vd, self).__init__()
self.layers = layers
......
......@@ -21,7 +21,7 @@ from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
from paddle.nn import SyncBatchNorm as BatchNorm
from paddleseg.models.common import layer_utils
from paddleseg.models.common import layer_libs
from paddleseg.cvlibs import manager
from paddleseg.utils import utils
......
......@@ -13,5 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from . import layer_utils
from . import model_utils
\ No newline at end of file
from . import layer_libs
from . import activation
from . import pyramid_pool
\ No newline at end of file
......@@ -13,114 +13,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.nn.functional as F
from paddle import fluid
from paddle.fluid import dygraph
from paddle.fluid.dygraph import Conv2D
from paddle.nn import SyncBatchNorm as BatchNorm
from paddle import nn
from paddle.nn.layer import activation
class ConvBnRelu(dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
using_sep_conv=False,
**kwargs):
super(ConvBnRelu, self).__init__()
if using_sep_conv:
self.conv = DepthwiseConvBnRelu(num_channels,
num_filters,
filter_size,
**kwargs)
else:
self.conv = Conv2D(num_channels,
num_filters,
filter_size,
**kwargs)
self.batch_norm = BatchNorm(num_filters)
def forward(self, x):
x = self.conv(x)
x = self.batch_norm(x)
x = F.relu(x)
return x
class ConvBn(dygraph.Layer):
def __init__(self, num_channels, num_filters, filter_size, **kwargs):
super(ConvBn, self).__init__()
self.conv = Conv2D(num_channels,
num_filters,
filter_size,
**kwargs)
self.batch_norm = BatchNorm(num_filters)
def forward(self, x):
x = self.conv(x)
x = self.batch_norm(x)
return x
class ConvReluPool(dygraph.Layer):
def __init__(self, num_channels, num_filters):
super(ConvReluPool, self).__init__()
self.conv = Conv2D(num_channels,
num_filters,
filter_size=3,
stride=1,
padding=1,
dilation=1)
def forward(self, x):
x = self.conv(x)
x = F.relu(x)
x = fluid.layers.pool2d(x, pool_size=2, pool_type="max", pool_stride=2)
return x
class ConvBnReluUpsample(dygraph.Layer):
def __init__(self, num_channels, num_filters):
super(ConvBnReluUpsample, self).__init__()
self.conv_bn_relu = ConvBnRelu(num_channels, num_filters)
def forward(self, x, upsample_scale=2):
x = self.conv_bn_relu(x)
new_shape = [x.shape[2] * upsample_scale, x.shape[3] * upsample_scale]
x = fluid.layers.resize_bilinear(x, new_shape)
return x
class DepthwiseConvBnRelu(dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
**kwargs):
super(DepthwiseConvBnRelu, self).__init__()
self.depthwise_conv = ConvBn(num_channels,
num_filters=num_channels,
filter_size=filter_size,
groups=num_channels,
use_cudnn=False,
**kwargs)
self.piontwise_conv = ConvBnRelu(num_channels,
num_filters,
filter_size=1,
groups=1)
def forward(self, x):
x = self.depthwise_conv(x)
x = self.piontwise_conv(x)
return x
class Activation(fluid.dygraph.Layer):
class Activation(nn.Layer):
"""
The wrapper of activations
For example:
......@@ -152,7 +49,8 @@ class Activation(fluid.dygraph.Layer):
act_name = act_dict[act]
self.act_func = eval("activation.{}()".format(act_name))
else:
raise KeyError("{} does not exist in the current {}".format(act, act_dict.keys()))
raise KeyError("{} does not exist in the current {}".format(
act, act_dict.keys()))
def forward(self, x):
......
......@@ -70,18 +70,6 @@ class ConvReluPool(nn.Layer):
return x
# class ConvBnReluUpsample(nn.Layer):
# def __init__(self, in_channels, out_channels):
# super(ConvBnReluUpsample, self).__init__()
# self.conv_bn_relu = ConvBnRelu(in_channels, out_channels)
# def forward(self, x, upsample_scale=2):
# x = self.conv_bn_relu(x)
# new_shape = [x.shape[2] * upsample_scale, x.shape[3] * upsample_scale]
# x = F.resize_bilinear(x, new_shape)
# return x
class DepthwiseConvBnRelu(nn.Layer):
def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
super(DepthwiseConvBnRelu, self).__init__()
......@@ -100,44 +88,43 @@ class DepthwiseConvBnRelu(nn.Layer):
return x
class Activation(nn.Layer):
class AuxLayer(nn.Layer):
"""
The wrapper of activations
For example:
>>> relu = Activation("relu")
>>> print(relu)
<class 'paddle.nn.layer.activation.ReLU'>
>>> sigmoid = Activation("sigmoid")
>>> print(sigmoid)
<class 'paddle.nn.layer.activation.Sigmoid'>
>>> not_exit_one = Activation("not_exit_one")
KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink',
'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax',
'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])"
The auxilary layer implementation for auxilary loss
Args:
act (str): the activation name in lowercase
in_channels (int): the number of input channels.
inter_channels (int): intermediate channels.
out_channels (int): the number of output channels, which is usually num_classes.
dropout_prob (float): the droput rate. Default to 0.1.
"""
def __init__(self, act=None):
super(Activation, self).__init__()
def __init__(self,
in_channels,
inter_channels,
out_channels,
dropout_prob=0.1):
super(AuxLayer, self).__init__()
self._act = act
upper_act_names = activation.__all__
lower_act_names = [act.lower() for act in upper_act_names]
act_dict = dict(zip(lower_act_names, upper_act_names))
self.conv_bn_relu = ConvBnRelu(
in_channels=in_channels,
out_channels=inter_channels,
kernel_size=3,
padding=1)
if act is not None:
if act in act_dict.keys():
act_name = act_dict[act]
self.act_func = eval("activation.{}()".format(act_name))
else:
raise KeyError("{} does not exist in the current {}".format(
act, act_dict.keys()))
self.conv = nn.Conv2d(
in_channels=inter_channels,
out_channels=out_channels,
kernel_size=1)
def forward(self, x):
self.dropout_prob = dropout_prob
if self._act is not None:
return self.act_func(x)
else:
def forward(self, x):
x = self.conv_bn_relu(x)
x = F.dropout(x, p=self.dropout_prob)
x = self.conv(x)
return x
......@@ -13,85 +13,96 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle.nn import SyncBatchNorm as BatchNorm
from paddleseg.models.common import layer_utils
from paddleseg.models.common import layer_libs
class FCNHead(nn.Layer):
class ASPPModule(nn.Layer):
"""
The FCNHead implementation used in auxilary layer
Atrous Spatial Pyramid Pooling
Args:
in_channels (int): the number of input channels
out_channels (int): the number of output channels
"""
def __init__(self, in_channels, out_channels):
super(FCNHead, self).__init__()
inter_channels = in_channels // 4
self.conv_bn_relu = layer_utils.ConvBnRelu(
in_channels=in_channels,
out_channels=inter_channels,
kernel_size=3,
padding=1)
aspp_ratios (tuple): the dilation rate using in ASSP module.
self.conv = nn.Conv2d(
in_channels=inter_channels,
out_channels=out_channels,
kernel_size=1)
in_channels (int): the number of input channels.
def forward(self, x):
x = self.conv_bn_relu(x)
x = F.dropout(x, p=0.1)
x = self.conv(x)
return x
out_channels (int): the number of output channels.
sep_conv (bool): if using separable conv in ASPP module.
class AuxLayer(nn.Layer):
"""
The auxilary layer implementation for auxilary loss
image_pooling: if augmented with image-level features.
Args:
in_channels (int): the number of input channels.
inter_channels (int): intermediate channels.
out_channels (int): the number of output channels, which is usually num_classes.
"""
def __init__(self,
aspp_ratios,
in_channels,
inter_channels,
out_channels,
dropout_prob=0.1):
super(AuxLayer, self).__init__()
sep_conv=False,
image_pooling=False):
super(ASPPModule, self).__init__()
self.conv_bn_relu = layer_utils.ConvBnRelu(
in_channels=in_channels,
out_channels=inter_channels,
kernel_size=3,
padding=1)
self.aspp_blocks = []
for ratio in aspp_ratios:
if sep_conv and ratio > 1:
conv_func = layer_libs.DepthwiseConvBnRelu
else:
conv_func = layer_libs.ConvBnRelu
self.conv = nn.Conv2d(
in_channels=inter_channels,
block = conv_func(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1 if ratio == 1 else 3,
dilation=ratio,
padding=0 if ratio == 1 else ratio
)
self.aspp_blocks.append(block)
out_size = len(self.aspp_blocks)
if image_pooling:
self.global_avg_pool = nn.Sequential(
nn.AdaptiveAvgPool2d(output_size=(1, 1)),
layer_libs.ConvBnRelu(in_channels, out_channels, kernel_size=1, bias_attr=False)
)
out_size += 1
self.image_pooling = image_pooling
self.conv_bn_relu = layer_libs.ConvBnRelu(
in_channels=out_channels * out_size,
out_channels=out_channels,
kernel_size=1)
self.dropout_prob = dropout_prob
self.dropout = nn.Dropout(p=0.1) # drop rate
def forward(self, x):
outputs = []
for block in self.aspp_blocks:
outputs.append(block(x))
if self.image_pooling:
img_avg = self.global_avg_pool(x)
img_avg = F.resize_bilinear(img_avg, out_shape=x.shape[2:])
outputs.append(img_avg)
x = paddle.concat(outputs, axis=1)
x = self.conv_bn_relu(x)
x = F.dropout(x, p=self.dropout_prob)
x = self.conv(x)
x = self.dropout(x)
return x
class PPModule(nn.Layer):
"""
Pyramid pooling module
Pyramid pooling module orginally in PSPNet
Args:
in_channels (int): the number of intput channels to pyramid pooling module.
......@@ -109,6 +120,7 @@ class PPModule(nn.Layer):
bin_sizes=(1, 2, 3, 6),
dim_reduction=True):
super(PPModule, self).__init__()
self.bin_sizes = bin_sizes
inter_channels = in_channels
......@@ -121,7 +133,7 @@ class PPModule(nn.Layer):
for size in bin_sizes
])
self.conv_bn_relu2 = layer_utils.ConvBnRelu(
self.conv_bn_relu2 = layer_libs.ConvBnRelu(
in_channels=in_channels + inter_channels * len(bin_sizes),
out_channels=out_channels,
kernel_size=3,
......@@ -147,20 +159,17 @@ class PPModule(nn.Layer):
conv (tensor): a tensor after Pyramid Pooling Module
"""
# this paddle version does not support AdaptiveAvgPool2d, so skip it here.
# prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
conv = layer_utils.ConvBnRelu(
prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
conv = layer_libs.ConvBnRelu(
in_channels=in_channels, out_channels=out_channels, kernel_size=1)
return conv
return nn.Sequential(prior, conv)
def forward(self, input):
cat_layers = []
for i, stage in enumerate(self.stages):
size = self.bin_sizes[i]
x = F.adaptive_pool2d(
input, pool_size=(size, size), pool_type="max")
x = stage(x)
x = stage(input)
x = F.resize_bilinear(x, out_shape=input.shape[2:])
cat_layers.append(x)
cat_layers = [input] + cat_layers[::-1]
......
......@@ -18,7 +18,7 @@ import paddle
import paddle.nn.functional as F
from paddle import nn
from paddleseg.cvlibs import manager
from paddleseg.models.common import layer_utils
from paddleseg.models.common import pyramid_pool, layer_libs
from paddleseg.utils import utils
__all__ = ['DeepLabV3P', 'DeepLabV3']
......@@ -43,8 +43,9 @@ class DeepLabV3P(nn.Layer):
model_pretrained (str): the path of pretrained model.
output_stride (int): the ratio of input size and final feature size.
Support 16 or 8. Default to 16.
aspp_ratios (tuple): the dilation rate using in ASSP module.
if output_stride=16, aspp_ratios should be set as (1, 6, 12, 18).
if output_stride=8, aspp_ratios is (1, 12, 24, 36).
backbone_indices (tuple): two values in the tuple indicte the indices of output of backbone.
the first index will be taken as a low-level feature in Deconder component;
......@@ -61,18 +62,24 @@ class DeepLabV3P(nn.Layer):
def __init__(self,
num_classes,
backbone,
backbone_pretrained=None,
model_pretrained=None,
backbone_indices=(0, 3),
backbone_channels=(256, 2048),
output_stride=16):
aspp_ratios=(1, 6, 12, 18),
aspp_out_channels=256):
super(DeepLabV3P, self).__init__()
self.backbone = backbone
self.aspp = ASPP(output_stride, backbone_channels[1])
self.backbone_pretrained = backbone_pretrained
self.model_pretrained = model_pretrained
self.aspp = pyramid_pool.ASPPModule(
aspp_ratios, backbone_channels[1], aspp_out_channels, sep_conv=True, image_pooling=True)
self.decoder = Decoder(num_classes, backbone_channels[0])
self.backbone_indices = backbone_indices
self.init_weight(model_pretrained)
self.init_weight()
def forward(self, input, label=None):
......@@ -87,18 +94,16 @@ class DeepLabV3P(nn.Layer):
return logit_list
def init_weight(self, pretrained_model=None):
def init_weight(self):
"""
Initialize the parameters of model parts.
Args:
pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
"""
if pretrained_model is not None:
if os.path.exists(pretrained_model):
utils.load_pretrained_model(self, pretrained_model)
else:
raise Exception('Pretrained model is not found: {}'.format(
pretrained_model))
if self.model_pretrained is not None:
utils.load_pretrained_model(self, self.model_pretrained)
elif self.backbone_pretrained is not None:
utils.load_pretrained_model(self.backbone, self.backbone_pretrained)
@manager.MODELS.add_component
......@@ -119,15 +124,21 @@ class DeepLabV3(nn.Layer):
def __init__(self,
num_classes,
backbone,
backbone_pretrained=None,
model_pretrained=None,
backbone_indices=(3,),
backbone_channels=(2048,),
output_stride=16):
aspp_ratios=(1, 6, 12, 18),
aspp_out_channels=256):
super(DeepLabV3, self).__init__()
self.backbone = backbone
self.aspp = ASPP(output_stride, backbone_channels[0])
self.aspp = pyramid_pool.ASPPModule(
aspp_ratios, backbone_channels[0], aspp_out_channels,
sep_conv=False, image_pooling=True)
self.cls = nn.Conv2d(
in_channels=backbone_channels[0],
out_channels=num_classes,
......@@ -161,98 +172,6 @@ class DeepLabV3(nn.Layer):
pretrained_model))
class ImageAverage(nn.Layer):
"""
Global average pooling
Args:
in_channels (int): the number of input channels.
"""
def __init__(self, in_channels):
super(ImageAverage, self).__init__()
self.conv_bn_relu = layer_utils.ConvBnRelu(
in_channels, out_channels=256, kernel_size=1)
def forward(self, input):
x = paddle.reduce_mean(input, dim=[2, 3], keep_dim=True)
x = self.conv_bn_relu(x)
x = F.resize_bilinear(x, out_shape=input.shape[2:])
return x
class ASPP(nn.Layer):
"""
Decoder module of DeepLabV3P model
Args:
output_stride (int): the ratio of input size and final feature size. Support 16 or 8.
in_channels (int): the number of input channels in decoder module.
"""
def __init__(self, output_stride, in_channels):
super(ASPP, self).__init__()
if output_stride == 16:
aspp_ratios = (6, 12, 18)
elif output_stride == 8:
aspp_ratios = (12, 24, 36)
else:
raise NotImplementedError(
"Only support output_stride is 8 or 16, but received{}".format(
output_stride))
self.image_average = ImageAverage(in_channels=in_channels)
# The first aspp using 1*1 conv
self.aspp1 = layer_utils.DepthwiseConvBnRelu(
in_channels=in_channels, out_channels=256, kernel_size=1)
# The second aspp using 3*3 (separable) conv at dilated rate aspp_ratios[0]
self.aspp2 = layer_utils.DepthwiseConvBnRelu(
in_channels=in_channels,
out_channels=256,
kernel_size=3,
dilation=aspp_ratios[0],
padding=aspp_ratios[0])
# The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[1]
self.aspp3 = layer_utils.DepthwiseConvBnRelu(
in_channels=in_channels,
out_channels=256,
kernel_size=3,
dilation=aspp_ratios[1],
padding=aspp_ratios[1])
# The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[2]
self.aspp4 = layer_utils.DepthwiseConvBnRelu(
in_channels=in_channels,
out_channels=256,
kernel_size=3,
dilation=aspp_ratios[2],
padding=aspp_ratios[2])
# After concat op, using 1*1 conv
self.conv_bn_relu = layer_utils.ConvBnRelu(
in_channels=1280, out_channels=256, kernel_size=1)
def forward(self, x):
x1 = self.image_average(x)
x2 = self.aspp1(x)
x3 = self.aspp2(x)
x4 = self.aspp3(x)
x5 = self.aspp4(x)
x = paddle.concat([x1, x2, x3, x4, x5], axis=1)
x = self.conv_bn_relu(x)
x = F.dropout(x, p=0.1) # dropout_prob
return x
class Decoder(nn.Layer):
"""
Decoder module of DeepLabV3P model
......@@ -267,12 +186,12 @@ class Decoder(nn.Layer):
def __init__(self, num_classes, in_channels):
super(Decoder, self).__init__()
self.conv_bn_relu1 = layer_utils.ConvBnRelu(
self.conv_bn_relu1 = layer_libs.ConvBnRelu(
in_channels=in_channels, out_channels=48, kernel_size=1)
self.conv_bn_relu2 = layer_utils.DepthwiseConvBnRelu(
self.conv_bn_relu2 = layer_libs.DepthwiseConvBnRelu(
in_channels=304, out_channels=256, kernel_size=3, padding=1)
self.conv_bn_relu3 = layer_utils.DepthwiseConvBnRelu(
self.conv_bn_relu3 = layer_libs.DepthwiseConvBnRelu(
in_channels=256, out_channels=256, kernel_size=3, padding=1)
self.conv = nn.Conv2d(
in_channels=256, out_channels=num_classes, kernel_size=1)
......
......@@ -15,7 +15,7 @@
import paddle.nn.functional as F
from paddle import nn
from paddleseg.cvlibs import manager
from paddleseg.models.common import layer_utils, model_utils
from paddleseg.models.common import layer_libs
@manager.MODELS.add_component
......@@ -110,15 +110,15 @@ class LearningToDownsample(nn.Layer):
def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64):
super(LearningToDownsample, self).__init__()
self.conv_bn_relu = layer_utils.ConvBnRelu(
self.conv_bn_relu = layer_libs.ConvBnRelu(
in_channels=3, out_channels=dw_channels1, kernel_size=3, stride=2)
self.dsconv_bn_relu1 = layer_utils.DepthwiseConvBnRelu(
self.dsconv_bn_relu1 = layer_libs.DepthwiseConvBnRelu(
in_channels=dw_channels1,
out_channels=dw_channels2,
kernel_size=3,
stride=2,
padding=1)
self.dsconv_bn_relu2 = layer_utils.DepthwiseConvBnRelu(
self.dsconv_bn_relu2 = layer_libs.DepthwiseConvBnRelu(
in_channels=dw_channels2,
out_channels=out_channels,
kernel_size=3,
......@@ -220,13 +220,13 @@ class LinearBottleneck(nn.Layer):
expand_channels = in_channels * expansion
self.block = nn.Sequential(
# pw
layer_utils.ConvBnRelu(
layer_libs.ConvBnRelu(
in_channels=in_channels,
out_channels=expand_channels,
kernel_size=1,
bias_attr=False),
# dw
layer_utils.ConvBnRelu(
layer_libs.ConvBnRelu(
in_channels=expand_channels,
out_channels=expand_channels,
kernel_size=3,
......@@ -267,7 +267,7 @@ class FeatureFusionModule(nn.Layer):
super(FeatureFusionModule, self).__init__()
# There only depth-wise conv is used WITHOUT point-wise conv
self.dwconv = layer_utils.ConvBnRelu(
self.dwconv = layer_libs.ConvBnRelu(
in_channels=low_in_channels,
out_channels=out_channels,
kernel_size=3,
......@@ -317,13 +317,13 @@ class Classifier(nn.Layer):
def __init__(self, input_channels, num_classes):
super(Classifier, self).__init__()
self.dsconv1 = layer_utils.DepthwiseConvBnRelu(
self.dsconv1 = layer_libs.DepthwiseConvBnRelu(
in_channels=input_channels,
out_channels=input_channels,
kernel_size=3,
padding=1)
self.dsconv2 = layer_utils.DepthwiseConvBnRelu(
self.dsconv2 = layer_libs.DepthwiseConvBnRelu(
in_channels=input_channels,
out_channels=input_channels,
kernel_size=3,
......
......@@ -25,7 +25,7 @@ from paddleseg.cvlibs import manager
from paddleseg import utils
from paddleseg.cvlibs import param_init
from paddleseg.utils import logger
from paddleseg.models.common import layer_utils
from paddleseg.models.common import layer_libs, activation
__all__ = [
"fcn_hrnet_w18_small_v1", "fcn_hrnet_w18_small_v2", "fcn_hrnet_w18",
......@@ -143,7 +143,7 @@ class ConvBNLayer(nn.Layer):
groups=groups,
bias_attr=False)
self._batch_norm = BatchNorm(out_channels)
self.act = layer_utils.Activation(act=act)
self.act = activation.Activation(act=act)
def forward(self, input):
y = self._conv(input)
......
......@@ -18,7 +18,7 @@ import paddle
import paddle.nn.functional as F
from paddle import nn
from paddleseg.cvlibs import manager
from paddleseg.models.common import layer_utils, model_utils
from paddleseg.models.common import layer_libs
from paddleseg.utils import utils
......@@ -72,7 +72,7 @@ class GCNet(nn.Layer):
self.backbone = backbone
in_channels = backbone_channels[1]
self.conv_bn_relu1 = layer_utils.ConvBnRelu(
self.conv_bn_relu1 = layer_libs.ConvBnRelu(
in_channels=in_channels,
out_channels=gc_channels,
kernel_size=3,
......@@ -80,13 +80,13 @@ class GCNet(nn.Layer):
self.gc_block = GlobalContextBlock(in_channels=gc_channels, ratio=ratio)
self.conv_bn_relu2 = layer_utils.ConvBnRelu(
self.conv_bn_relu2 = layer_libs.ConvBnRelu(
in_channels=gc_channels,
out_channels=gc_channels,
kernel_size=3,
padding=1)
self.conv_bn_relu3 = layer_utils.ConvBnRelu(
self.conv_bn_relu3 = layer_libs.ConvBnRelu(
in_channels=in_channels + gc_channels,
out_channels=gc_channels,
kernel_size=3,
......@@ -96,7 +96,7 @@ class GCNet(nn.Layer):
in_channels=gc_channels, out_channels=num_classes, kernel_size=1)
if enable_auxiliary_loss:
self.auxlayer = model_utils.AuxLayer(
self.auxlayer = layer_libs.AuxLayer(
in_channels=backbone_channels[0],
inter_channels=backbone_channels[0] // 4,
out_channels=num_classes)
......@@ -161,8 +161,8 @@ class GlobalContextBlock(nn.Layer):
self.conv_mask = nn.Conv2d(
in_channels=in_channels, out_channels=1, kernel_size=1)
# current paddle version does not support Softmax class
# self.softmax = layer_utils.Activation("softmax", dim=2)
self.softmax = nn.Softmax(axis=2)
inter_channels = int(in_channels * ratio)
self.channel_add_conv = nn.Sequential(
......@@ -188,7 +188,7 @@ class GlobalContextBlock(nn.Layer):
# [N, 1, H * W]
context_mask = paddle.reshape(
context_mask, shape=[batch, 1, height * width])
context_mask = F.softmax(context_mask)
context_mask = self.softmax(context_mask)
# [N, 1, H * W, 1]
context_mask = paddle.unsqueeze(context_mask, axis=-1)
# [N, 1, C, 1]
......
......@@ -18,7 +18,7 @@ import paddle.fluid as fluid
from paddle.fluid.dygraph import Sequential, Conv2D
from paddleseg.cvlibs import manager
from paddleseg.models.common.layer_utils import ConvBnRelu
from paddleseg.models.common.layer_libs import ConvBnRelu
from paddleseg import utils
......
......@@ -17,7 +17,7 @@ import os
import paddle.nn.functional as F
from paddle import nn
from paddleseg.cvlibs import manager
from paddleseg.models.common import model_utils
from paddleseg.models.common import layer_libs, pyramid_pool
from paddleseg.utils import utils
......@@ -70,7 +70,7 @@ class PSPNet(nn.Layer):
self.backbone = backbone
self.backbone_indices = backbone_indices
self.psp_module = model_utils.PPModule(
self.psp_module = pyramid_pool.PPModule(
in_channels=backbone_channels[1],
out_channels=pp_out_channels,
bin_sizes=bin_sizes)
......@@ -81,8 +81,11 @@ class PSPNet(nn.Layer):
kernel_size=1)
if enable_auxiliary_loss:
self.fcn_head = model_utils.FCNHead(
in_channels=backbone_channels[0], out_channels=num_classes)
self.auxlayer = layer_libs.AuxLayer(
in_channels=backbone_channels[0],
inter_channels=backbone_channels[0] // 4,
out_channels=num_classes)
self.enable_auxiliary_loss = enable_auxiliary_loss
......@@ -102,7 +105,7 @@ class PSPNet(nn.Layer):
if self.enable_auxiliary_loss:
auxiliary_feat = feat_list[self.backbone_indices[0]]
auxiliary_logit = self.fcn_head(auxiliary_feat)
auxiliary_logit = self.auxlayer(auxiliary_feat)
auxiliary_logit = F.resize_bilinear(auxiliary_logit,
input.shape[2:])
logit_list.append(auxiliary_logit)
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .transforms import *
from . import functional
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import numpy as np
from PIL import Image, ImageEnhance
def normalize(im, mean, std):
im = im.astype(np.float32, copy=False) / 255.0
im -= mean
im /= std
return im
def permute(im):
im = np.transpose(im, (2, 0, 1))
return im
def resize(im, target_size=608, interp=cv2.INTER_LINEAR):
if isinstance(target_size, list) or isinstance(target_size, tuple):
w = target_size[0]
h = target_size[1]
else:
w = target_size
h = target_size
im = cv2.resize(im, (w, h), interpolation=interp)
return im
def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR):
value = max(im.shape[0], im.shape[1])
scale = float(long_size) / float(value)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im = cv2.resize(
im, (resized_width, resized_height), interpolation=interpolation)
return im
def horizontal_flip(im):
if len(im.shape) == 3:
im = im[:, ::-1, :]
elif len(im.shape) == 2:
im = im[:, ::-1]
return im
def vertical_flip(im):
if len(im.shape) == 3:
im = im[::-1, :, :]
elif len(im.shape) == 2:
im = im[::-1, :]
return im
def brightness(im, brightness_lower, brightness_upper):
brightness_delta = np.random.uniform(brightness_lower, brightness_upper)
im = ImageEnhance.Brightness(im).enhance(brightness_delta)
return im
def contrast(im, contrast_lower, contrast_upper):
contrast_delta = np.random.uniform(contrast_lower, contrast_upper)
im = ImageEnhance.Contrast(im).enhance(contrast_delta)
return im
def saturation(im, saturation_lower, saturation_upper):
saturation_delta = np.random.uniform(saturation_lower, saturation_upper)
im = ImageEnhance.Color(im).enhance(saturation_delta)
return im
def hue(im, hue_lower, hue_upper):
hue_delta = np.random.uniform(hue_lower, hue_upper)
im = np.array(im.convert('HSV'))
im[:, :, 0] = im[:, :, 0] + hue_delta
im = Image.fromarray(im, mode='HSV').convert('RGB')
return im
def rotate(im, rotate_lower, rotate_upper):
rotate_delta = np.random.uniform(rotate_lower, rotate_upper)
im = im.rotate(int(rotate_delta))
return im
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
from collections import OrderedDict
import numpy as np
from PIL import Image
import cv2
from .functional import *
from dygraph.cvlibs import manager
@manager.TRANSFORMS.add_component
class Compose:
def __init__(self, transforms, to_rgb=True):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
if len(transforms) < 1:
raise ValueError('The length of transforms ' + \
'must be equal or larger than 1!')
self.transforms = transforms
self.to_rgb = to_rgb
def __call__(self, im, im_info=None, label=None):
if im_info is None:
im_info = list()
if isinstance(im, str):
im = cv2.imread(im).astype('float32')
if isinstance(label, str):
label = np.asarray(Image.open(label))
if im is None:
raise ValueError('Can\'t read The image file {}!'.format(im))
if self.to_rgb:
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
for op in self.transforms:
outputs = op(im, im_info, label)
im = outputs[0]
if len(outputs) >= 2:
im_info = outputs[1]
if len(outputs) == 3:
label = outputs[2]
im = permute(im)
# if len(outputs) == 3:
# label = label[np.newaxis, :, :]
return (im, im_info, label)
@manager.TRANSFORMS.add_component
class RandomHorizontalFlip:
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
if random.random() < self.prob:
im = horizontal_flip(im)
if label is not None:
label = horizontal_flip(label)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
@manager.TRANSFORMS.add_component
class RandomVerticalFlip:
def __init__(self, prob=0.1):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
if random.random() < self.prob:
im = vertical_flip(im)
if label is not None:
label = vertical_flip(label)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
@manager.TRANSFORMS.add_component
class Resize:
# The interpolation mode
interp_dict = {
'NEAREST': cv2.INTER_NEAREST,
'LINEAR': cv2.INTER_LINEAR,
'CUBIC': cv2.INTER_CUBIC,
'AREA': cv2.INTER_AREA,
'LANCZOS4': cv2.INTER_LANCZOS4
}
def __init__(self, target_size=512, interp='LINEAR'):
self.interp = interp
if not (interp == "RANDOM" or interp in self.interp_dict):
raise ValueError("interp should be one of {}".format(
self.interp_dict.keys()))
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
raise TypeError(
'when target is list or tuple, it should include 2 elements, but it is {}'
.format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
def __call__(self, im, im_info=None, label=None):
if im_info is None:
im_info = list()
im_info.append(('resize', im.shape[:2]))
if not isinstance(im, np.ndarray):
raise TypeError("Resize: image type is not numpy.")
if len(im.shape) != 3:
raise ValueError('Resize: image is not 3-dimensional.')
if self.interp == "RANDOM":
interp = random.choice(list(self.interp_dict.keys()))
else:
interp = self.interp
im = resize(im, self.target_size, self.interp_dict[interp])
if label is not None:
label = resize(label, self.target_size, cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
@manager.TRANSFORMS.add_component
class ResizeByLong:
def __init__(self, long_size):
self.long_size = long_size
def __call__(self, im, im_info=None, label=None):
if im_info is None:
im_info = list()
im_info.append(('resize', im.shape[:2]))
im = resize_long(im, self.long_size)
if label is not None:
label = resize_long(label, self.long_size, cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
@manager.TRANSFORMS.add_component
class ResizeRangeScaling:
def __init__(self, min_value=400, max_value=600):
if min_value > max_value:
raise ValueError('min_value must be less than max_value, '
'but they are {} and {}.'.format(
min_value, max_value))
self.min_value = min_value
self.max_value = max_value
def __call__(self, im, im_info=None, label=None):
if self.min_value == self.max_value:
random_size = self.max_value
else:
random_size = int(
np.random.uniform(self.min_value, self.max_value) + 0.5)
im = resize_long(im, random_size, cv2.INTER_LINEAR)
if label is not None:
label = resize_long(label, random_size, cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
@manager.TRANSFORMS.add_component
class ResizeStepScaling:
def __init__(self,
min_scale_factor=0.75,
max_scale_factor=1.25,
scale_step_size=0.25):
if min_scale_factor > max_scale_factor:
raise ValueError(
'min_scale_factor must be less than max_scale_factor, '
'but they are {} and {}.'.format(min_scale_factor,
max_scale_factor))
self.min_scale_factor = min_scale_factor
self.max_scale_factor = max_scale_factor
self.scale_step_size = scale_step_size
def __call__(self, im, im_info=None, label=None):
if self.min_scale_factor == self.max_scale_factor:
scale_factor = self.min_scale_factor
elif self.scale_step_size == 0:
scale_factor = np.random.uniform(self.min_scale_factor,
self.max_scale_factor)
else:
num_steps = int((self.max_scale_factor - self.min_scale_factor) /
self.scale_step_size + 1)
scale_factors = np.linspace(self.min_scale_factor,
self.max_scale_factor,
num_steps).tolist()
np.random.shuffle(scale_factors)
scale_factor = scale_factors[0]
w = int(round(scale_factor * im.shape[1]))
h = int(round(scale_factor * im.shape[0]))
im = resize(im, (w, h), cv2.INTER_LINEAR)
if label is not None:
label = resize(label, (w, h), cv2.INTER_NEAREST)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
@manager.TRANSFORMS.add_component
class Normalize:
def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
self.mean = mean
self.std = std
if not (isinstance(self.mean, list) and isinstance(self.std, list)):
raise ValueError("{}: input type is invalid.".format(self))
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
raise ValueError('{}: std is invalid!'.format(self))
def __call__(self, im, im_info=None, label=None):
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
im = normalize(im, mean, std)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
@manager.TRANSFORMS.add_component
class Padding:
def __init__(self,
target_size,
im_padding_value=[127.5, 127.5, 127.5],
label_padding_value=255):
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
raise ValueError(
'when target is list or tuple, it should include 2 elements, but it is {}'
.format(target_size))
elif not isinstance(target_size, int):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(target_size)))
self.target_size = target_size
self.im_padding_value = im_padding_value
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
if im_info is None:
im_info = list()
im_info.append(('padding', im.shape[:2]))
im_height, im_width = im.shape[0], im.shape[1]
if isinstance(self.target_size, int):
target_height = self.target_size
target_width = self.target_size
else:
target_height = self.target_size[1]
target_width = self.target_size[0]
pad_height = target_height - im_height
pad_width = target_width - im_width
if pad_height < 0 or pad_width < 0:
raise ValueError(
'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})'
.format(im_width, im_height, target_width, target_height))
else:
im = cv2.copyMakeBorder(
im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None:
label = cv2.copyMakeBorder(
label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
@manager.TRANSFORMS.add_component
class RandomPaddingCrop:
def __init__(self,
crop_size=512,
im_padding_value=[127.5, 127.5, 127.5],
label_padding_value=255):
if isinstance(crop_size, list) or isinstance(crop_size, tuple):
if len(crop_size) != 2:
raise ValueError(
'when crop_size is list or tuple, it should include 2 elements, but it is {}'
.format(crop_size))
elif not isinstance(crop_size, int):
raise TypeError(
"Type of crop_size is invalid. Must be Integer or List or tuple, now is {}"
.format(type(crop_size)))
self.crop_size = crop_size
self.im_padding_value = im_padding_value
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
if isinstance(self.crop_size, int):
crop_width = self.crop_size
crop_height = self.crop_size
else:
crop_width = self.crop_size[0]
crop_height = self.crop_size[1]
img_height = im.shape[0]
img_width = im.shape[1]
if img_height == crop_height and img_width == crop_width:
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
else:
pad_height = max(crop_height - img_height, 0)
pad_width = max(crop_width - img_width, 0)
if (pad_height > 0 or pad_width > 0):
im = cv2.copyMakeBorder(
im,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.im_padding_value)
if label is not None:
label = cv2.copyMakeBorder(
label,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=self.label_padding_value)
img_height = im.shape[0]
img_width = im.shape[1]
if crop_height > 0 and crop_width > 0:
h_off = np.random.randint(img_height - crop_height + 1)
w_off = np.random.randint(img_width - crop_width + 1)
im = im[h_off:(crop_height + h_off), w_off:(
w_off + crop_width), :]
if label is not None:
label = label[h_off:(crop_height + h_off), w_off:(
w_off + crop_width)]
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
@manager.TRANSFORMS.add_component
class RandomBlur:
def __init__(self, prob=0.1):
self.prob = prob
def __call__(self, im, im_info=None, label=None):
if self.prob <= 0:
n = 0
elif self.prob >= 1:
n = 1
else:
n = int(1.0 / self.prob)
if n > 0:
if np.random.randint(0, n) == 0:
radius = np.random.randint(3, 10)
if radius % 2 != 1:
radius = radius + 1
if radius > 9:
radius = 9
im = cv2.GaussianBlur(im, (radius, radius), 0, 0)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
@manager.TRANSFORMS.add_component
class RandomRotation:
def __init__(self,
max_rotation=15,
im_padding_value=[127.5, 127.5, 127.5],
label_padding_value=255):
self.max_rotation = max_rotation
self.im_padding_value = im_padding_value
self.label_padding_value = label_padding_value
def __call__(self, im, im_info=None, label=None):
if self.max_rotation > 0:
(h, w) = im.shape[:2]
do_rotation = np.random.uniform(-self.max_rotation,
self.max_rotation)
pc = (w // 2, h // 2)
r = cv2.getRotationMatrix2D(pc, do_rotation, 1.0)
cos = np.abs(r[0, 0])
sin = np.abs(r[0, 1])
nw = int((h * sin) + (w * cos))
nh = int((h * cos) + (w * sin))
(cx, cy) = pc
r[0, 2] += (nw / 2) - cx
r[1, 2] += (nh / 2) - cy
dsize = (nw, nh)
im = cv2.warpAffine(
im,
r,
dsize=dsize,
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_CONSTANT,
borderValue=self.im_padding_value)
label = cv2.warpAffine(
label,
r,
dsize=dsize,
flags=cv2.INTER_NEAREST,
borderMode=cv2.BORDER_CONSTANT,
borderValue=self.label_padding_value)
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
@manager.TRANSFORMS.add_component
class RandomScaleAspect:
def __init__(self, min_scale=0.5, aspect_ratio=0.33):
self.min_scale = min_scale
self.aspect_ratio = aspect_ratio
def __call__(self, im, im_info=None, label=None):
if self.min_scale != 0 and self.aspect_ratio != 0:
img_height = im.shape[0]
img_width = im.shape[1]
for i in range(0, 10):
area = img_height * img_width
target_area = area * np.random.uniform(self.min_scale, 1.0)
aspectRatio = np.random.uniform(self.aspect_ratio,
1.0 / self.aspect_ratio)
dw = int(np.sqrt(target_area * 1.0 * aspectRatio))
dh = int(np.sqrt(target_area * 1.0 / aspectRatio))
if (np.random.randint(10) < 5):
tmp = dw
dw = dh
dh = tmp
if (dh < img_height and dw < img_width):
h1 = np.random.randint(0, img_height - dh)
w1 = np.random.randint(0, img_width - dw)
im = im[h1:(h1 + dh), w1:(w1 + dw), :]
label = label[h1:(h1 + dh), w1:(w1 + dw)]
im = cv2.resize(
im, (img_width, img_height),
interpolation=cv2.INTER_LINEAR)
label = cv2.resize(
label, (img_width, img_height),
interpolation=cv2.INTER_NEAREST)
break
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
@manager.TRANSFORMS.add_component
class RandomDistort:
def __init__(self,
brightness_range=0.5,
brightness_prob=0.5,
contrast_range=0.5,
contrast_prob=0.5,
saturation_range=0.5,
saturation_prob=0.5,
hue_range=18,
hue_prob=0.5):
self.brightness_range = brightness_range
self.brightness_prob = brightness_prob
self.contrast_range = contrast_range
self.contrast_prob = contrast_prob
self.saturation_range = saturation_range
self.saturation_prob = saturation_prob
self.hue_range = hue_range
self.hue_prob = hue_prob
def __call__(self, im, im_info=None, label=None):
brightness_lower = 1 - self.brightness_range
brightness_upper = 1 + self.brightness_range
contrast_lower = 1 - self.contrast_range
contrast_upper = 1 + self.contrast_range
saturation_lower = 1 - self.saturation_range
saturation_upper = 1 + self.saturation_range
hue_lower = -self.hue_range
hue_upper = self.hue_range
ops = [brightness, contrast, saturation, hue]
random.shuffle(ops)
params_dict = {
'brightness': {
'brightness_lower': brightness_lower,
'brightness_upper': brightness_upper
},
'contrast': {
'contrast_lower': contrast_lower,
'contrast_upper': contrast_upper
},
'saturation': {
'saturation_lower': saturation_lower,
'saturation_upper': saturation_upper
},
'hue': {
'hue_lower': hue_lower,
'hue_upper': hue_upper
}
}
prob_dict = {
'brightness': self.brightness_prob,
'contrast': self.contrast_prob,
'saturation': self.saturation_prob,
'hue': self.hue_prob
}
im = im.astype('uint8')
im = Image.fromarray(im)
for id in range(4):
params = params_dict[ops[id].__name__]
prob = prob_dict[ops[id].__name__]
params['im'] = im
if np.random.uniform(0, 1) < prob:
im = ops[id](**params)
im = np.asarray(im).astype('float32')
if label is None:
return (im, im_info)
else:
return (im, im_info, label)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import logger
from . import download
from .metrics import ConfusionMatrix
from .utils import *
from .timer import Timer, calculate_eta
from .get_environ_info import get_environ_info
from .config import Config
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import codecs
import os
from typing import Any, Callable
import yaml
import paddle.fluid as fluid
import dygraph.cvlibs.manager as manager
class Config(object):
'''
Training config.
Args:
path(str) : the path of config file, supports yaml format only
'''
def __init__(self, path: str):
if not os.path.exists(path):
raise FileNotFoundError('File {} does not exist'.format(path))
if path.endswith('yml') or path.endswith('yaml'):
dic = self._parse_from_yaml(path)
print(dic)
self._build(dic)
else:
raise RuntimeError('Config file should in yaml format!')
def _update_dic(self, dic, base_dic):
"""
update config from dic based base_dic
"""
base_dic = base_dic.copy()
for key, val in dic.items():
if isinstance(val, dict) and key in base_dic:
base_dic[key] = self._update_dic(val, base_dic[key])
else:
base_dic[key] = val
dic = base_dic
return dic
def _parse_from_yaml(self, path: str):
'''Parse a yaml file and build config'''
with codecs.open(path, 'r', 'utf-8') as file:
dic = yaml.load(file, Loader=yaml.FullLoader)
if '_base_' in dic:
cfg_dir = os.path.dirname(path)
base_path = dic.pop('_base_')
base_path = os.path.join(cfg_dir, base_path)
base_dic = self._parse_from_yaml(base_path)
dic = self._update_dic(dic, base_dic)
return dic
def _build(self, dic: dict):
'''Build config from dictionary'''
dic = dic.copy()
self._batch_size = dic.get('batch_size', 1)
self._iters = dic.get('iters')
if 'model' not in dic:
raise RuntimeError()
self._model_cfg = dic['model']
self._model = None
self._train_dataset = dic.get('train_dataset')
self._val_dataset = dic.get('val_dataset')
self._learning_rate_cfg = dic.get('learning_rate', {})
self._learning_rate = self._learning_rate_cfg.get('value')
self._decay = self._learning_rate_cfg.get('decay', {
'type': 'poly',
'power': 0.9
})
self._loss_cfg = dic.get('loss', {})
self._losses = None
self._optimizer_cfg = dic.get('optimizer', {})
def update(self,
learning_rate: float = None,
batch_size: int = None,
iters: int = None):
'''Update config'''
if learning_rate:
self._learning_rate = learning_rate
if batch_size:
self._batch_size = batch_size
if iters:
self._iters = iters
@property
def batch_size(self) -> int:
return self._batch_size
@property
def iters(self) -> int:
if not self._iters:
raise RuntimeError('No iters specified in the configuration file.')
return self._iters
@property
def learning_rate(self) -> float:
if not self._learning_rate:
raise RuntimeError(
'No learning rate specified in the configuration file.')
if self.decay_type == 'poly':
lr = self._learning_rate
args = self.decay_args
args.setdefault('decay_steps', self.iters)
return fluid.layers.polynomial_decay(lr, **args)
else:
raise RuntimeError('Only poly decay support.')
@property
def optimizer(self) -> fluid.optimizer.Optimizer:
if self.optimizer_type == 'sgd':
lr = self.learning_rate
args = self.optimizer_args
args.setdefault('momentum', 0.9)
weight_decay = args.get('weight_decay', None)
if weight_decay is not None:
args.pop('weight_decay')
regularization = fluid.regularizer.L2DecayRegularizer(
regularization_coeff=weight_decay)
args.setdefault('regularization', None)
return fluid.optimizer.Momentum(
lr, parameter_list=self.model.parameters(), **args)
else:
raise RuntimeError('Only sgd optimizer support.')
@property
def optimizer_type(self) -> str:
otype = self._optimizer_cfg.get('type')
if not otype:
raise RuntimeError(
'No optimizer type specified in the configuration file.')
return otype
@property
def optimizer_args(self) -> dict:
args = self._optimizer_cfg.copy()
args.pop('type')
return args
@property
def decay_type(self) -> str:
return self._decay['type']
@property
def decay_args(self) -> dict:
args = self._decay.copy()
args.pop('type')
return args
@property
def loss(self) -> list:
if not self._losses:
args = self._loss_cfg.copy()
self._losses = dict()
for key, val in args.items():
if key == 'types':
self._losses['types'] = []
for item in args['types']:
self._losses['types'].append(self._load_object(item))
else:
self._losses[key] = val
if len(self._losses['coef']) != len(self._losses['types']):
raise RuntimeError(
'The length of coef should equal to types in loss config: {} != {}.'
.format(
len(self._losses['coef']), len(self._losses['types'])))
return self._losses
@property
def model(self) -> Callable:
if not self._model:
self._model = self._load_object(self._model_cfg)
return self._model
@property
def train_dataset(self) -> Any:
if not self._train_dataset:
return None
return self._load_object(self._train_dataset)
@property
def val_dataset(self) -> Any:
if not self._val_dataset:
return None
return self._load_object(self._val_dataset)
def _load_component(self, com_name: str) -> Any:
com_list = [
manager.MODELS, manager.BACKBONES, manager.DATASETS,
manager.TRANSFORMS, manager.LOSSES
]
for com in com_list:
if com_name in com.components_dict:
return com[com_name]
else:
raise RuntimeError(
'The specified component was not found {}.'.format(com_name))
def _load_object(self, cfg: dict) -> Any:
cfg = cfg.copy()
if 'type' not in cfg:
raise RuntimeError('No object information in {}.'.format(cfg))
component = self._load_component(cfg.pop('type'))
params = {}
for key, val in cfg.items():
if self._is_meta_type(val):
params[key] = self._load_object(val)
elif isinstance(val, list):
params[key] = [
self._load_object(item)
if self._is_meta_type(item) else item for item in val
]
else:
params[key] = val
return component(**params)
def _is_meta_type(self, item: Any) -> bool:
return isinstance(item, dict) and 'type' in item
import os
import sys
import time
import requests
import tarfile
import zipfile
import shutil
import functools
lasttime = time.time()
FLUSH_INTERVAL = 0.1
def progress(str, end=False):
global lasttime
if end:
str += "\n"
lasttime = 0
if time.time() - lasttime >= FLUSH_INTERVAL:
sys.stdout.write("\r%s" % str)
lasttime = time.time()
sys.stdout.flush()
def _download_file(url, savepath, print_progress):
r = requests.get(url, stream=True)
total_length = r.headers.get('content-length')
if total_length is None:
with open(savepath, 'wb') as f:
shutil.copyfileobj(r.raw, f)
else:
with open(savepath, 'wb') as f:
dl = 0
total_length = int(total_length)
starttime = time.time()
if print_progress:
print("Downloading %s" % os.path.basename(savepath))
for data in r.iter_content(chunk_size=4096):
dl += len(data)
f.write(data)
if print_progress:
done = int(50 * dl / total_length)
progress("[%-50s] %.2f%%" %
('=' * done, float(100 * dl) / total_length))
if print_progress:
progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True)
def _uncompress_file_zip(filepath, extrapath):
files = zipfile.ZipFile(filepath, 'r')
filelist = files.namelist()
rootpath = filelist[0]
total_num = len(filelist)
for index, file in enumerate(filelist):
files.extract(file, extrapath)
yield total_num, index, rootpath
files.close()
yield total_num, index, rootpath
def _uncompress_file_tar(filepath, extrapath, mode="r:gz"):
files = tarfile.open(filepath, mode)
filelist = files.getnames()
total_num = len(filelist)
rootpath = filelist[0]
for index, file in enumerate(filelist):
files.extract(file, extrapath)
yield total_num, index, rootpath
files.close()
yield total_num, index, rootpath
def _uncompress_file(filepath, extrapath, delete_file, print_progress):
if print_progress:
print("Uncompress %s" % os.path.basename(filepath))
if filepath.endswith("zip"):
handler = _uncompress_file_zip
elif filepath.endswith("tgz"):
handler = _uncompress_file_tar
else:
handler = functools.partial(_uncompress_file_tar, mode="r")
for total_num, index, rootpath in handler(filepath, extrapath):
if print_progress:
done = int(50 * float(index) / total_num)
progress(
"[%-50s] %.2f%%" % ('=' * done, float(100 * index) / total_num))
if print_progress:
progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True)
if delete_file:
os.remove(filepath)
return rootpath
def download_file_and_uncompress(url,
savepath=None,
extrapath=None,
extraname=None,
print_progress=True,
cover=False,
delete_file=True):
if savepath is None:
savepath = "."
if extrapath is None:
extrapath = "."
savename = url.split("/")[-1]
savepath = os.path.join(savepath, savename)
savename = ".".join(savename.split(".")[:-1])
savename = os.path.join(extrapath, savename)
extraname = savename if extraname is None else os.path.join(
extrapath, extraname)
if cover:
if os.path.exists(savepath):
shutil.rmtree(savepath)
if os.path.exists(savename):
shutil.rmtree(savename)
if os.path.exists(extraname):
shutil.rmtree(extraname)
if not os.path.exists(extraname):
if not os.path.exists(savename):
if not os.path.exists(savepath):
_download_file(url, savepath, print_progress)
savename = _uncompress_file(savepath, extrapath, delete_file,
print_progress)
savename = os.path.join(extrapath, savename)
shutil.move(savename, extraname)
return extraname
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
from collections import OrderedDict
import subprocess
import glob
import paddle
import paddle.fluid as fluid
import cv2
IS_WINDOWS = sys.platform == 'win32'
def _find_cuda_home():
'''Finds the CUDA install path. It refers to the implementation of
pytorch <https://github.com/pytorch/pytorch/blob/master/torch/utils/cpp_extension.py>.
'''
# Guess #1
cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
if cuda_home is None:
# Guess #2
try:
which = 'where' if IS_WINDOWS else 'which'
nvcc = subprocess.check_output([which,
'nvcc']).decode().rstrip('\r\n')
cuda_home = os.path.dirname(os.path.dirname(nvcc))
except Exception:
# Guess #3
if IS_WINDOWS:
cuda_homes = glob.glob(
'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*')
if len(cuda_homes) == 0:
cuda_home = ''
else:
cuda_home = cuda_homes[0]
else:
cuda_home = '/usr/local/cuda'
if not os.path.exists(cuda_home):
cuda_home = None
return cuda_home
def _get_nvcc_info(cuda_home):
if cuda_home is not None and os.path.isdir(cuda_home):
try:
nvcc = os.path.join(cuda_home, 'bin/nvcc')
nvcc = subprocess.check_output(
"{} -V".format(nvcc), shell=True).decode()
nvcc = nvcc.strip().split('\n')[-1]
except subprocess.SubprocessError:
nvcc = "Not Available"
return nvcc
def _get_gpu_info():
try:
gpu_info = subprocess.check_output(['nvidia-smi',
'-L']).decode().strip()
gpu_info = gpu_info.split('\n')
for i in range(len(gpu_info)):
gpu_info[i] = ' '.join(gpu_info[i].split(' ')[:4])
except:
gpu_info = ' Can not get GPU information. Please make sure CUDA have been installed successfully.'
return gpu_info
def get_environ_info():
"""collect environment information"""
env_info = {}
env_info['System Platform'] = sys.platform
if env_info['System Platform'] == 'linux':
try:
lsb_v = subprocess.check_output(['lsb_release',
'-v']).decode().strip()
lsb_v = lsb_v.replace('\t', ' ')
lsb_d = subprocess.check_output(['lsb_release',
'-d']).decode().strip()
lsb_d = lsb_d.replace('\t', ' ')
env_info['LSB'] = [lsb_v, lsb_d]
except:
pass
env_info['Python'] = sys.version.replace('\n', '')
compiled_with_cuda = paddle.fluid.is_compiled_with_cuda()
env_info['Paddle compiled with cuda'] = compiled_with_cuda
if compiled_with_cuda:
cuda_home = _find_cuda_home()
env_info['NVCC'] = _get_nvcc_info(cuda_home)
gpu_nums = fluid.core.get_cuda_device_count()
env_info['GPUs used'] = gpu_nums
env_info['CUDA_VISIBLE_DEVICES'] = os.environ.get(
'CUDA_VISIBLE_DEVICES')
env_info['GPU'] = _get_gpu_info()
gcc = subprocess.check_output(['gcc', '--version']).decode()
gcc = gcc.strip().split('\n')[0]
env_info['GCC'] = gcc
env_info['PaddlePaddle'] = paddle.__version__
env_info['OpenCV'] = cv2.__version__
return env_info
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import os
import sys
from paddle.fluid.dygraph.parallel import ParallelEnv
levels = {0: 'ERROR', 1: 'WARNING', 2: 'INFO', 3: 'DEBUG'}
log_level = 2
def log(level=2, message=""):
if ParallelEnv().local_rank == 0:
current_time = time.time()
time_array = time.localtime(current_time)
current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
if log_level >= level:
print(
"{} [{}]\t{}".format(current_time, levels[level],
message).encode("utf-8").decode("latin1"))
sys.stdout.flush()
def debug(message=""):
log(level=3, message=message)
def info(message=""):
log(level=2, message=message)
def warning(message=""):
log(level=1, message=message)
def error(message=""):
log(level=0, message=message)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import numpy as np
from scipy.sparse import csr_matrix
class ConfusionMatrix(object):
"""
Confusion Matrix for segmentation evaluation
"""
def __init__(self, num_classes=2, streaming=False):
self.confusion_matrix = np.zeros([num_classes, num_classes],
dtype='int64')
self.num_classes = num_classes
self.streaming = streaming
def calculate(self, pred, label, ignore=None):
# If not in streaming mode, clear matrix everytime when call `calculate`
if not self.streaming:
self.zero_matrix()
label = np.transpose(label, (0, 2, 3, 1))
ignore = np.transpose(ignore, (0, 2, 3, 1))
mask = np.array(ignore) == 1
label = np.asarray(label)[mask]
pred = np.asarray(pred)[mask]
one = np.ones_like(pred)
# Accumuate ([row=label, col=pred], 1) into sparse matrix
spm = csr_matrix((one, (label, pred)),
shape=(self.num_classes, self.num_classes))
spm = spm.todense()
self.confusion_matrix += spm
def zero_matrix(self):
""" Clear confusion matrix """
self.confusion_matrix = np.zeros([self.num_classes, self.num_classes],
dtype='int64')
def mean_iou(self):
iou_list = []
avg_iou = 0
# TODO: use numpy sum axis api to simpliy
vji = np.zeros(self.num_classes, dtype=int)
vij = np.zeros(self.num_classes, dtype=int)
for j in range(self.num_classes):
v_j = 0
for i in range(self.num_classes):
v_j += self.confusion_matrix[j][i]
vji[j] = v_j
for i in range(self.num_classes):
v_i = 0
for j in range(self.num_classes):
v_i += self.confusion_matrix[j][i]
vij[i] = v_i
for c in range(self.num_classes):
total = vji[c] + vij[c] - self.confusion_matrix[c][c]
if total == 0:
iou = 0
else:
iou = float(self.confusion_matrix[c][c]) / total
avg_iou += iou
iou_list.append(iou)
avg_iou = float(avg_iou) / float(self.num_classes)
return np.array(iou_list), avg_iou
def accuracy(self):
total = self.confusion_matrix.sum()
total_right = 0
for c in range(self.num_classes):
total_right += self.confusion_matrix[c][c]
if total == 0:
avg_acc = 0
else:
avg_acc = float(total_right) / total
vij = np.zeros(self.num_classes, dtype=int)
for i in range(self.num_classes):
v_i = 0
for j in range(self.num_classes):
v_i += self.confusion_matrix[j][i]
vij[i] = v_i
acc_list = []
for c in range(self.num_classes):
if vij[c] == 0:
acc = 0
else:
acc = self.confusion_matrix[c][c] / float(vij[c])
acc_list.append(acc)
return np.array(acc_list), avg_acc
def kappa(self):
vji = np.zeros(self.num_classes)
vij = np.zeros(self.num_classes)
for j in range(self.num_classes):
v_j = 0
for i in range(self.num_classes):
v_j += self.confusion_matrix[j][i]
vji[j] = v_j
for i in range(self.num_classes):
v_i = 0
for j in range(self.num_classes):
v_i += self.confusion_matrix[j][i]
vij[i] = v_i
total = self.confusion_matrix.sum()
# avoid spillovers
# TODO: is it reasonable to hard code 10000.0?
total = float(total) / 10000.0
vji = vji / 10000.0
vij = vij / 10000.0
tp = 0
tc = 0
for c in range(self.num_classes):
tp += vji[c] * vij[c]
tc += self.confusion_matrix[c][c]
tc = tc / 10000.0
pe = tp / (total * total)
po = tc / total
kappa = (po - pe) / (1 - pe)
return kappa
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
class Timer(object):
""" Simple timer class for measuring time consuming """
def __init__(self):
self._start_time = 0.0
self._end_time = 0.0
self._elapsed_time = 0.0
self._is_running = False
def start(self):
self._is_running = True
self._start_time = time.time()
def restart(self):
self.start()
def stop(self):
self._is_running = False
self._end_time = time.time()
def elapsed_time(self):
self._end_time = time.time()
self._elapsed_time = self._end_time - self._start_time
if not self.is_running:
return 0.0
return self._elapsed_time
@property
def is_running(self):
return self._is_running
def calculate_eta(remaining_step, speed):
if remaining_step < 0:
remaining_step = 0
remaining_time = int(remaining_step * speed)
result = "{:0>2}:{:0>2}:{:0>2}"
arr = []
for i in range(2, -1, -1):
arr.append(int(remaining_time / 60**i))
remaining_time %= 60**i
return result.format(*arr)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import math
import cv2
import paddle.fluid as fluid
from . import logger
def seconds_to_hms(seconds):
h = math.floor(seconds / 3600)
m = math.floor((seconds - h * 3600) / 60)
s = int(seconds - h * 3600 - m * 60)
hms_str = "{}:{}:{}".format(h, m, s)
return hms_str
def load_pretrained_model(model, pretrained_model):
if pretrained_model is not None:
logger.info('Load pretrained model from {}'.format(pretrained_model))
if os.path.exists(pretrained_model):
ckpt_path = os.path.join(pretrained_model, 'model')
try:
para_state_dict, _ = fluid.load_dygraph(ckpt_path)
except:
para_state_dict = fluid.load_program_state(pretrained_model)
model_state_dict = model.state_dict()
keys = model_state_dict.keys()
num_params_loaded = 0
for k in keys:
if k not in para_state_dict:
logger.warning("{} is not in pretrained model".format(k))
elif list(para_state_dict[k].shape) != list(
model_state_dict[k].shape):
logger.warning(
"[SKIP] Shape of pretrained params {} doesn't match.(Pretrained: {}, Actual: {})"
.format(k, para_state_dict[k].shape,
model_state_dict[k].shape))
else:
model_state_dict[k] = para_state_dict[k]
num_params_loaded += 1
model.set_dict(model_state_dict)
logger.info("There are {}/{} varaibles are loaded.".format(
num_params_loaded, len(model_state_dict)))
else:
raise ValueError(
'The pretrained model directory is not Found: {}'.format(
pretrained_model))
else:
logger.warning('No pretrained model to load, train from scratch')
def resume(model, optimizer, resume_model):
if resume_model is not None:
logger.info('Resume model from {}'.format(resume_model))
if os.path.exists(resume_model):
resume_model = os.path.normpath(resume_model)
ckpt_path = os.path.join(resume_model, 'model')
para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path)
model.set_dict(para_state_dict)
optimizer.set_dict(opti_state_dict)
epoch = resume_model.split('_')[-1]
if epoch.isdigit():
epoch = int(epoch)
return epoch
else:
raise ValueError(
'The resume model directory is not Found: {}'.format(
resume_model))
else:
logger.info('No model need to resume')
def visualize(image, result, save_dir=None, weight=0.6):
"""
Convert segment result to color image, and save added image.
Args:
image: the path of origin image
result: the predict result of image
save_dir: the directory for saving visual image
weight: the image weight of visual image, and the result weight is (1 - weight)
"""
color_map = get_color_map_list(256)
color_map = np.array(color_map).astype("uint8")
# Use OpenCV LUT for color mapping
c1 = cv2.LUT(result, color_map[:, 0])
c2 = cv2.LUT(result, color_map[:, 1])
c3 = cv2.LUT(result, color_map[:, 2])
pseudo_img = np.dstack((c1, c2, c3))
im = cv2.imread(image)
vis_result = cv2.addWeighted(im, weight, pseudo_img, 1 - weight, 0)
if save_dir is not None:
if not os.path.exists(save_dir):
os.makedirs(save_dir)
image_name = os.path.split(image)[-1]
out_path = os.path.join(save_dir, image_name)
cv2.imwrite(out_path, vis_result)
else:
return vis_result
def get_color_map_list(num_classes):
""" Returns the color map for visualizing the segmentation mask,
which can support arbitrary number of classes.
Args:
num_classes: Number of classes
Returns:
The color map
"""
num_classes += 1
color_map = num_classes * [0, 0, 0]
for i in range(0, num_classes):
j = 0
lab = i
while lab:
color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
j += 1
lab >>= 3
color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
color_map = color_map[1:]
return color_map
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册