diff --git a/dygraph/infer.py b/dygraph/infer.py
index 2e6aa3f58a5adbe608ce02f31a80286e6e7c717f..a9c0d380f553e5c9f57c46a8aff91b28c3a1888e 100644
--- a/dygraph/infer.py
+++ b/dygraph/infer.py
@@ -17,11 +17,11 @@ import argparse
 
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.parallel import ParallelEnv
 
-from dygraph.datasets import DATASETS
-import dygraph.transforms as T
-from dygraph.cvlibs import manager
-from dygraph.utils import get_environ_info
-from dygraph.core import infer
+from paddleseg.datasets import DATASETS
+import paddleseg.transforms as T
+from paddleseg.cvlibs import manager
+from paddleseg.utils import get_environ_info
+from paddleseg.core import infer
 
 
 def parse_args():
diff --git a/dygraph/models/fast_scnn.py b/dygraph/models/fast_scnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..6bd9b4d6e656bcb3530c50be120293b4f3fb05c6
--- /dev/null
+++ b/dygraph/models/fast_scnn.py
@@ -0,0 +1,302 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle import fluid, nn
+
+from dygraph.cvlibs import manager
+from dygraph.models import model_utils, pspnet
+from dygraph.models.architectures import layer_utils
+
+
+@manager.MODELS.add_component
+class FastSCNN(fluid.dygraph.Layer):
+    """
+    The FastSCNN implementation.
+
+    As mentioned in the original paper, FastSCNN is a real-time segmentation algorithm (123.5 fps),
+    even for high-resolution images (1024x2048).
+
+    The original article refers to
+    Poudel, Rudra PK, et al. "Fast-SCNN: Fast semantic segmentation network."
+    (https://arxiv.org/pdf/1902.04502.pdf)
+
+    Args:
+
+        num_classes (int): the number of target classes. Default to 2.
+
+        enable_auxiliary_loss (bool): a bool value that indicates whether to add an auxiliary loss.
+            If true, an auxiliary loss with weight 0.4 is added after the LearningToDownsample module. Default to False.
+
+        ignore_index (int): the value in the ground-truth mask that is ignored during evaluation. Default to 255.
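+
+    A minimal usage sketch (`images` and `labels` below are illustrative
+    placeholders for a [N, 3, H, W] float32 image batch and a [N, 1, H, W]
+    int64 label batch; they are not defined in this module):
+
+        import paddle.fluid as fluid
+        with fluid.dygraph.guard():
+            model = FastSCNN(num_classes=19, enable_auxiliary_loss=True)
+            loss = model(images, labels)       # training mode: returns the loss
+            model.eval()
+            pred, score_map = model(images)    # eval mode: returns prediction and score map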
+ """ + + def __init__(self, + num_classes=2, + enable_auxiliary_loss=False, + ignore_index=255): + + super(FastSCNN, self).__init__() + + self.learning_to_downsample = LearningToDownsample(32, 48, 64) + self.global_feature_extractor = GlobalFeatureExtractor(64, [64, 96, 128], 128, 6, [3, 3, 3]) + self.feature_fusion = FeatureFusionModule(64, 128, 128) + self.classifier = Classifier(128, num_classes) + + if enable_auxiliary_loss: + self.auxlayer = model_utils.AuxLayer(64, 32, num_classes) + + self.enable_auxiliary_loss = enable_auxiliary_loss + self.ignore_index = ignore_index + + def forward(self, input, label=None): + + higher_res_features = self.learning_to_downsample(input) + x = self.global_feature_extractor(higher_res_features) + x = self.feature_fusion(higher_res_features, x) + logit = self.classifier(x) + logit = fluid.layers.resize_bilinear(logit, input.shape[2:]) + + if self.enable_auxiliary_loss: + auxiliary_logit = self.auxlayer(higher_res_features) + auxiliary_logit = fluid.layers.resize_bilinear(auxiliary_logit, input.shape[2:]) + + if self.training: + loss = model_utils.get_loss(logit, label) + if self.enable_auxiliary_loss: + auxiliary_loss = model_utils.get_loss(auxiliary_logit, label) + loss += (0.4 * auxiliary_loss) + return loss + else: + pred, score_map = model_utils.get_pred_score_map(logit) + return pred, score_map + + +class LearningToDownsample(fluid.dygraph.Layer): + """ + Learning to downsample module. + + This module consists of three downsampling blocks (one Conv and two separable Conv) + + Args: + dw_channels1 (int): the input channels of the first sep conv. Default to 32. + + dw_channels2 (int): the input channels of the second sep conv. Default to 48. + + out_channels (int): the output channels of LearningToDownsample module. Default to 64. + """ + + def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64): + super(LearningToDownsample, self).__init__() + + self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=3, + num_filters=dw_channels1, + filter_size=3, + stride=2) + self.dsconv_bn_relu1 = layer_utils.ConvBnRelu(num_channels=dw_channels1, + num_filters=dw_channels2, + filter_size=3, + using_sep_conv=True, # using sep conv + stride=2, + padding=1) + self.dsconv_bn_relu2 = layer_utils.ConvBnRelu(num_channels=dw_channels2, + num_filters=out_channels, + filter_size=3, + using_sep_conv=True, # using sep conv + stride=2, + padding=1) + + def forward(self, x): + x = self.conv_bn_relu(x) + x = self.dsconv_bn_relu1(x) + x = self.dsconv_bn_relu2(x) + return x + + +class GlobalFeatureExtractor(fluid.dygraph.Layer): + """ + Global feature extractor module + + This module consists of three LinearBottleneck blocks (like inverted residual introduced by MobileNetV2) and + a PPModule (introduced by PSPNet). + + Args: + in_channels (int): the number of input channels to the module. Default to 64. + block_channels (tuple): a tuple represents output channels of each bottleneck block. Default to (64, 96, 128). + out_channels (int): the number of output channels of the module. Default to 128. + expansion (int): the expansion factor in bottleneck. Default to 6. + num_blocks (tuple): it indicates the repeat time of each bottleneck. Default to (3, 3, 3). 
+ """ + + def __init__(self, in_channels=64, block_channels=(64, 96, 128), + out_channels=128, expansion=6, num_blocks=(3, 3, 3)): + super(GlobalFeatureExtractor, self).__init__() + + self.bottleneck1 = self._make_layer(LinearBottleneck, in_channels, block_channels[0], num_blocks[0], expansion, + 2) + self.bottleneck2 = self._make_layer(LinearBottleneck, block_channels[0], block_channels[1], num_blocks[1], + expansion, 2) + self.bottleneck3 = self._make_layer(LinearBottleneck, block_channels[1], block_channels[2], num_blocks[2], + expansion, 1) + + self.ppm = pspnet.PPModule(block_channels[2], out_channels, dim_reduction=True) + + def _make_layer(self, block, in_channels, out_channels, blocks, expansion=6, stride=1): + layers = [] + layers.append(block(in_channels, out_channels, expansion, stride)) + for i in range(1, blocks): + layers.append(block(out_channels, out_channels, expansion, 1)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.bottleneck1(x) + x = self.bottleneck2(x) + x = self.bottleneck3(x) + x = self.ppm(x) + return x + + +class LinearBottleneck(fluid.dygraph.Layer): + """ + Single bottleneck implementation. + + Args: + in_channels (int): the number of input channels to bottleneck block. + + out_channels (int): the number of output channels of bottleneck block. + + expansion (int). the expansion factor in bottleneck. Default to 6. + + stride (int). the stride used in depth-wise conv. + """ + + def __init__(self, in_channels, out_channels, expansion=6, stride=2, **kwargs): + super(LinearBottleneck, self).__init__() + + self.use_shortcut = stride == 1 and in_channels == out_channels + + expand_channels = in_channels * expansion + self.block = nn.Sequential( + # pw + layer_utils.ConvBnRelu(num_channels=in_channels, + num_filters=expand_channels, + filter_size=1, + bias_attr=False), + # dw + layer_utils.ConvBnRelu(num_channels=expand_channels, + num_filters=expand_channels, + filter_size=3, + stride=stride, + padding=1, + groups=expand_channels, + bias_attr=False), + # pw-linear + nn.Conv2D(num_channels=expand_channels, + num_filters=out_channels, + filter_size=1, + bias_attr=False), + + nn.BatchNorm(out_channels) + ) + + def forward(self, x): + out = self.block(x) + if self.use_shortcut: + out = x + out + return out + + +class FeatureFusionModule(fluid.dygraph.Layer): + """ + Feature Fusion Module Implememtation. + + This module fuses high-resolution feature and low-resolution feature. + + Args: + high_in_channels (int): the channels of high-resolution feature (output of LearningToDownsample). + + low_in_channels (int). the channels of low-resolution feature (output of GlobalFeatureExtractor). + + out_channels (int). the output channels of this module. 
+ """ + + def __init__(self, high_in_channels, low_in_channels, out_channels): + super(FeatureFusionModule, self).__init__() + + # There only depth-wise conv is used WITHOUT point-sied conv + self.dwconv = layer_utils.ConvBnRelu(num_channels=low_in_channels, + num_filters=out_channels, + filter_size=3, + padding=1, + groups=128) + + self.conv_low_res = nn.Sequential( + nn.Conv2D(num_channels=out_channels, num_filters=out_channels, filter_size=1), + nn.BatchNorm(out_channels)) + + self.conv_high_res = nn.Sequential( + nn.Conv2D(num_channels=high_in_channels, num_filters=out_channels, filter_size=1), + nn.BatchNorm(out_channels)) + + self.relu = nn.ReLU(True) + + def forward(self, high_res_input, low_res_input): + low_res_input = fluid.layers.resize_bilinear(input=low_res_input, scale=4) + low_res_input = self.dwconv(low_res_input) + low_res_input = self.conv_low_res(low_res_input) + + high_res_input = self.conv_high_res(high_res_input) + + x = high_res_input + low_res_input + + return self.relu(x) + + +class Classifier(fluid.dygraph.Layer): + """ + The Classifier module implemetation. + + This module consists of two depth-wsie conv and one conv. + + Args: + input_channels (int): the input channels to this module. + + num_classes (int). the unique number of target classes. + + """ + + def __init__(self, input_channels, num_classes): + super(Classifier, self).__init__() + + self.dsconv1 = layer_utils.ConvBnRelu(num_channels=input_channels, + num_filters=input_channels, + filter_size=3, + using_sep_conv=True # using sep conv + ) + + self.dsconv2 = layer_utils.ConvBnRelu(num_channels=input_channels, + num_filters=input_channels, + filter_size=3, + using_sep_conv=True # using sep conv + ) + + self.conv = nn.Conv2D(num_channels=input_channels, + num_filters=num_classes, + filter_size=1) + + def forward(self, x): + x = self.dsconv1(x) + x = self.dsconv2(x) + x = fluid.layers.dropout(x, dropout_prob=0.1) + x = self.conv(x) + return x diff --git a/dygraph/models/model_utils.py b/dygraph/models/model_utils.py index e0a88c355a78d98ff312aaa75cf175a2369ffa5d..7f52919915faf3fa2cca6b567e0c6b8a105e7e0b 100644 --- a/dygraph/models/model_utils.py +++ b/dygraph/models/model_utils.py @@ -18,7 +18,8 @@ import paddle.nn.functional as F from paddle import fluid from paddle.fluid import dygraph from paddle.fluid.dygraph import Conv2D -from paddle.nn import SyncBatchNorm as BatchNorm +#from paddle.nn import SyncBatchNorm as BatchNorm +from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm from dygraph.models.architectures import layer_utils @@ -47,10 +48,37 @@ class FCNHead(fluid.dygraph.Layer): def forward(self, x): x = self.conv_bn_relu(x) - x = F.dropout(x, p=0.1) + x = F.dropout(x, dropout_prob=0.1) x = self.conv(x) return x +class AuxLayer(fluid.dygraph.Layer): + """ + The auxilary layer implementation for auxilary loss + + Args: + in_channels (int): the number of input channels. + inter_channels (int): intermediate channels. + out_channels (int): the number of output channels, which is usually num_classes. 
+ """ + + def __init__(self, in_channels, inter_channels, out_channels): + super(AuxLayer, self).__init__() + + self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=in_channels, + num_filters=inter_channels, + filter_size=3, + padding=1) + + self.conv = Conv2D(num_channels=inter_channels, + num_filters=out_channels, + filter_size=1) + + def forward(self, x): + x = self.conv_bn_relu(x) + x = F.dropout(x, dropout_prob=0.1) + x = self.conv(x) + return x def get_loss(logit, label, ignore_index=255, EPS=1e-5): """ diff --git a/dygraph/models/pspnet.py b/dygraph/models/pspnet.py index d4457ed53435aa75257b68c476b55c15ab701c68..0e376e21ca7d6c57b2d0b121e82a3ca0f5a57c10 100644 --- a/dygraph/models/pspnet.py +++ b/dygraph/models/pspnet.py @@ -148,23 +148,27 @@ class PPModule(fluid.dygraph.Layer): out_channels (int): the number of output channels after pyramid pooling module. bin_sizes (tuple): the out size of pooled feature maps. Default to (1,2,3,6). + + dim_reduction (bool): a bool value represent if reduing dimention after pooling. Default to True. """ - def __init__(self, in_channels, out_channels, bin_sizes=(1, 2, 3, 6)): + def __init__(self, in_channels, out_channels, bin_sizes=(1, 2, 3, 6), dim_reduction=True): super(PPModule, self).__init__() self.bin_sizes = bin_sizes + inter_channels = in_channels + if dim_reduction: + inter_channels = in_channels // len(bin_sizes) + # we use dimension reduction after pooling mentioned in original implementation. - self.stages = fluid.dygraph.LayerList( - [self._make_stage(in_channels, size) for size in bin_sizes]) + self.stages = fluid.dygraph.LayerList([self._make_stage(in_channels, inter_channels, size) for size in bin_sizes]) - self.conv_bn_relu2 = layer_utils.ConvBnRelu( - num_channels=in_channels * 2, - num_filters=out_channels, - filter_size=3, - padding=1) + self.conv_bn_relu2 = layer_utils.ConvBnRelu(num_channels=in_channels + inter_channels * len(bin_sizes), + num_filters=out_channels, + filter_size=3, + padding=1) - def _make_stage(self, in_channels, size): + def _make_stage(self, in_channels, out_channels, size): """ Create one pooling layer. @@ -186,10 +190,9 @@ class PPModule(fluid.dygraph.Layer): # this paddle version does not support AdaptiveAvgPool2d, so skip it here. # prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) - conv = layer_utils.ConvBnRelu( - num_channels=in_channels, - num_filters=in_channels // len(self.bin_sizes), - filter_size=1) + conv = layer_utils.ConvBnRelu(num_channels=in_channels, + num_filters=out_channels, + filter_size=1) return conv diff --git a/dygraph/paddleseg/__init__.py b/dygraph/paddleseg/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1e2950fcdf71fe14a60b485bc786ef655be907a0 --- /dev/null +++ b/dygraph/paddleseg/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import models +from . import datasets +from . 
import transforms \ No newline at end of file diff --git a/dygraph/paddleseg/core/__init__.py b/dygraph/paddleseg/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..202629f542f40a2741cb12022adb10d7a56861b5 --- /dev/null +++ b/dygraph/paddleseg/core/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .train import train +from .val import evaluate +from .infer import infer + +__all__ = ['train', 'evaluate', 'infer'] diff --git a/dygraph/paddleseg/core/infer.py b/dygraph/paddleseg/core/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..a9e671e05b873ace440b48102959423851b3aa80 --- /dev/null +++ b/dygraph/paddleseg/core/infer.py @@ -0,0 +1,72 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +from paddle.fluid.dygraph.base import to_variable +import numpy as np +import paddle.fluid as fluid +import cv2 +import tqdm + +from paddleseg import utils +import paddleseg.utils.logger as logger + + +def mkdir(path): + sub_dir = os.path.dirname(path) + if not os.path.exists(sub_dir): + os.makedirs(sub_dir) + + +def infer(model, test_dataset=None, model_dir=None, save_dir='output'): + ckpt_path = os.path.join(model_dir, 'model') + para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path) + model.set_dict(para_state_dict) + model.eval() + + added_saved_dir = os.path.join(save_dir, 'added') + pred_saved_dir = os.path.join(save_dir, 'prediction') + + logger.info("Start to predict...") + for im, im_info, im_path in tqdm.tqdm(test_dataset): + im = to_variable(im) + pred, _ = model(im) + pred = pred.numpy() + pred = np.squeeze(pred).astype('uint8') + for info in im_info[::-1]: + if info[0] == 'resize': + h, w = info[1][0], info[1][1] + pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST) + elif info[0] == 'padding': + h, w = info[1][0], info[1][1] + pred = pred[0:h, 0:w] + else: + raise Exception("Unexpected info '{}' in im_info".format( + info[0])) + + im_file = im_path.replace(test_dataset.dataset_root, '') + if im_file[0] == '/': + im_file = im_file[1:] + # save added image + added_image = utils.visualize(im_path, pred, weight=0.6) + added_image_path = os.path.join(added_saved_dir, im_file) + mkdir(added_image_path) + cv2.imwrite(added_image_path, added_image) + + # save prediction + pred_im = utils.visualize(im_path, pred, weight=0.0) + pred_saved_path = os.path.join(pred_saved_dir, im_file) + mkdir(pred_saved_path) + cv2.imwrite(pred_saved_path, pred_im) diff --git a/dygraph/paddleseg/core/train.py b/dygraph/paddleseg/core/train.py new file mode 100644 index 0000000000000000000000000000000000000000..a2621a622ac5bb5a995a260c4cdb67f16f5a1203 --- /dev/null +++ b/dygraph/paddleseg/core/train.py @@ -0,0 +1,193 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import paddle +import paddle.fluid as fluid +from paddle.fluid.dygraph.parallel import ParallelEnv +from paddle.fluid.io import DataLoader +# from paddle.incubate.hapi.distributed import DistributedBatchSampler +from paddle.io import DistributedBatchSampler +import paddle.nn.functional as F + +import paddleseg.utils.logger as logger +from paddleseg.utils import load_pretrained_model +from paddleseg.utils import resume +from paddleseg.utils import Timer, calculate_eta +from .val import evaluate + + +def check_logits_losses(logits, losses): + len_logits = len(logits) + len_losses = len(losses['types']) + if len_logits != len_losses: + raise RuntimeError( + 'The length of logits should equal to the types of loss config: {} != {}.' 
+ .format(len_logits, len_losses)) + + +def loss_computation(logits, label, losses): + check_logits_losses(logits, losses) + loss = 0 + for i in range(len(logits)): + logit = logits[i] + if logit.shape[-2:] != label.shape[-2:]: + logit = F.resize_bilinear(logit, label.shape[-2:]) + loss_i = losses['types'][i](logit, label) + loss += losses['coef'][i] * loss_i + return loss + + +def train(model, + train_dataset, + places=None, + eval_dataset=None, + optimizer=None, + save_dir='output', + iters=10000, + batch_size=2, + resume_model=None, + save_interval_iters=1000, + log_iters=10, + num_classes=None, + num_workers=8, + use_vdl=False, + losses=None, + ignore_index=255): + + nranks = ParallelEnv().nranks + + start_iter = 0 + if resume_model is not None: + start_iter = resume(model, optimizer, resume_model) + + if not os.path.isdir(save_dir): + if os.path.exists(save_dir): + os.remove(save_dir) + os.makedirs(save_dir) + + if nranks > 1: + strategy = fluid.dygraph.prepare_context() + ddp_model = fluid.dygraph.DataParallel(model, strategy) + + batch_sampler = DistributedBatchSampler( + train_dataset, batch_size=batch_size, shuffle=True, drop_last=True) + loader = DataLoader( + train_dataset, + batch_sampler=batch_sampler, + places=places, + num_workers=num_workers, + return_list=True, + ) + + if use_vdl: + from visualdl import LogWriter + log_writer = LogWriter(save_dir) + + timer = Timer() + avg_loss = 0.0 + iters_per_epoch = len(batch_sampler) + best_mean_iou = -1.0 + best_model_iter = -1 + train_reader_cost = 0.0 + train_batch_cost = 0.0 + timer.start() + + iter = start_iter + while iter < iters: + for data in loader: + iter += 1 + if iter > iters: + break + train_reader_cost += timer.elapsed_time() + images = data[0] + labels = data[1].astype('int64') + if nranks > 1: + logits = ddp_model(images) + loss = loss_computation(logits, labels, losses) + # loss = ddp_model(images, labels) + # apply_collective_grads sum grads over multiple gpus. 
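+                # scale_loss divides the loss by the trainer count so that the
+                # subsequent all-reduce sum in apply_collective_grads yields
+                # gradients averaged across GPUs.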
+ loss = ddp_model.scale_loss(loss) + loss.backward() + ddp_model.apply_collective_grads() + else: + logits = model(images) + loss = loss_computation(logits, labels, losses) + # loss = model(images, labels) + loss.backward() + optimizer.minimize(loss) + model.clear_gradients() + avg_loss += loss.numpy()[0] + lr = optimizer.current_step_lr() + train_batch_cost += timer.elapsed_time() + if (iter) % log_iters == 0 and ParallelEnv().local_rank == 0: + avg_loss /= log_iters + avg_train_reader_cost = train_reader_cost / log_iters + avg_train_batch_cost = train_batch_cost / log_iters + train_reader_cost = 0.0 + train_batch_cost = 0.0 + remain_iters = iters - iter + eta = calculate_eta(remain_iters, avg_train_batch_cost) + logger.info( + "[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.4f} | ETA {}" + .format((iter - 1) // iters_per_epoch + 1, iter, iters, + avg_loss * nranks, lr, avg_train_batch_cost, + avg_train_reader_cost, eta)) + if use_vdl: + log_writer.add_scalar('Train/loss', avg_loss * nranks, iter) + log_writer.add_scalar('Train/lr', lr, iter) + log_writer.add_scalar('Train/batch_cost', + avg_train_batch_cost, iter) + log_writer.add_scalar('Train/reader_cost', + avg_train_reader_cost, iter) + avg_loss = 0.0 + + if (iter % save_interval_iters == 0 + or iter == iters) and ParallelEnv().local_rank == 0: + current_save_dir = os.path.join(save_dir, + "iter_{}".format(iter)) + if not os.path.isdir(current_save_dir): + os.makedirs(current_save_dir) + fluid.save_dygraph(model.state_dict(), + os.path.join(current_save_dir, 'model')) + fluid.save_dygraph(optimizer.state_dict(), + os.path.join(current_save_dir, 'model')) + + if eval_dataset is not None: + mean_iou, avg_acc = evaluate( + model, + eval_dataset, + model_dir=current_save_dir, + num_classes=num_classes, + ignore_index=ignore_index, + iter_id=iter) + if mean_iou > best_mean_iou: + best_mean_iou = mean_iou + best_model_iter = iter + best_model_dir = os.path.join(save_dir, "best_model") + fluid.save_dygraph( + model.state_dict(), + os.path.join(best_model_dir, 'model')) + logger.info( + 'Current evaluated best model in eval_dataset is iter_{}, miou={:4f}' + .format(best_model_iter, best_mean_iou)) + + if use_vdl: + log_writer.add_scalar('Evaluate/mIoU', mean_iou, iter) + log_writer.add_scalar('Evaluate/aAcc', avg_acc, iter) + model.train() + timer.restart() + if use_vdl: + log_writer.close() diff --git a/dygraph/paddleseg/core/val.py b/dygraph/paddleseg/core/val.py new file mode 100644 index 0000000000000000000000000000000000000000..c104b2d8bf67419c58f15ba75989720662b0a2d8 --- /dev/null +++ b/dygraph/paddleseg/core/val.py @@ -0,0 +1,89 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+
+import numpy as np
+import tqdm
+import cv2
+from paddle.fluid.dygraph.base import to_variable
+import paddle.fluid as fluid
+import paddle.nn.functional as F
+import paddle
+
+import paddleseg.utils.logger as logger
+from paddleseg.utils import ConfusionMatrix
+from paddleseg.utils import Timer, calculate_eta
+
+
+def evaluate(model,
+             eval_dataset=None,
+             model_dir=None,
+             num_classes=None,
+             ignore_index=255,
+             iter_id=None):
+    ckpt_path = os.path.join(model_dir, 'model')
+    para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path)
+    model.set_dict(para_state_dict)
+    model.eval()
+
+    total_iters = len(eval_dataset)
+    conf_mat = ConfusionMatrix(num_classes, streaming=True)
+
+    logger.info(
+        "Start evaluating (total_samples={}, total_iters={})...".format(
+            len(eval_dataset), total_iters))
+    timer = Timer()
+    timer.start()
+    for iter, (im, im_info, label) in tqdm.tqdm(
+            enumerate(eval_dataset), total=total_iters):
+        im = to_variable(im)
+        # pred, _ = model(im)
+        logits = model(im)
+        pred = paddle.argmax(logits[0], axis=1)
+        pred = pred.numpy().astype('float32')
+        pred = np.squeeze(pred)
+        for info in im_info[::-1]:
+            if info[0] == 'resize':
+                h, w = info[1][0], info[1][1]
+                pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
+            elif info[0] == 'padding':
+                h, w = info[1][0], info[1][1]
+                pred = pred[0:h, 0:w]
+            else:
+                raise Exception("Unexpected info '{}' in im_info".format(
+                    info[0]))
+        pred = pred[np.newaxis, :, :, np.newaxis]
+        pred = pred.astype('int64')
+        mask = label != ignore_index
+
+        conf_mat.calculate(pred=pred, label=label, ignore=mask)
+        _, iou = conf_mat.mean_iou()
+
+        time_iter = timer.elapsed_time()
+        remain_iter = total_iters - iter - 1
+        logger.debug(
+            "[EVAL] iter_id={}, iter={}/{}, iou={:.4f}, sec/iter={:.4f} | ETA {}"
+            .format(iter_id, iter + 1, total_iters, iou, time_iter,
+                    calculate_eta(remain_iter, time_iter)))
+        timer.restart()
+
+    category_iou, miou = conf_mat.mean_iou()
+    category_acc, macc = conf_mat.accuracy()
+    logger.info("[EVAL] #Images={} mAcc={:.4f} mIoU={:.4f}".format(
+        len(eval_dataset), macc, miou))
+    logger.info("[EVAL] Category IoU: " + str(category_iou))
+    logger.info("[EVAL] Category Acc: " + str(category_acc))
+    logger.info("[EVAL] Kappa: {:.4f}".format(conf_mat.kappa()))
+    return miou, macc
diff --git a/dygraph/paddleseg/cvlibs/__init__.py b/dygraph/paddleseg/cvlibs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..18812001388cbfd1ecf7dc4d38398ddd91711af4
--- /dev/null
+++ b/dygraph/paddleseg/cvlibs/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import manager
+from . import param_init
diff --git a/dygraph/paddleseg/cvlibs/manager.py b/dygraph/paddleseg/cvlibs/manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..339070069c7e39532ec7fe2c826851a8d0f53df6
--- /dev/null
+++ b/dygraph/paddleseg/cvlibs/manager.py
@@ -0,0 +1,118 @@
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections.abc import Sequence
+import inspect
+
+
+class ComponentManager:
+    """
+    Implement a manager class to add new components properly.
+    The component can be added as either class or function type.
+
+    For example:
+        >>> model_manager = ComponentManager()
+        >>> class AlexNet: ...
+        >>> class ResNet: ...
+        >>> model_manager.add_component(AlexNet)
+        >>> model_manager.add_component(ResNet)
+
+    or, alternatively, pass a sequence of them:
+        >>> model_manager.add_component([AlexNet, ResNet])
+        >>> print(model_manager.components_dict)
+        output: {'AlexNet': <class 'AlexNet'>, 'ResNet': <class 'ResNet'>}
+
+    Or, an easier way, use it as a Python decorator by just adding it above the class declaration:
+        >>> model_manager = ComponentManager()
+        >>> @model_manager.add_component
+        >>> class AlexNet: ...
+        >>> @model_manager.add_component
+        >>> class ResNet: ...
+        >>> print(model_manager.components_dict)
+        output: {'AlexNet': <class 'AlexNet'>, 'ResNet': <class 'ResNet'>}
+    """
+
+    def __init__(self):
+        self._components_dict = dict()
+
+    def __len__(self):
+        return len(self._components_dict)
+
+    def __repr__(self):
+        return "{}:{}".format(self.__class__.__name__,
+                              list(self._components_dict.keys()))
+
+    def __getitem__(self, item):
+        if item not in self._components_dict.keys():
+            raise KeyError("{} does not exist in the current {}".format(
+                item, self))
+        return self._components_dict[item]
+
+    @property
+    def components_dict(self):
+        return self._components_dict
+
+    def _add_single_component(self, component):
+        """
+        Add a single component into the corresponding manager.
+
+        Args:
+            component (function | class): a new component.
+
+        Returns:
+            None
+        """
+
+        # Currently only support class or function type
+        if not (inspect.isclass(component) or inspect.isfunction(component)):
+            raise TypeError(
+                "Expect class/function type, but received {}".format(
+                    type(component)))
+
+        # Obtain the internal name of the component
+        component_name = component.__name__
+
+        # Check whether the component was added already
+        if component_name in self._components_dict.keys():
+            raise KeyError("{} exists already!".format(component_name))
+        else:
+            # Take the internal name of the component as its key
+            self._components_dict[component_name] = component
+
+    def add_component(self, components):
+        """
+        Add component(s) into the corresponding manager.
+
+        Args:
+            components (function | class | list | tuple): support three types of components.
+
+        Returns:
+            None
+        """
+
+        # Check whether the type is a sequence
+        if isinstance(components, Sequence):
+            for component in components:
+                self._add_single_component(component)
+        else:
+            component = components
self._add_single_component(component) + + return components + + +MODELS = ComponentManager() +BACKBONES = ComponentManager() +DATASETS = ComponentManager() +TRANSFORMS = ComponentManager() +LOSSES = ComponentManager() diff --git a/dygraph/paddleseg/cvlibs/param_init.py b/dygraph/paddleseg/cvlibs/param_init.py new file mode 100644 index 0000000000000000000000000000000000000000..567399c0a0c7d2310931b1c0ccae13cd0d5422b1 --- /dev/null +++ b/dygraph/paddleseg/cvlibs/param_init.py @@ -0,0 +1,25 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as fluid + + +def constant_init(param, **kwargs): + initializer = fluid.initializer.Constant(**kwargs) + initializer(param, param.block) + + +def normal_init(param, **kwargs): + initializer = fluid.initializer.Normal(**kwargs) + initializer(param, param.block) diff --git a/dygraph/paddleseg/datasets/__init__.py b/dygraph/paddleseg/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..37d8da36997b3ec2a74b92199242eba126a0cefc --- /dev/null +++ b/dygraph/paddleseg/datasets/__init__.py @@ -0,0 +1,26 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .dataset import Dataset +from .optic_disc_seg import OpticDiscSeg +from .cityscapes import Cityscapes +from .voc import PascalVOC +from .ade import ADE20K + +DATASETS = { + "OpticDiscSeg": OpticDiscSeg, + "Cityscapes": Cityscapes, + "PascalVOC": PascalVOC, + "ADE20K": ADE20K +} diff --git a/dygraph/paddleseg/datasets/ade.py b/dygraph/paddleseg/datasets/ade.py new file mode 100644 index 0000000000000000000000000000000000000000..6614739899789e8fd8b13db4b7cb9ee798acaeae --- /dev/null +++ b/dygraph/paddleseg/datasets/ade.py @@ -0,0 +1,100 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+
+import numpy as np
+from PIL import Image
+
+from .dataset import Dataset
+from paddleseg.utils.download import download_file_and_uncompress
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+
+DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
+URL = "http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip"
+
+
+@manager.DATASETS.add_component
+class ADE20K(Dataset):
+    """ADE20K dataset `http://sceneparsing.csail.mit.edu/`.
+
+    Args:
+        dataset_root: The dataset directory.
+        mode: Which part of the dataset to use; it is one of ('train', 'val'). Default: 'train'.
+        transforms: Transforms for image.
+        download: Whether to download the dataset if `dataset_root` is None.
+    """
+
+    def __init__(self,
+                 dataset_root=None,
+                 mode='train',
+                 transforms=None,
+                 download=True):
+        self.dataset_root = dataset_root
+        self.transforms = Compose(transforms)
+        self.mode = mode
+        self.file_list = list()
+        self.num_classes = 150
+
+        if mode.lower() not in ['train', 'val']:
+            raise Exception(
+                "`mode` should be one of ('train', 'val') in ADE20K dataset, but got {}."
+                .format(mode))
+
+        if self.transforms is None:
+            raise Exception("`transforms` is necessary, but it is None.")
+
+        if self.dataset_root is None:
+            if not download:
+                raise Exception(
+                    "`dataset_root` not set and auto download disabled.")
+            self.dataset_root = download_file_and_uncompress(
+                url=URL,
+                savepath=DATA_HOME,
+                extrapath=DATA_HOME,
+                extraname='ADEChallengeData2016')
+        elif not os.path.exists(self.dataset_root):
+            raise Exception('there is no `dataset_root`: {}.'.format(
+                self.dataset_root))
+
+        if mode == 'train':
+            img_dir = os.path.join(self.dataset_root, 'images/training')
+            grt_dir = os.path.join(self.dataset_root, 'annotations/training')
+        elif mode == 'val':
+            img_dir = os.path.join(self.dataset_root, 'images/validation')
+            grt_dir = os.path.join(self.dataset_root, 'annotations/validation')
+        img_files = os.listdir(img_dir)
+        grt_files = [i.replace('.jpg', '.png') for i in img_files]
+        for i in range(len(img_files)):
+            img_path = os.path.join(img_dir, img_files[i])
+            grt_path = os.path.join(grt_dir, grt_files[i])
+            self.file_list.append([img_path, grt_path])
+
+    def __getitem__(self, idx):
+        image_path, grt_path = self.file_list[idx]
+        if self.mode == 'test':
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
+            return im, im_info, image_path
+        elif self.mode == 'val':
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
+            label = np.asarray(Image.open(grt_path))
+            label = label - 1
+            label = label[np.newaxis, np.newaxis, :, :]
+            return im, im_info, label
+        else:
+            im, im_info, label = self.transforms(im=image_path, label=grt_path)
+            label = label - 1
+            return im, label
diff --git a/dygraph/paddleseg/datasets/cityscapes.py b/dygraph/paddleseg/datasets/cityscapes.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3045d74fe621d165047bbba02a5a1908a7ebd23
--- /dev/null
+++ b/dygraph/paddleseg/datasets/cityscapes.py
@@ -0,0 +1,77 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import glob
+
+from .dataset import Dataset
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+
+
+@manager.DATASETS.add_component
+class Cityscapes(Dataset):
+    """Cityscapes dataset `https://www.cityscapes-dataset.com/`.
+
+    The folder structure is as follows:
+        cityscapes
+        |
+        |--leftImg8bit
+        |  |--train
+        |  |--val
+        |  |--test
+        |
+        |--gtFine
+        |  |--train
+        |  |--val
+        |  |--test
+
+    Make sure there are *_labelTrainIds.png files in the gtFine directory. If not, please run
+    convert_cityscapes.py in tools.
+
+    Args:
+        dataset_root: Cityscapes dataset directory.
+        mode: Which part of the dataset to use; it is one of ('train', 'val', 'test'). Default: 'train'.
+        transforms: Transforms for image.
+    """
+
+    def __init__(self, dataset_root, transforms=None, mode='train'):
+        self.dataset_root = dataset_root
+        self.transforms = Compose(transforms)
+        self.file_list = list()
+        self.mode = mode
+        self.num_classes = 19
+
+        if mode.lower() not in ['train', 'val', 'test']:
+            raise Exception(
+                "mode should be 'train', 'val' or 'test', but got {}.".format(
+                    mode))
+
+        if self.transforms is None:
+            raise Exception("`transforms` is necessary, but it is None.")
+
+        img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
+        grt_dir = os.path.join(self.dataset_root, 'gtFine')
+        if self.dataset_root is None or not os.path.isdir(
+                self.dataset_root) or not os.path.isdir(
+                    img_dir) or not os.path.isdir(grt_dir):
+            raise Exception(
+                "The dataset is not found or the folder structure is nonconformant."
+            )
+
+        grt_files = sorted(
+            glob.glob(
+                os.path.join(grt_dir, mode, '*', '*_gtFine_labelTrainIds.png')))
+        img_files = sorted(
+            glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.png')))
+
+        self.file_list = [[img_path, grt_path]
+                          for img_path, grt_path in zip(img_files, grt_files)]
diff --git a/dygraph/paddleseg/datasets/dataset.py b/dygraph/paddleseg/datasets/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..30af28c3d03c43194e4d58e267aa8ed6c46c8156
--- /dev/null
+++ b/dygraph/paddleseg/datasets/dataset.py
@@ -0,0 +1,139 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import paddle.fluid as fluid
+import numpy as np
+from PIL import Image
+
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+
+
+@manager.DATASETS.add_component
+class Dataset(fluid.io.Dataset):
+    """Pass in a custom dataset that conforms to the format.
+
+    Args:
+        dataset_root: The dataset directory.
+        num_classes: Number of classes.
+        mode: Which part of the dataset to use; it is one of ('train', 'val', 'test'). Default: 'train'.
+        train_list: The train dataset file. When mode is 'train', train_list is necessary.
+            The contents of the train_list file are as follows:
+                image1.jpg ground_truth1.png
+                image2.jpg ground_truth2.png
+        val_list: The evaluation dataset file. When mode is 'val', val_list is necessary.
+            The contents are the same as for train_list.
+        test_list: The test dataset file. When mode is 'test', test_list is necessary.
+            The annotation file is not necessary in the test_list file.
+        separator: The separator of the dataset list. Default: ' '.
+        transforms: Transforms for image.
+
+    Examples:
+        todo
+
+    """
+
+    def __init__(self,
+                 dataset_root,
+                 num_classes,
+                 mode='train',
+                 train_list=None,
+                 val_list=None,
+                 test_list=None,
+                 separator=' ',
+                 transforms=None):
+        self.dataset_root = dataset_root
+        self.transforms = Compose(transforms)
+        self.file_list = list()
+        self.mode = mode
+        self.num_classes = num_classes
+
+        if mode.lower() not in ['train', 'val', 'test']:
+            raise Exception(
+                "mode should be 'train', 'val' or 'test', but got {}.".format(
+                    mode))
+
+        if self.transforms is None:
+            raise Exception("`transforms` is necessary, but it is None.")
+
+        self.dataset_root = dataset_root
+        if not os.path.exists(self.dataset_root):
+            raise Exception('there is no `dataset_root`: {}.'.format(
+                self.dataset_root))
+
+        if mode == 'train':
+            if train_list is None:
+                raise Exception(
+                    'When `mode` is "train", `train_list` is necessary, but it is None.'
+                )
+            elif not os.path.exists(train_list):
+                raise Exception(
+                    '`train_list` is not found: {}'.format(train_list))
+            else:
+                file_list = train_list
+        elif mode == 'val':
+            if val_list is None:
+                raise Exception(
+                    'When `mode` is "val", `val_list` is necessary, but it is None.'
+                )
+            elif not os.path.exists(val_list):
+                raise Exception('`val_list` is not found: {}'.format(val_list))
+            else:
+                file_list = val_list
+        else:
+            if test_list is None:
+                raise Exception(
+                    'When `mode` is "test", `test_list` is necessary, but it is None.'
+                )
+            elif not os.path.exists(test_list):
+                raise Exception(
+                    '`test_list` is not found: {}'.format(test_list))
+            else:
+                file_list = test_list
+
+        with open(file_list, 'r') as f:
+            for line in f:
+                items = line.strip().split(separator)
+                if len(items) != 2:
+                    if mode == 'train' or mode == 'val':
+                        raise Exception(
+                            "File list format incorrect! In training or evaluation task it should be"
+                            " image_name{}label_name\\n".format(separator))
+                    image_path = os.path.join(self.dataset_root, items[0])
+                    grt_path = None
+                else:
+                    image_path = os.path.join(self.dataset_root, items[0])
+                    grt_path = os.path.join(self.dataset_root, items[1])
+                self.file_list.append([image_path, grt_path])
+
+    def __getitem__(self, idx):
+        image_path, grt_path = self.file_list[idx]
+        if self.mode == 'test':
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
+            return im, im_info, image_path
+        elif self.mode == 'val':
+            im, im_info, _ = self.transforms(im=image_path)
+            im = im[np.newaxis, ...]
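+            # For 'val', the ground truth is loaded at its original size
+            # (only the image goes through the transforms) and gets batch and
+            # channel axes, giving a [1, 1, H, W] array for the evaluator.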
+ label = np.asarray(Image.open(grt_path)) + label = label[np.newaxis, np.newaxis, :, :] + return im, im_info, label + else: + im, im_info, label = self.transforms(im=image_path, label=grt_path) + return im, label + + def __len__(self): + return len(self.file_list) diff --git a/dygraph/paddleseg/datasets/optic_disc_seg.py b/dygraph/paddleseg/datasets/optic_disc_seg.py new file mode 100644 index 0000000000000000000000000000000000000000..6c1dedde24f0dd4a9bf9d922912da3c57bd37569 --- /dev/null +++ b/dygraph/paddleseg/datasets/optic_disc_seg.py @@ -0,0 +1,77 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from .dataset import Dataset +from paddleseg.utils.download import download_file_and_uncompress +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose + +DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset') +URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip" + + +@manager.DATASETS.add_component +class OpticDiscSeg(Dataset): + def __init__(self, + dataset_root=None, + transforms=None, + mode='train', + download=True): + self.dataset_root = dataset_root + self.transforms = Compose(transforms) + self.file_list = list() + self.mode = mode + self.num_classes = 2 + + if mode.lower() not in ['train', 'val', 'test']: + raise Exception( + "`mode` should be 'train', 'val' or 'test', but got {}.".format( + mode)) + + if self.transforms is None: + raise Exception("`transforms` is necessary, but it is None.") + + if self.dataset_root is None: + if not download: + raise Exception( + "`data_root` not set and auto download disabled.") + self.dataset_root = download_file_and_uncompress( + url=URL, savepath=DATA_HOME, extrapath=DATA_HOME) + elif not os.path.exists(self.dataset_root): + raise Exception('there is not `dataset_root`: {}.'.format( + self.dataset_root)) + + if mode == 'train': + file_list = os.path.join(self.dataset_root, 'train_list.txt') + elif mode == 'val': + file_list = os.path.join(self.dataset_root, 'val_list.txt') + else: + file_list = os.path.join(self.dataset_root, 'test_list.txt') + + with open(file_list, 'r') as f: + for line in f: + items = line.strip().split() + if len(items) != 2: + if mode == 'train' or mode == 'val': + raise Exception( + "File list format incorrect! It should be" + " image_name label_name\\n") + image_path = os.path.join(self.dataset_root, items[0]) + grt_path = None + else: + image_path = os.path.join(self.dataset_root, items[0]) + grt_path = os.path.join(self.dataset_root, items[1]) + self.file_list.append([image_path, grt_path]) diff --git a/dygraph/paddleseg/datasets/rice.py b/dygraph/paddleseg/datasets/rice.py new file mode 100644 index 0000000000000000000000000000000000000000..f8041526fa2e265e0eac70709e9c295e860df9ad --- /dev/null +++ b/dygraph/paddleseg/datasets/rice.py @@ -0,0 +1,56 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from .dataset import Dataset
+
+
+class Rice(Dataset):
+    def __init__(self, transforms=None, mode='train', download=True):
+        self.data_dir = "/mnt/liuyi22/PaddlePaddle/POC/rice_dataset"
+        self.transforms = transforms
+        self.file_list = list()
+        self.mode = mode
+        self.num_classes = 2
+
+        if mode.lower() not in ['train', 'eval', 'test']:
+            raise Exception(
+                "mode should be 'train', 'eval' or 'test', but got {}.".format(
+                    mode))
+
+        if self.transforms is None:
+            raise Exception("`transforms` is necessary, but it is None.")
+
+        if mode == 'train':
+            file_list = os.path.join(self.data_dir, 'train_list.txt')
+        elif mode == 'eval':
+            file_list = os.path.join(self.data_dir, 'val_list.txt')
+        else:
+            file_list = os.path.join(self.data_dir, 'test_list.txt')
+
+        with open(file_list, 'r') as f:
+            for line in f:
+                items = line.strip().split()
+                if len(items) != 2:
+                    if mode == 'train' or mode == 'eval':
+                        raise Exception(
+                            "File list format incorrect! It should be"
+                            " image_name label_name\\n")
+                    image_path = os.path.join(self.data_dir, items[0])
+                    grt_path = None
+                else:
+                    image_path = os.path.join(self.data_dir, items[0])
+                    grt_path = os.path.join(self.data_dir, items[1])
+                self.file_list.append([image_path, grt_path])
diff --git a/dygraph/paddleseg/datasets/voc.py b/dygraph/paddleseg/datasets/voc.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6ac4b6a3e3540ae1b89fe0d1bac580acb0333e9
--- /dev/null
+++ b/dygraph/paddleseg/datasets/voc.py
@@ -0,0 +1,106 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from .dataset import Dataset
+from paddleseg.utils.download import download_file_and_uncompress
+from paddleseg.cvlibs import manager
+from paddleseg.transforms import Compose
+
+DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
+URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"
+
+
+@manager.DATASETS.add_component
+class PascalVOC(Dataset):
+    """Pascal VOC dataset `http://host.robots.ox.ac.uk/pascal/VOC/`. If you want to augment the dataset,
+    please run voc_augment.py in tools.
+
+    Args:
+        dataset_root: The dataset directory.
+        mode: Which part of the dataset to use; it is one of ('train', 'trainval', 'trainaug', 'val'). Default: 'train'.
+        transforms: Transforms for image.
+        download: Whether to download the dataset if `dataset_root` is None.
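+
+    A minimal construction sketch (the transform list is illustrative; any
+    transforms from paddleseg.transforms can be used):
+
+        import paddleseg.transforms as T
+        train_transforms = [T.RandomHorizontalFlip(), T.Normalize()]
+        train_dataset = PascalVOC(mode='train', transforms=train_transforms, download=True)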
+ """ + + def __init__(self, + dataset_root=None, + mode='train', + transforms=None, + download=True): + self.dataset_root = dataset_root + self.transforms = Compose(transforms) + self.mode = mode + self.file_list = list() + self.num_classes = 21 + + if mode.lower() not in ['train', 'trainval', 'trainaug', 'val']: + raise Exception( + "`mode` should be one of ('train', 'trainval', 'trainaug', 'val') in PascalVOC dataset, but got {}." + .format(mode)) + + if self.transforms is None: + raise Exception("`transforms` is necessary, but it is None.") + + if self.dataset_root is None: + if not download: + raise Exception( + "`dataset_root` not set and auto download disabled.") + self.dataset_root = download_file_and_uncompress( + url=URL, + savepath=DATA_HOME, + extrapath=DATA_HOME, + extraname='VOCdevkit') + elif not os.path.exists(self.dataset_root): + raise Exception('there is not `dataset_root`: {}.'.format( + self.dataset_root)) + + image_set_dir = os.path.join(self.dataset_root, 'VOC2012', 'ImageSets', + 'Segmentation') + if mode == 'train': + file_list = os.path.join(image_set_dir, 'train.txt') + elif mode == 'val': + file_list = os.path.join(image_set_dir, 'val.txt') + elif mode == 'trainval': + file_list = os.path.join(image_set_dir, 'trainval.txt') + elif mode == 'trainaug': + file_list = os.path.join(image_set_dir, 'train.txt') + file_list_aug = os.path.join(image_set_dir, 'aug.txt') + + if not os.path.exists(file_list_aug): + raise Exception( + "When `mode` is 'trainaug', Pascal Voc dataset should be augmented, " + "Please make sure voc_augment.py has been properly run when using this mode." + ) + + img_dir = os.path.join(self.dataset_root, 'VOC2012', 'JPEGImages') + grt_dir = os.path.join(self.dataset_root, 'VOC2012', + 'SegmentationClass') + grt_dir_aug = os.path.join(self.dataset_root, 'VOC2012', + 'SegmentationClassAug') + + with open(file_list, 'r') as f: + for line in f: + line = line.strip() + image_path = os.path.join(img_dir, ''.join([line, '.jpg'])) + grt_path = os.path.join(grt_dir, ''.join([line, '.png'])) + self.file_list.append([image_path, grt_path]) + if mode == 'trainaug': + with open(file_list_aug, 'r') as f: + for line in f: + line = line.strip() + image_path = os.path.join(img_dir, ''.join([line, '.jpg'])) + grt_path = os.path.join(grt_dir_aug, ''.join([line, + '.png'])) + self.file_list.append([image_path, grt_path]) diff --git a/dygraph/paddleseg/models/__init__.py b/dygraph/paddleseg/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..939b855d5aababdf06216fbd29d3cd7334db7823 --- /dev/null +++ b/dygraph/paddleseg/models/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from .backbones import *
+from .losses import *
+from .unet import UNet
+from .deeplab import *
+from .fcn import *
+from .pspnet import *
+from .ocrnet import *
+from .fast_scnn import *
+from .gcnet import *
+from .ann import *
diff --git a/dygraph/paddleseg/models/ann.py b/dygraph/paddleseg/models/ann.py
new file mode 100644
index 0000000000000000000000000000000000000000..48c381d26308ac6c6632abcd202b84409e22e7f7
--- /dev/null
+++ b/dygraph/paddleseg/models/ann.py
@@ -0,0 +1,439 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import paddle
+import paddle.nn.functional as F
+from paddle import nn
+from paddleseg.cvlibs import manager
+from paddleseg.models.common import layer_utils, model_utils
+from paddleseg.utils import utils
+
+
+@manager.MODELS.add_component
+class ANN(nn.Layer):
+    """
+    The ANN implementation based on PaddlePaddle.
+
+    The original article refers to
+    Zhu, Zhen, et al. "Asymmetric Non-local Neural Networks for Semantic Segmentation."
+    (https://arxiv.org/pdf/1908.07678.pdf)
+
+    It mainly consists of AFNB and APNB modules.
+
+    Args:
+
+        num_classes (int): the number of target classes.
+
+        backbone (Paddle.nn.Layer): the backbone network; currently ResNet50/101 are supported.
+
+        model_pretrained (str): the path of the pretrained model. Default to None.
+
+        backbone_indices (tuple): two values indicating the indices of backbone outputs.
+            The first index is taken as the low-level features and the second one as the
+            high-level features in the AFNB module. Usually the backbone consists of four
+            downsampling stages and returns an output of each stage, so we set the default
+            to (2, 3), i.e. the feature maps of the third and the fourth stage.
+
+        backbone_channels (tuple): the same length as "backbone_indices"; it indicates the channels of the corresponding indices.
+
+        key_value_channels (int): the key and value channels of the self-attention map in both AFNB and APNB modules.
+            Default to 256.
+
+        inter_channels (int): both input and output channels of the APNB modules.
+
+        psp_size (tuple): the out size of pooled feature maps. Default to (1, 3, 6, 8).
+
+        enable_auxiliary_loss (bool): a bool value that indicates whether to add an auxiliary loss. Default to True.
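+
+    A minimal construction sketch (`backbone` here is assumed to be any
+    network registered in this package that returns per-stage feature maps,
+    e.g. a ResNet101 variant; it is not defined in this file):
+
+        model = ANN(num_classes=19,
+                    backbone=backbone,
+                    backbone_indices=(2, 3),
+                    backbone_channels=(1024, 2048))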
+ + """ + + def __init__(self, + num_classes, + backbone, + model_pretrained=None, + backbone_indices=(2, 3), + backbone_channels=(1024, 2048), + key_value_channels=256, + inter_channels=512, + psp_size=(1, 3, 6, 8), + enable_auxiliary_loss=True): + super(ANN, self).__init__() + + self.backbone = backbone + + low_in_channels = backbone_channels[0] + high_in_channels = backbone_channels[1] + + self.fusion = AFNB( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + out_channels=high_in_channels, + key_channels=key_value_channels, + value_channels=key_value_channels, + dropout_prob=0.05, + sizes=([1]), + psp_size=psp_size) + + self.context = nn.Sequential( + layer_utils.ConvBnRelu( + in_channels=high_in_channels, + out_channels=inter_channels, + kernel_size=3, + padding=1), + APNB( + in_channels=inter_channels, + out_channels=inter_channels, + key_channels=key_value_channels, + value_channels=key_value_channels, + dropout_prob=0.05, + sizes=([1]), + psp_size=psp_size)) + + self.cls = nn.Conv2d( + in_channels=inter_channels, + out_channels=num_classes, + kernel_size=1) + self.auxlayer = model_utils.AuxLayer( + in_channels=low_in_channels, + inter_channels=low_in_channels // 2, + out_channels=num_classes, + dropout_prob=0.05) + + self.backbone_indices = backbone_indices + self.enable_auxiliary_loss = enable_auxiliary_loss + + self.init_weight(model_pretrained) + + def forward(self, input, label=None): + + logit_list = [] + _, feat_list = self.backbone(input) + low_level_x = feat_list[self.backbone_indices[0]] + high_level_x = feat_list[self.backbone_indices[1]] + x = self.fusion(low_level_x, high_level_x) + x = self.context(x) + logit = self.cls(x) + logit = F.resize_bilinear(logit, input.shape[2:]) + logit_list.append(logit) + + if self.enable_auxiliary_loss: + auxiliary_logit = self.auxlayer(low_level_x) + auxiliary_logit = F.resize_bilinear(auxiliary_logit, input.shape[2:]) + logit_list.append(auxiliary_logit) + + return logit_list + + def init_weight(self, pretrained_model=None): + """ + Initialize the parameters of model parts. + + Args: + pretrained_model ([str], optional): the pretrained_model path of backbone. Defaults to None. + """ + + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self.backbone, pretrained_model) + + +class AFNB(nn.Layer): + """ + Asymmetric Fusion Non-local Block + + Args: + low_in_channels (int): low-level-feature channels. + + high_in_channels (int): high-level-feature channels. + + out_channels (int): out channels of AFNB module. + + key_channels (int): the key channels in self-attention block. + + value_channels (int): the value channels in self-attention block. + + dropout_prob (float): the dropout rate of output. + + sizes (tuple): the number of AFNB modules. Default to ([1]). + + psp_size (tuple): the out size of pooled feature maps. Default to (1, 3, 6, 8). 
+ + """ + + def __init__(self, + low_in_channels, + high_in_channels, + out_channels, + key_channels, + value_channels, + dropout_prob, + sizes=([1]), + psp_size=(1, 3, 6, 8)): + super(AFNB, self).__init__() + + self.psp_size = psp_size + self.stages = nn.LayerList([ + SelfAttentionBlock_AFNB(low_in_channels, high_in_channels, + key_channels, value_channels, out_channels, + size) for size in sizes + ]) + self.conv_bn = layer_utils.ConvBn( + in_channels=out_channels + high_in_channels, + out_channels=out_channels, + kernel_size=1) + self.dropout_prob = dropout_prob + + def forward(self, low_feats, high_feats): + priors = [stage(low_feats, high_feats) for stage in self.stages] + context = priors[0] + for i in range(1, len(priors)): + context += priors[i] + + output = self.conv_bn(paddle.concat([context, high_feats], axis=1)) + output = F.dropout(output, p=self.dropout_prob) # dropout_prob + + return output + + +class APNB(nn.Layer): + """ + Asymmetric Pyramid Non-local Block + + Args: + in_channels (int): the input channels of APNB module. + + out_channels (int): out channels of APNB module. + + key_channels (int): the key channels in self-attention block. + + value_channels (int): the value channels in self-attention block. + + dropout_prob (float): the dropout rate of output. + + sizes (tuple): the number of AFNB modules. Default to ([1]). + + psp_size (tuple): the out size of pooled feature maps. Default to (1, 3, 6, 8). + + """ + + def __init__(self, + in_channels, + out_channels, + key_channels, + value_channels, + dropout_prob, + sizes=([1]), + psp_size=(1, 3, 6, 8)): + super(APNB, self).__init__() + + self.psp_size = psp_size + self.stages = nn.LayerList([ + SelfAttentionBlock_APNB(in_channels, out_channels, key_channels, + value_channels, size) for size in sizes + ]) + self.conv_bn = layer_utils.ConvBnRelu( + in_channels=in_channels * 2, + out_channels=out_channels, + kernel_size=1) + self.dropout_prob = dropout_prob + + def forward(self, feats): + priors = [stage(feats) for stage in self.stages] + context = priors[0] + for i in range(1, len(priors)): + context += priors[i] + + output = self.conv_bn(paddle.concat([context, feats], axis=1)) + output = F.dropout(output, p=self.dropout_prob) # dropout_prob + + return output + + +def _pp_module(x, psp_size): + n, c, h, w = x.shape + priors = [] + for size in psp_size: + feat = F.adaptive_pool2d(x, pool_size=size, pool_type="avg") + feat = paddle.reshape(feat, shape=(n, c, -1)) + priors.append(feat) + center = paddle.concat(priors, axis=-1) + return center + + +class SelfAttentionBlock_AFNB(nn.Layer): + """ + Self-Attention Block for AFNB module. + + Args: + low_in_channels (int): low-level-feature channels. + + high_in_channels (int): high-level-feature channels. + + key_channels (int): the key channels in self-attention block. + + value_channels (int): the value channels in self-attention block. + + out_channels (int): out channels of AFNB module. + + scale (int): pooling size. Defaut to 1. + + psp_size (tuple): the out size of pooled feature maps. Default to (1, 3, 6, 8). 
+ """ + + def __init__(self, + low_in_channels, + high_in_channels, + key_channels, + value_channels, + out_channels=None, + scale=1, + psp_size=(1, 3, 6, 8)): + super(SelfAttentionBlock_AFNB, self).__init__() + + self.scale = scale + self.in_channels = low_in_channels + self.out_channels = out_channels + self.key_channels = key_channels + self.value_channels = value_channels + if out_channels == None: + self.out_channels = high_in_channels + self.pool = nn.Pool2D(pool_size=(scale, scale), pool_type="max") + self.f_key = layer_utils.ConvBnRelu( + in_channels=low_in_channels, + out_channels=key_channels, + kernel_size=1) + self.f_query = layer_utils.ConvBnRelu( + in_channels=high_in_channels, + out_channels=key_channels, + kernel_size=1) + self.f_value = nn.Conv2d( + in_channels=low_in_channels, + out_channels=value_channels, + kernel_size=1) + + self.W = nn.Conv2d( + in_channels=value_channels, + out_channels=out_channels, + kernel_size=1) + + self.psp_size = psp_size + + def forward(self, low_feats, high_feats): + batch_size, _, h, w = high_feats.shape + + value = self.f_value(low_feats) + value = _pp_module(value, self.psp_size) + value = paddle.transpose(value, (0, 2, 1)) + + query = self.f_query(high_feats) + query = paddle.reshape(query, shape=(batch_size, self.key_channels, -1)) + query = paddle.transpose(query, perm=(0, 2, 1)) + + key = self.f_key(low_feats) + key = _pp_module(key, self.psp_size) + + sim_map = paddle.matmul(query, key) + sim_map = (self.key_channels ** -.5) * sim_map + sim_map = F.softmax(sim_map, axis=-1) + + context = paddle.matmul(sim_map, value) + context = paddle.transpose(context, perm=(0, 2, 1)) + context = paddle.reshape( + context, + shape=[batch_size, self.value_channels, *high_feats.shape[2:]]) + + context = self.W(context) + + return context + + +class SelfAttentionBlock_APNB(nn.Layer): + """ + Self-Attention Block for APNB module. + + Args: + in_channels (int): the input channels of APNB module. + + out_channels (int): out channels of APNB module. + + key_channels (int): the key channels in self-attention block. + + value_channels (int): the value channels in self-attention block. + + scale (int): pooling size. Defaut to 1. + + psp_size (tuple): the out size of pooled feature maps. Default to (1, 3, 6, 8). 
+ """ + + def __init__(self, + in_channels, + out_channels, + key_channels, + value_channels, + scale=1, + psp_size=(1, 3, 6, 8)): + super(SelfAttentionBlock_APNB, self).__init__() + + self.scale = scale + self.in_channels = in_channels + self.out_channels = out_channels + self.key_channels = key_channels + self.value_channels = value_channels + + self.pool = nn.Pool2D(pool_size=(scale, scale), pool_type="max") + self.f_key = layer_utils.ConvBnRelu( + in_channels=self.in_channels, + out_channels=self.key_channels, + kernel_size=1) + self.f_query = self.f_key + self.f_value = nn.Conv2d( + in_channels=self.in_channels, + out_channels=self.value_channels, + kernel_size=1) + self.W = nn.Conv2d( + in_channels=self.value_channels, + out_channels=self.out_channels, + kernel_size=1) + + self.psp_size = psp_size + + def forward(self, x): + batch_size, _, h, w = x.shape + if self.scale > 1: + x = self.pool(x) + + value = self.f_value(x) + value = _pp_module(value, self.psp_size) + value = paddle.transpose(value, perm=(0, 2, 1)) + + query = self.f_query(x) + query = paddle.reshape( + query, shape=(batch_size, self.key_channels, -1)) + query = paddle.transpose(query, perm=(0, 2, 1)) + + key = self.f_key(x) + key = _pp_module(key, self.psp_size) + + sim_map = paddle.matmul(query, key) + sim_map = (self.key_channels ** -.5) * sim_map + sim_map = F.softmax(sim_map, axis=-1) + + context = paddle.matmul(sim_map, value) + context = paddle.transpose(context, perm=(0, 2, 1)) + context = paddle.reshape( + context, shape=[batch_size, self.value_channels, *x.shape[2:]]) + context = self.W(context) + + return context diff --git a/dygraph/paddleseg/models/backbones/__init__.py b/dygraph/paddleseg/models/backbones/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8bc32c14b408c2048a394eb1fbf525c5fe91ffa7 --- /dev/null +++ b/dygraph/paddleseg/models/backbones/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .hrnet import * +from .resnet_vd import * +from .xception_deeplab import * +from .mobilenetv3 import * diff --git a/dygraph/paddleseg/models/backbones/hrnet.py b/dygraph/paddleseg/models/backbones/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..d66f1c6de5efc96adf9f919583e4ccafda986222 --- /dev/null +++ b/dygraph/paddleseg/models/backbones/hrnet.py @@ -0,0 +1,850 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import os
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
+from paddle.fluid.initializer import Normal
+from paddle.nn import SyncBatchNorm as BatchNorm
+
+from paddleseg.cvlibs import manager
+from paddleseg.utils import utils
+from paddleseg.cvlibs import param_init
+
+__all__ = [
+    "HRNet_W18_Small_V1", "HRNet_W18_Small_V2", "HRNet_W18", "HRNet_W30",
+    "HRNet_W32", "HRNet_W40", "HRNet_W44", "HRNet_W48", "HRNet_W60", "HRNet_W64"
+]
+
+
+class HRNet(fluid.dygraph.Layer):
+    """
+    HRNet: Deep High-Resolution Representation Learning for Visual Recognition
+    https://arxiv.org/pdf/1908.07919.pdf.
+
+    Args:
+        stage1_num_modules (int): number of modules for stage1. Default 1.
+        stage1_num_blocks (list): number of blocks per module for stage1. Default [4].
+        stage1_num_channels (list): number of channels per branch for stage1. Default [64].
+        stage2_num_modules (int): number of modules for stage2. Default 1.
+        stage2_num_blocks (list): number of blocks per module for stage2. Default [4, 4].
+        stage2_num_channels (list): number of channels per branch for stage2. Default [18, 36].
+        stage3_num_modules (int): number of modules for stage3. Default 4.
+        stage3_num_blocks (list): number of blocks per module for stage3. Default [4, 4, 4].
+        stage3_num_channels (list): number of channels per branch for stage3. Default [18, 36, 72].
+        stage4_num_modules (int): number of modules for stage4. Default 3.
+        stage4_num_blocks (list): number of blocks per module for stage4. Default [4, 4, 4, 4].
+        stage4_num_channels (list): number of channels per branch for stage4. Default [18, 36, 72, 144].
+        has_se (bool): whether to use Squeeze-and-Excitation module. Default False.
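+
+    Note: the forward pass bilinearly upsamples the three lower-resolution
+        stage-4 branch outputs to the highest-resolution branch and
+        concatenates all four along the channel axis, so a single fused
+        feature map is returned (as a one-element list).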
+ """ + + def __init__(self, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[18, 36], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[18, 36, 72, 144], + has_se=False): + super(HRNet, self).__init__() + + self.stage1_num_modules = stage1_num_modules + self.stage1_num_blocks = stage1_num_blocks + self.stage1_num_channels = stage1_num_channels + self.stage2_num_modules = stage2_num_modules + self.stage2_num_blocks = stage2_num_blocks + self.stage2_num_channels = stage2_num_channels + self.stage3_num_modules = stage3_num_modules + self.stage3_num_blocks = stage3_num_blocks + self.stage3_num_channels = stage3_num_channels + self.stage4_num_modules = stage4_num_modules + self.stage4_num_blocks = stage4_num_blocks + self.stage4_num_channels = stage4_num_channels + self.has_se = has_se + + self.conv_layer1_1 = ConvBNLayer( + num_channels=3, + num_filters=64, + filter_size=3, + stride=2, + act='relu', + name="layer1_1") + + self.conv_layer1_2 = ConvBNLayer( + num_channels=64, + num_filters=64, + filter_size=3, + stride=2, + act='relu', + name="layer1_2") + + self.la1 = Layer1( + num_channels=64, + num_blocks=self.stage1_num_blocks[0], + num_filters=self.stage1_num_channels[0], + has_se=has_se, + name="layer2") + + self.tr1 = TransitionLayer( + in_channels=[self.stage1_num_channels[0] * 4], + out_channels=self.stage2_num_channels, + name="tr1") + + self.st2 = Stage( + num_channels=self.stage2_num_channels, + num_modules=self.stage2_num_modules, + num_blocks=self.stage2_num_blocks, + num_filters=self.stage2_num_channels, + has_se=self.has_se, + name="st2") + + self.tr2 = TransitionLayer( + in_channels=self.stage2_num_channels, + out_channels=self.stage3_num_channels, + name="tr2") + self.st3 = Stage( + num_channels=self.stage3_num_channels, + num_modules=self.stage3_num_modules, + num_blocks=self.stage3_num_blocks, + num_filters=self.stage3_num_channels, + has_se=self.has_se, + name="st3") + + self.tr3 = TransitionLayer( + in_channels=self.stage3_num_channels, + out_channels=self.stage4_num_channels, + name="tr3") + self.st4 = Stage( + num_channels=self.stage4_num_channels, + num_modules=self.stage4_num_modules, + num_blocks=self.stage4_num_blocks, + num_filters=self.stage4_num_channels, + has_se=self.has_se, + name="st4") + + def forward(self, x, label=None, mode='train'): + input_shape = x.shape[2:] + conv1 = self.conv_layer1_1(x) + conv2 = self.conv_layer1_2(conv1) + + la1 = self.la1(conv2) + + tr1 = self.tr1([la1]) + st2 = self.st2(tr1) + + tr2 = self.tr2(st2) + st3 = self.st3(tr2) + + tr3 = self.tr3(st3) + st4 = self.st4(tr3) + + x0_h, x0_w = st4[0].shape[2:] + x1 = fluid.layers.resize_bilinear(st4[1], out_shape=(x0_h, x0_w)) + x2 = fluid.layers.resize_bilinear(st4[2], out_shape=(x0_h, x0_w)) + x3 = fluid.layers.resize_bilinear(st4[3], out_shape=(x0_h, x0_w)) + x = fluid.layers.concat([st4[0], x1, x2, x3], axis=1) + + return [x] + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act="relu", + name=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + bias_attr=False) + self._batch_norm = BatchNorm(num_filters) + 
self.act = act + + def forward(self, input): + y = self._conv(input) + y = self._batch_norm(y) + if self.act == 'relu': + y = fluid.layers.relu(y) + return y + + +class Layer1(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + num_blocks, + has_se=False, + name=None): + super(Layer1, self).__init__() + + self.bottleneck_block_list = [] + + for i in range(num_blocks): + bottleneck_block = self.add_sublayer( + "bb_{}_{}".format(name, i + 1), + BottleneckBlock( + num_channels=num_channels if i == 0 else num_filters * 4, + num_filters=num_filters, + has_se=has_se, + stride=1, + downsample=True if i == 0 else False, + name=name + '_' + str(i + 1))) + self.bottleneck_block_list.append(bottleneck_block) + + def forward(self, input): + conv = input + for block_func in self.bottleneck_block_list: + conv = block_func(conv) + return conv + + +class TransitionLayer(fluid.dygraph.Layer): + def __init__(self, in_channels, out_channels, name=None): + super(TransitionLayer, self).__init__() + + num_in = len(in_channels) + num_out = len(out_channels) + self.conv_bn_func_list = [] + for i in range(num_out): + residual = None + if i < num_in: + if in_channels[i] != out_channels[i]: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + ConvBNLayer( + num_channels=in_channels[i], + num_filters=out_channels[i], + filter_size=3, + name=name + '_layer_' + str(i + 1))) + else: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + ConvBNLayer( + num_channels=in_channels[-1], + num_filters=out_channels[i], + filter_size=3, + stride=2, + name=name + '_layer_' + str(i + 1))) + self.conv_bn_func_list.append(residual) + + def forward(self, input): + outs = [] + for idx, conv_bn_func in enumerate(self.conv_bn_func_list): + if conv_bn_func is None: + outs.append(input[idx]) + else: + if idx < len(input): + outs.append(conv_bn_func(input[idx])) + else: + outs.append(conv_bn_func(input[-1])) + return outs + + +class Branches(fluid.dygraph.Layer): + def __init__(self, + num_blocks, + in_channels, + out_channels, + has_se=False, + name=None): + super(Branches, self).__init__() + + self.basic_block_list = [] + + for i in range(len(out_channels)): + self.basic_block_list.append([]) + for j in range(num_blocks[i]): + in_ch = in_channels[i] if j == 0 else out_channels[i] + basic_block_func = self.add_sublayer( + "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1), + BasicBlock( + num_channels=in_ch, + num_filters=out_channels[i], + has_se=has_se, + name=name + '_branch_layer_' + str(i + 1) + '_' + + str(j + 1))) + self.basic_block_list[i].append(basic_block_func) + + def forward(self, inputs): + outs = [] + for idx, input in enumerate(inputs): + conv = input + for basic_block_func in self.basic_block_list[idx]: + conv = basic_block_func(conv) + outs.append(conv) + return outs + + +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + has_se, + stride=1, + downsample=False, + name=None): + super(BottleneckBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act="relu", + name=name + "_conv1", + ) + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_conv2") + self.conv3 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + 
"_conv3") + + if self.downsample: + self.conv_down = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_downsample") + + if self.has_se: + self.se = SELayer( + num_channels=num_filters * 4, + num_filters=num_filters * 4, + reduction_ratio=16, + name=name + '_fc') + + def forward(self, input): + residual = input + conv1 = self.conv1(input) + conv2 = self.conv2(conv1) + conv3 = self.conv3(conv2) + + if self.downsample: + residual = self.conv_down(input) + + if self.has_se: + conv3 = self.se(conv3) + + y = fluid.layers.elementwise_add(x=conv3, y=residual, act="relu") + return y + + +class BasicBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride=1, + has_se=False, + downsample=False, + name=None): + super(BasicBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_conv1") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=1, + act=None, + name=name + "_conv2") + + if self.downsample: + self.conv_down = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + act="relu", + name=name + "_downsample") + + if self.has_se: + self.se = SELayer( + num_channels=num_filters, + num_filters=num_filters, + reduction_ratio=16, + name=name + '_fc') + + def forward(self, input): + residual = input + conv1 = self.conv1(input) + conv2 = self.conv2(conv1) + + if self.downsample: + residual = self.conv_down(input) + + if self.has_se: + conv2 = self.se(conv2) + + y = fluid.layers.elementwise_add(x=conv2, y=residual, act="relu") + return y + + +class SELayer(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, reduction_ratio, name=None): + super(SELayer, self).__init__() + + self.pool2d_gap = Pool2D(pool_type='avg', global_pooling=True) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = Linear( + num_channels, + med_ch, + act="relu", + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + "_sqz_weights"), + bias_attr=ParamAttr(name=name + '_sqz_offset')) + + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = Linear( + med_ch, + num_filters, + act="sigmoid", + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + "_exc_weights"), + bias_attr=ParamAttr(name=name + '_exc_offset')) + + def forward(self, input): + pool = self.pool2d_gap(input) + pool = fluid.layers.reshape(pool, shape=[-1, self._num_channels]) + squeeze = self.squeeze(pool) + excitation = self.excitation(squeeze) + excitation = fluid.layers.reshape( + excitation, shape=[-1, self._num_channels, 1, 1]) + out = input * excitation + return out + + +class Stage(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_modules, + num_blocks, + num_filters, + has_se=False, + multi_scale_output=True, + name=None): + super(Stage, self).__init__() + + self._num_modules = num_modules + + self.stage_func_list = [] + for i in range(num_modules): + if i == num_modules - 1 and not multi_scale_output: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + 
multi_scale_output=False, + name=name + '_' + str(i + 1))) + else: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + name=name + '_' + str(i + 1))) + + self.stage_func_list.append(stage_func) + + def forward(self, input): + out = input + for idx in range(self._num_modules): + out = self.stage_func_list[idx](out) + return out + + +class HighResolutionModule(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_blocks, + num_filters, + has_se=False, + multi_scale_output=True, + name=None): + super(HighResolutionModule, self).__init__() + + self.branches_func = Branches( + num_blocks=num_blocks, + in_channels=num_channels, + out_channels=num_filters, + has_se=has_se, + name=name) + + self.fuse_func = FuseLayers( + in_channels=num_filters, + out_channels=num_filters, + multi_scale_output=multi_scale_output, + name=name) + + def forward(self, input): + out = self.branches_func(input) + out = self.fuse_func(out) + return out + + +class FuseLayers(fluid.dygraph.Layer): + def __init__(self, + in_channels, + out_channels, + multi_scale_output=True, + name=None): + super(FuseLayers, self).__init__() + + self._actual_ch = len(in_channels) if multi_scale_output else 1 + self._in_channels = in_channels + + self.residual_func_list = [] + for i in range(self._actual_ch): + for j in range(len(in_channels)): + residual_func = None + if j > i: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}".format(name, i + 1, j + 1), + ConvBNLayer( + num_channels=in_channels[j], + num_filters=out_channels[i], + filter_size=1, + stride=1, + act=None, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1))) + self.residual_func_list.append(residual_func) + elif j < i: + pre_num_filters = in_channels[j] + for k in range(i - j): + if k == i - j - 1: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format( + name, i + 1, j + 1, k + 1), + ConvBNLayer( + num_channels=pre_num_filters, + num_filters=out_channels[i], + filter_size=3, + stride=2, + act=None, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1))) + pre_num_filters = out_channels[i] + else: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format( + name, i + 1, j + 1, k + 1), + ConvBNLayer( + num_channels=pre_num_filters, + num_filters=out_channels[j], + filter_size=3, + stride=2, + act="relu", + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1))) + pre_num_filters = out_channels[j] + self.residual_func_list.append(residual_func) + + def forward(self, input): + outs = [] + residual_func_idx = 0 + for i in range(self._actual_ch): + residual = input[i] + residual_shape = residual.shape[-2:] + for j in range(len(self._in_channels)): + if j > i: + y = self.residual_func_list[residual_func_idx](input[j]) + residual_func_idx += 1 + + y = fluid.layers.resize_bilinear( + input=y, out_shape=residual_shape) + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + elif j < i: + y = input[j] + for k in range(i - j): + y = self.residual_func_list[residual_func_idx](y) + residual_func_idx += 1 + + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + + layer_helper = LayerHelper(self.full_name(), act='relu') + residual = layer_helper.append_activation(residual) + outs.append(residual) + + return outs + + +class LastClsOut(fluid.dygraph.Layer): + def __init__(self, + num_channel_list, + 
has_se, + num_filters_list=[32, 64, 128, 256], + name=None): + super(LastClsOut, self).__init__() + + self.func_list = [] + for idx in range(len(num_channel_list)): + func = self.add_sublayer( + "conv_{}_conv_{}".format(name, idx + 1), + BottleneckBlock( + num_channels=num_channel_list[idx], + num_filters=num_filters_list[idx], + has_se=has_se, + downsample=True, + name=name + 'conv_' + str(idx + 1))) + self.func_list.append(func) + + def forward(self, inputs): + outs = [] + for idx, input in enumerate(inputs): + out = self.func_list[idx](input) + outs.append(out) + return outs + + +@manager.BACKBONES.add_component +def HRNet_W18_Small_V1(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[1], + stage1_num_channels=[32], + stage2_num_modules=1, + stage2_num_blocks=[2, 2], + stage2_num_channels=[16, 32], + stage3_num_modules=1, + stage3_num_blocks=[2, 2, 2], + stage3_num_channels=[16, 32, 64], + stage4_num_modules=1, + stage4_num_blocks=[2, 2, 2, 2], + stage4_num_channels=[16, 32, 64, 128], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W18_Small_V2(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[2], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[2, 2], + stage2_num_channels=[18, 36], + stage3_num_modules=1, + stage3_num_blocks=[2, 2, 2], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=1, + stage4_num_blocks=[2, 2, 2, 2], + stage4_num_channels=[18, 36, 72, 144], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W18(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[18, 36], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[18, 36, 72, 144], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W30(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[30, 60], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[30, 60, 120], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[30, 60, 120, 240], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W32(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[32, 64], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[32, 64, 128], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[32, 64, 128, 256], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W40(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[40, 80], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[40, 80, 160], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[40, 80, 160, 320], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W44(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + 
stage2_num_blocks=[4, 4], + stage2_num_channels=[44, 88], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[44, 88, 176], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[44, 88, 176, 352], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W48(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[48, 96], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[48, 96, 192], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[48, 96, 192, 384], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W60(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[60, 120], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[60, 120, 240], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[60, 120, 240, 480], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W64(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[64, 128], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[64, 128, 256], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[64, 128, 256, 512], + **kwargs) + return model diff --git a/dygraph/paddleseg/models/backbones/mobilenetv3.py b/dygraph/paddleseg/models/backbones/mobilenetv3.py new file mode 100644 index 0000000000000000000000000000000000000000..6204d7733a45326a70b7cbc423820b987b046708 --- /dev/null +++ b/dygraph/paddleseg/models/backbones/mobilenetv3.py @@ -0,0 +1,451 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
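The HRNet_W* factories above differ only in their per-stage channel widths, and the decorated ones are registered with manager.BACKBONES so they can be retrieved by name. A minimal construction sketch, assuming the paddleseg package introduced by this diff is importable; the channel arithmetic follows from concatenating the four stage-4 branches in HRNet.forward:

from paddleseg.models.backbones.hrnet import HRNet_W18, HRNet_W48

# W18's stage-4 branches carry (18, 36, 72, 144) channels, so the fused
# output map has 18 + 36 + 72 + 144 = 270 channels; W48 gives 720.
backbone_w18 = HRNet_W18()
backbone_w48 = HRNet_W48(has_se=True)  # every factory forwards **kwargs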
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import os + +import numpy as np +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout +from paddle.nn import SyncBatchNorm as BatchNorm + +from paddleseg.models.common import layer_utils +from paddleseg.cvlibs import manager +from paddleseg.utils import utils + +__all__ = [ + "MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5", + "MobileNetV3_small_x0_75", "MobileNetV3_small_x1_0", + "MobileNetV3_small_x1_25", "MobileNetV3_large_x0_35", + "MobileNetV3_large_x0_5", "MobileNetV3_large_x0_75", + "MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25" +] + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +def get_padding_same(kernel_size, dilation_rate): + """ + SAME padding implementation given kernel_size and dilation_rate. + The calculation formula as following: + (F-(k+(k -1)*(r-1))+2*p)/s + 1 = F_new + where F: a feature map + k: kernel size, r: dilation rate, p: padding value, s: stride + F_new: new feature map + Args: + kernel_size (int) + dilation_rate (int) + + Returns: + padding_same (int): padding value + """ + k = kernel_size + r = dilation_rate + padding_same = (k + (k - 1) * (r - 1) - 1) // 2 + + return padding_same + + +class MobileNetV3(fluid.dygraph.Layer): + def __init__(self, + backbone_pretrained=None, + scale=1.0, + model_name="small", + class_dim=1000, + output_stride=None): + super(MobileNetV3, self).__init__() + + inplanes = 16 + if model_name == "large": + self.cfg = [ + # k, exp, c, se, nl, s, + [3, 16, 16, False, "relu", 1], + [3, 64, 24, False, "relu", 2], + [3, 72, 24, False, "relu", 1], # output 1 -> out_index=2 + [5, 72, 40, True, "relu", 2], + [5, 120, 40, True, "relu", 1], + [5, 120, 40, True, "relu", 1], # output 2 -> out_index=5 + [3, 240, 80, False, "hard_swish", 2], + [3, 200, 80, False, "hard_swish", 1], + [3, 184, 80, False, "hard_swish", 1], + [3, 184, 80, False, "hard_swish", 1], + [3, 480, 112, True, "hard_swish", 1], + [3, 672, 112, True, "hard_swish", + 1], # output 3 -> out_index=11 + [5, 672, 160, True, "hard_swish", 2], + [5, 960, 160, True, "hard_swish", 1], + [5, 960, 160, True, "hard_swish", + 1], # output 3 -> out_index=14 + ] + self.out_indices = [2, 5, 11, 14] + + self.cls_ch_squeeze = 960 + self.cls_ch_expand = 1280 + elif model_name == "small": + self.cfg = [ + # k, exp, c, se, nl, s, + [3, 16, 16, True, "relu", 2], # output 1 -> out_index=0 + [3, 72, 24, False, "relu", 2], + [3, 88, 24, False, "relu", 1], # output 2 -> out_index=3 + [5, 96, 40, True, "hard_swish", 2], + [5, 240, 40, True, "hard_swish", 1], + [5, 240, 40, True, "hard_swish", 1], + [5, 120, 48, True, "hard_swish", 1], + [5, 144, 48, True, "hard_swish", 1], # output 3 -> out_index=7 + [5, 288, 96, True, "hard_swish", 2], + [5, 576, 96, True, "hard_swish", 1], + [5, 576, 96, True, "hard_swish", 1], # output 4 -> out_index=10 + ] + self.out_indices = [0, 3, 7, 10] + + self.cls_ch_squeeze = 576 + self.cls_ch_expand = 1280 + else: + raise NotImplementedError( + "mode[{}_model] is not implemented!".format(model_name)) + + ################################################### + # modify stride and dilation based on output_stride 
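+        # When output_stride is set (e.g. 8 or 16), modify_bottle_params below
+        # forces stride-2 blocks past that point back to stride 1 and
+        # compensates with an accumulated dilation rate in dilation_cfg, so
+        # the backbone keeps a denser feature map for segmentation.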
+ self.dilation_cfg = [1] * len(self.cfg) + self.modify_bottle_params(output_stride=output_stride) + ################################################### + + self.conv1 = ConvBNLayer( + in_c=3, + out_c=make_divisible(inplanes * scale), + filter_size=3, + stride=2, + padding=1, + num_groups=1, + if_act=True, + act="hard_swish", + name="conv1") + + self.block_list = [] + + inplanes = make_divisible(inplanes * scale) + for i, (k, exp, c, se, nl, s) in enumerate(self.cfg): + ###################################### + # add dilation rate + dilation_rate = self.dilation_cfg[i] + ###################################### + self.block_list.append( + ResidualUnit( + in_c=inplanes, + mid_c=make_divisible(scale * exp), + out_c=make_divisible(scale * c), + filter_size=k, + stride=s, + dilation=dilation_rate, + use_se=se, + act=nl, + name="conv" + str(i + 2))) + self.add_sublayer( + sublayer=self.block_list[-1], name="conv" + str(i + 2)) + inplanes = make_divisible(scale * c) + + self.last_second_conv = ConvBNLayer( + in_c=inplanes, + out_c=make_divisible(scale * self.cls_ch_squeeze), + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + act="hard_swish", + name="conv_last") + + self.pool = Pool2D( + pool_type="avg", global_pooling=True, use_cudnn=False) + + self.last_conv = Conv2D( + num_channels=make_divisible(scale * self.cls_ch_squeeze), + num_filters=self.cls_ch_expand, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr(name="last_1x1_conv_weights"), + bias_attr=False) + + self.out = Linear( + input_dim=self.cls_ch_expand, + output_dim=class_dim, + param_attr=ParamAttr("fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + self.init_weight(backbone_pretrained) + + def modify_bottle_params(self, output_stride=None): + + if output_stride is not None and output_stride % 2 != 0: + raise Exception("output stride must to be even number") + if output_stride is not None: + stride = 2 + rate = 1 + for i, _cfg in enumerate(self.cfg): + stride = stride * _cfg[-1] + if stride > output_stride: + rate = rate * _cfg[-1] + self.cfg[i][-1] = 1 + + self.dilation_cfg[i] = rate + + def forward(self, inputs, label=None, dropout_prob=0.2): + x = self.conv1(inputs) + # A feature list saves each downsampling feature. + feat_list = [] + for i, block in enumerate(self.block_list): + x = block(x) + if i in self.out_indices: + feat_list.append(x) + #print("block {}:".format(i),x.shape, self.dilation_cfg[i]) + x = self.last_second_conv(x) + x = self.pool(x) + x = self.last_conv(x) + x = fluid.layers.hard_swish(x) + x = fluid.layers.dropout(x=x, dropout_prob=dropout_prob) + x = fluid.layers.reshape(x, shape=[x.shape[0], x.shape[1]]) + x = self.out(x) + + return x, feat_list + + def init_weight(self, pretrained_model=None): + """ + Initialize the parameters of model parts. + Args: + pretrained_model ([str], optional): the path of pretrained model. Defaults to None. 
+ """ + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self, pretrained_model) + else: + raise Exception('Pretrained model is not found: {}'.format( + pretrained_model)) + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + in_c, + out_c, + filter_size, + stride, + padding, + dilation=1, + num_groups=1, + if_act=True, + act=None, + use_cudnn=True, + name=""): + super(ConvBNLayer, self).__init__() + self.if_act = if_act + self.act = act + + self.conv = fluid.dygraph.Conv2D( + num_channels=in_c, + num_filters=out_c, + filter_size=filter_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=num_groups, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + use_cudnn=use_cudnn, + act=None) + self.bn = BatchNorm( + num_features=out_c, + weight_attr=ParamAttr( + name=name + "_bn_scale", + regularizer=fluid.regularizer.L2DecayRegularizer( + regularization_coeff=0.0)), + bias_attr=ParamAttr( + name=name + "_bn_offset", + regularizer=fluid.regularizer.L2DecayRegularizer( + regularization_coeff=0.0))) + + self._act_op = layer_utils.Activation(act=None) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.if_act: + if self.act == "relu": + x = fluid.layers.relu(x) + elif self.act == "hard_swish": + x = fluid.layers.hard_swish(x) + else: + print("The activation function is selected incorrectly.") + exit() + return x + + +class ResidualUnit(fluid.dygraph.Layer): + def __init__(self, + in_c, + mid_c, + out_c, + filter_size, + stride, + use_se, + dilation=1, + act=None, + name=''): + super(ResidualUnit, self).__init__() + self.if_shortcut = stride == 1 and in_c == out_c + self.if_se = use_se + + self.expand_conv = ConvBNLayer( + in_c=in_c, + out_c=mid_c, + filter_size=1, + stride=1, + padding=0, + if_act=True, + act=act, + name=name + "_expand") + + self.bottleneck_conv = ConvBNLayer( + in_c=mid_c, + out_c=mid_c, + filter_size=filter_size, + stride=stride, + padding=get_padding_same( + filter_size, + dilation), #int((filter_size - 1) // 2) + (dilation - 1), + dilation=dilation, + num_groups=mid_c, + if_act=True, + act=act, + name=name + "_depthwise") + if self.if_se: + self.mid_se = SEModule(mid_c, name=name + "_se") + self.linear_conv = ConvBNLayer( + in_c=mid_c, + out_c=out_c, + filter_size=1, + stride=1, + padding=0, + if_act=False, + act=None, + name=name + "_linear") + self.dilation = dilation + + def forward(self, inputs): + x = self.expand_conv(inputs) + x = self.bottleneck_conv(x) + if self.if_se: + x = self.mid_se(x) + x = self.linear_conv(x) + if self.if_shortcut: + x = fluid.layers.elementwise_add(inputs, x) + return x + + +class SEModule(fluid.dygraph.Layer): + def __init__(self, channel, reduction=4, name=""): + super(SEModule, self).__init__() + self.avg_pool = fluid.dygraph.Pool2D( + pool_type="avg", global_pooling=True, use_cudnn=False) + self.conv1 = fluid.dygraph.Conv2D( + num_channels=channel, + num_filters=channel // reduction, + filter_size=1, + stride=1, + padding=0, + act="relu", + param_attr=ParamAttr(name=name + "_1_weights"), + bias_attr=ParamAttr(name=name + "_1_offset")) + self.conv2 = fluid.dygraph.Conv2D( + num_channels=channel // reduction, + num_filters=channel, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr(name + "_2_weights"), + bias_attr=ParamAttr(name=name + "_2_offset")) + + def forward(self, inputs): + outputs = self.avg_pool(inputs) + outputs = self.conv1(outputs) + outputs = self.conv2(outputs) + outputs = 
fluid.layers.hard_sigmoid(outputs) + return fluid.layers.elementwise_mul(x=inputs, y=outputs, axis=0) + + +def MobileNetV3_small_x0_35(**kwargs): + model = MobileNetV3(model_name="small", scale=0.35, **kwargs) + return model + + +def MobileNetV3_small_x0_5(**kwargs): + model = MobileNetV3(model_name="small", scale=0.5, **kwargs) + return model + + +def MobileNetV3_small_x0_75(**kwargs): + model = MobileNetV3(model_name="small", scale=0.75, **kwargs) + return model + + +@manager.BACKBONES.add_component +def MobileNetV3_small_x1_0(**kwargs): + model = MobileNetV3(model_name="small", scale=1.0, **kwargs) + return model + + +def MobileNetV3_small_x1_25(**kwargs): + model = MobileNetV3(model_name="small", scale=1.25, **kwargs) + return model + + +def MobileNetV3_large_x0_35(**kwargs): + model = MobileNetV3(model_name="large", scale=0.35, **kwargs) + return model + + +def MobileNetV3_large_x0_5(**kwargs): + model = MobileNetV3(model_name="large", scale=0.5, **kwargs) + return model + + +def MobileNetV3_large_x0_75(**kwargs): + model = MobileNetV3(model_name="large", scale=0.75, **kwargs) + return model + + +@manager.BACKBONES.add_component +def MobileNetV3_large_x1_0(**kwargs): + model = MobileNetV3(model_name="large", scale=1.0, **kwargs) + return model + + +def MobileNetV3_large_x1_25(**kwargs): + model = MobileNetV3(model_name="large", scale=1.25, **kwargs) + return model diff --git a/dygraph/paddleseg/models/backbones/resnet_vd.py b/dygraph/paddleseg/models/backbones/resnet_vd.py new file mode 100644 index 0000000000000000000000000000000000000000..d7dfc66fd5dc44a6a27c04eea73dc692f857c61c --- /dev/null +++ b/dygraph/paddleseg/models/backbones/resnet_vd.py @@ -0,0 +1,417 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
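Every MobileNetV3 factory above only changes the width multiplier `scale`; each channel count in the config table is then rounded by make_divisible to a multiple of 8, dropping at most roughly 10% of the original value. A small worked sketch of that rounding, with the function body copied from mobilenetv3.py above:

def make_divisible(v, divisor=8, min_value=None):
    # Same rounding rule as in mobilenetv3.py above.
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Guarantee that rounding never removes more than ~10% of v.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

print(make_divisible(16 * 0.35))  # 5.6 rounds up to the floor of 8
print(make_divisible(96 * 1.25))  # 120 is already a multiple of 8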
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import math + +import numpy as np +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout +from paddle.nn import SyncBatchNorm as BatchNorm + +from paddleseg.utils import utils +from paddleseg.models.common import layer_utils +from paddleseg.cvlibs import manager + +__all__ = [ + "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd" +] + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__( + self, + num_channels, + num_filters, + filter_size, + stride=1, + dilation=1, + groups=1, + is_vd_mode=False, + act=None, + name=None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = Pool2D( + pool_size=2, + pool_stride=2, + pool_padding=0, + pool_type='avg', + ceil_mode=True) + self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + weight_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset')) + self._act_op = layer_utils.Activation(act=act) + + def forward(self, inputs): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + if_first=False, + dilation=1, + name=None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + + #################################################################### + # If given dilation rate > 1, using corresponding padding + if self.dilation > 1: + padding = self.dilation + y = fluid.layers.pad( + y, [0, 0, 0, 0, padding, padding, padding, padding]) + ##################################################################### + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = fluid.layers.elementwise_add(x=short, y=conv2) + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) + + +class BasicBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + 
if_first=False, + name=None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + act=None, + name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = fluid.layers.elementwise_add(x=short, y=conv1) + + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) + + +class ResNet_vd(fluid.dygraph.Layer): + def __init__(self, + backbone_pretrained=None, + layers=50, + class_dim=1000, + output_stride=None, + multi_grid=(1, 2, 4)): + super(ResNet_vd, self).__init__() + + self.layers = layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, 1024 + ] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + dilation_dict = None + if output_stride == 8: + dilation_dict = {2: 2, 3: 4} + elif output_stride == 16: + dilation_dict = {3: 2} + + self.conv1_1 = ConvBNLayer( + num_channels=3, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = ConvBNLayer( + num_channels=32, + num_filters=32, + filter_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = ConvBNLayer( + num_channels=32, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name="conv1_3") + self.pool2d_max = Pool2D( + pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + # self.block_list = [] + self.stage_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + ############################################################################### + # Add dilation rate for some segmentation tasks, if dilation_dict is not None. 
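+                # With output_stride=8, dilation_dict={2: 2, 3: 4} dilates the
+                # third and fourth stages; with output_stride=16 only the
+                # fourth stage is dilated ({3: 2}). `block` indexes stages
+                # from 0 here, so block == 2 is the third stage.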
+ dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + + # Actually block here is 'stage', and i is 'block' in 'stage' + # At the stage 4, expand the the dilation_rate using multi_grid, default (1, 2, 4) + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + #print("stage {}, block {}: dilation rate".format(block, i), dilation_rate) + ############################################################################### + + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name, + dilation=dilation_rate)) + + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + else: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + block_list.append(basic_block) + shortcut = True + self.stage_list.append(block_list) + + self.pool2d_avg = Pool2D( + pool_size=7, pool_type='avg', global_pooling=True) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + self.init_weight(backbone_pretrained) + + def forward(self, inputs): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + + # A feature list saves the output feature map of each stage. + feat_list = [] + for i, stage in enumerate(self.stage_list): + for j, block in enumerate(stage): + y = block(y) + #print("stage {} block {}".format(i+1, j+1), y.shape) + feat_list.append(y) + + y = self.pool2d_avg(y) + y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y, feat_list + + # def init_weight(self, pretrained_model=None): + + # if pretrained_model is not None: + # if os.path.exists(pretrained_model): + # utils.load_pretrained_model(self, pretrained_model) + + def init_weight(self, pretrained_model=None): + """ + Initialize the parameters of model parts. + Args: + pretrained_model ([str], optional): the path of pretrained model. Defaults to None. 
+ """ + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self, pretrained_model) + else: + raise Exception('Pretrained model is not found: {}'.format( + pretrained_model)) + + +def ResNet18_vd(**args): + model = ResNet_vd(layers=18, **args) + return model + + +def ResNet34_vd(**args): + model = ResNet_vd(layers=34, **args) + return model + + +@manager.BACKBONES.add_component +def ResNet50_vd(**args): + model = ResNet_vd(layers=50, **args) + return model + + +@manager.BACKBONES.add_component +def ResNet101_vd(**args): + model = ResNet_vd(layers=101, **args) + return model + + +def ResNet152_vd(**args): + model = ResNet_vd(layers=152, **args) + return model + + +def ResNet200_vd(**args): + model = ResNet_vd(layers=200, **args) + return model diff --git a/dygraph/paddleseg/models/backbones/xception_deeplab.py b/dygraph/paddleseg/models/backbones/xception_deeplab.py new file mode 100644 index 0000000000000000000000000000000000000000..f512e31ab372b8bc453d8d0506bbc45839a08d27 --- /dev/null +++ b/dygraph/paddleseg/models/backbones/xception_deeplab.py @@ -0,0 +1,452 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout +from paddle.nn import SyncBatchNorm as BatchNorm + +from paddleseg.models.common import layer_utils +from paddleseg.cvlibs import manager +from paddleseg.utils import utils + +__all__ = ["Xception41_deeplab", "Xception65_deeplab", "Xception71_deeplab"] + + +def check_data(data, number): + if type(data) == int: + return [data] * number + assert len(data) == number + return data + + +def check_stride(s, os): + if s <= os: + return True + else: + return False + + +def check_points(count, points): + if points is None: + return False + else: + if isinstance(points, list): + return (True if count in points else False) + else: + return (True if count == points else False) + + +def gen_bottleneck_params(backbone='xception_65'): + if backbone == 'xception_65': + bottleneck_params = { + "entry_flow": (3, [2, 2, 2], [128, 256, 728]), + "middle_flow": (16, 1, 728), + "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]) + } + elif backbone == 'xception_41': + bottleneck_params = { + "entry_flow": (3, [2, 2, 2], [128, 256, 728]), + "middle_flow": (8, 1, 728), + "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]) + } + elif backbone == 'xception_71': + bottleneck_params = { + "entry_flow": (5, [2, 1, 2, 1, 2], [128, 256, 256, 728, 728]), + "middle_flow": (16, 1, 728), + "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]) + } + else: + raise Exception( + "xception backbont only support xception_41/xception_65/xception_71" + ) + return bottleneck_params + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + 
input_channels, + output_channels, + filter_size, + stride=1, + padding=0, + act=None, + name=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + num_channels=input_channels, + num_filters=output_channels, + filter_size=filter_size, + stride=stride, + padding=padding, + param_attr=ParamAttr(name=name + "/weights"), + bias_attr=False) + self._bn = BatchNorm( + num_features=output_channels, + epsilon=1e-3, + momentum=0.99, + weight_attr=ParamAttr(name=name + "/BatchNorm/gamma"), + bias_attr=ParamAttr(name=name + "/BatchNorm/beta")) + + self._act_op = layer_utils.Activation(act=act) + + def forward(self, inputs): + + return self._act_op(self._bn(self._conv(inputs))) + + +class Seperate_Conv(fluid.dygraph.Layer): + def __init__(self, + input_channels, + output_channels, + stride, + filter, + dilation=1, + act=None, + name=None): + super(Seperate_Conv, self).__init__() + + self._conv1 = Conv2D( + num_channels=input_channels, + num_filters=input_channels, + filter_size=filter, + stride=stride, + groups=input_channels, + padding=(filter) // 2 * dilation, + dilation=dilation, + param_attr=ParamAttr(name=name + "/depthwise/weights"), + bias_attr=False) + self._bn1 = BatchNorm( + input_channels, + epsilon=1e-3, + momentum=0.99, + weight_attr=ParamAttr(name=name + "/depthwise/BatchNorm/gamma"), + bias_attr=ParamAttr(name=name + "/depthwise/BatchNorm/beta")) + + self._act_op1 = layer_utils.Activation(act=act) + + self._conv2 = Conv2D( + input_channels, + output_channels, + 1, + stride=1, + groups=1, + padding=0, + param_attr=ParamAttr(name=name + "/pointwise/weights"), + bias_attr=False) + self._bn2 = BatchNorm( + output_channels, + epsilon=1e-3, + momentum=0.99, + weight_attr=ParamAttr(name=name + "/pointwise/BatchNorm/gamma"), + bias_attr=ParamAttr(name=name + "/pointwise/BatchNorm/beta")) + + self._act_op2 = layer_utils.Activation(act=act) + + def forward(self, inputs): + x = self._conv1(inputs) + x = self._bn1(x) + x = self._act_op1(x) + x = self._conv2(x) + x = self._bn2(x) + x = self._act_op2(x) + return x + + +class Xception_Block(fluid.dygraph.Layer): + def __init__(self, + input_channels, + output_channels, + strides=1, + filter_size=3, + dilation=1, + skip_conv=True, + has_skip=True, + activation_fn_in_separable_conv=False, + name=None): + super(Xception_Block, self).__init__() + + repeat_number = 3 + output_channels = check_data(output_channels, repeat_number) + filter_size = check_data(filter_size, repeat_number) + strides = check_data(strides, repeat_number) + + self.has_skip = has_skip + self.skip_conv = skip_conv + self.activation_fn_in_separable_conv = activation_fn_in_separable_conv + if not activation_fn_in_separable_conv: + self._conv1 = Seperate_Conv( + input_channels, + output_channels[0], + stride=strides[0], + filter=filter_size[0], + dilation=dilation, + name=name + "/separable_conv1") + self._conv2 = Seperate_Conv( + output_channels[0], + output_channels[1], + stride=strides[1], + filter=filter_size[1], + dilation=dilation, + name=name + "/separable_conv2") + self._conv3 = Seperate_Conv( + output_channels[1], + output_channels[2], + stride=strides[2], + filter=filter_size[2], + dilation=dilation, + name=name + "/separable_conv3") + else: + self._conv1 = Seperate_Conv( + input_channels, + output_channels[0], + stride=strides[0], + filter=filter_size[0], + act="relu", + dilation=dilation, + name=name + "/separable_conv1") + self._conv2 = Seperate_Conv( + output_channels[0], + output_channels[1], + stride=strides[1], + filter=filter_size[1], + act="relu", + 
dilation=dilation,
+                name=name + "/separable_conv2")
+            self._conv3 = Seperate_Conv(
+                output_channels[1],
+                output_channels[2],
+                stride=strides[2],
+                filter=filter_size[2],
+                act="relu",
+                dilation=dilation,
+                name=name + "/separable_conv3")
+
+        if has_skip and skip_conv:
+            self._short = ConvBNLayer(
+                input_channels,
+                output_channels[-1],
+                1,
+                stride=strides[-1],
+                padding=0,
+                name=name + "/shortcut")
+
+    def forward(self, inputs):
+        layer_helper = LayerHelper(self.full_name(), act='relu')
+        if not self.activation_fn_in_separable_conv:
+            x = layer_helper.append_activation(inputs)
+            x = self._conv1(x)
+            x = layer_helper.append_activation(x)
+            x = self._conv2(x)
+            x = layer_helper.append_activation(x)
+            x = self._conv3(x)
+        else:
+            x = self._conv1(inputs)
+            x = self._conv2(x)
+            x = self._conv3(x)
+        if not self.has_skip:
+            return x
+        if self.skip_conv:
+            skip = self._short(inputs)
+        else:
+            skip = inputs
+        return fluid.layers.elementwise_add(x, skip)
+
+
+class XceptionDeeplab(fluid.dygraph.Layer):
+    def __init__(self,
+                 backbone,
+                 backbone_pretrained=None,
+                 output_stride=16,
+                 class_dim=1000):
+
+        super(XceptionDeeplab, self).__init__()
+
+        bottleneck_params = gen_bottleneck_params(backbone)
+        self.backbone = backbone
+
+        self._conv1 = ConvBNLayer(
+            3,
+            32,
+            3,
+            stride=2,
+            padding=1,
+            act="relu",
+            name=self.backbone + "/entry_flow/conv1")
+        self._conv2 = ConvBNLayer(
+            32,
+            64,
+            3,
+            stride=1,
+            padding=1,
+            act="relu",
+            name=self.backbone + "/entry_flow/conv2")
+
+        self.block_num = bottleneck_params["entry_flow"][0]
+        self.strides = bottleneck_params["entry_flow"][1]
+        self.chns = bottleneck_params["entry_flow"][2]
+        self.strides = check_data(self.strides, self.block_num)
+        self.chns = check_data(self.chns, self.block_num)
+
+        self.entry_flow = []
+        self.middle_flow = []
+
+        self.stride = 2
+        self.output_stride = output_stride
+        s = self.stride
+
+        for i in range(self.block_num):
+            stride = self.strides[i] if check_stride(s * self.strides[i],
+                                                     self.output_stride) else 1
+            xception_block = self.add_sublayer(
+                self.backbone + "/entry_flow/block" + str(i + 1),
+                Xception_Block(
+                    input_channels=64 if i == 0 else self.chns[i - 1],
+                    output_channels=self.chns[i],
+                    # use the per-block stride checked against output_stride,
+                    # not the running total kept in self.stride
+                    strides=[1, 1, stride],
+                    name=self.backbone + "/entry_flow/block" + str(i + 1)))
+            self.entry_flow.append(xception_block)
+            s = s * stride
+        self.stride = s
+
+        self.block_num = bottleneck_params["middle_flow"][0]
+        self.strides = bottleneck_params["middle_flow"][1]
+        self.chns = bottleneck_params["middle_flow"][2]
+        self.strides = check_data(self.strides, self.block_num)
+        self.chns = check_data(self.chns, self.block_num)
+        s = self.stride
+
+        for i in range(self.block_num):
+            stride = self.strides[i] if check_stride(s * self.strides[i],
+                                                     self.output_stride) else 1
+            xception_block = self.add_sublayer(
+                self.backbone + "/middle_flow/block" + str(i + 1),
+                Xception_Block(
+                    input_channels=728,
+                    output_channels=728,
+                    strides=[1, 1, stride],
+                    skip_conv=False,
+                    name=self.backbone + "/middle_flow/block" + str(i +
1))) + self.middle_flow.append(xception_block) + s = s * stride + self.stride = s + + self.block_num = bottleneck_params["exit_flow"][0] + self.strides = bottleneck_params["exit_flow"][1] + self.chns = bottleneck_params["exit_flow"][2] + self.strides = check_data(self.strides, self.block_num) + self.chns = check_data(self.chns, self.block_num) + s = self.stride + stride = self.strides[0] if check_stride(s * self.strides[0], + self.output_stride) else 1 + self._exit_flow_1 = Xception_Block( + 728, + self.chns[0], [1, 1, stride], + name=self.backbone + "/exit_flow/block1") + s = s * stride + stride = self.strides[1] if check_stride(s * self.strides[1], + self.output_stride) else 1 + self._exit_flow_2 = Xception_Block( + self.chns[0][-1], + self.chns[1], [1, 1, stride], + dilation=2, + has_skip=False, + activation_fn_in_separable_conv=True, + name=self.backbone + "/exit_flow/block2") + s = s * stride + + self.stride = s + + self._drop = Dropout(p=0.5) + self._pool = Pool2D(pool_type="avg", global_pooling=True) + self._fc = Linear( + self.chns[1][-1], + class_dim, + param_attr=ParamAttr(name="fc_weights"), + bias_attr=ParamAttr(name="fc_bias")) + + self.init_weight(backbone_pretrained) + + def forward(self, inputs): + x = self._conv1(inputs) + x = self._conv2(x) + feat_list = [] + for i, ef in enumerate(self.entry_flow): + x = ef(x) + if i == 0: + feat_list.append(x) + for mf in self.middle_flow: + x = mf(x) + x = self._exit_flow_1(x) + x = self._exit_flow_2(x) + feat_list.append(x) + + x = self._drop(x) + x = self._pool(x) + x = fluid.layers.squeeze(x, axes=[2, 3]) + x = self._fc(x) + return x, feat_list + + def init_weight(self, pretrained_model=None): + """ + Initialize the parameters of model parts. + Args: + pretrained_model ([str], optional): the path of pretrained model. Defaults to None. + """ + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self, pretrained_model) + else: + raise Exception('Pretrained model is not found: {}'.format( + pretrained_model)) + + +def Xception41_deeplab(**args): + model = XceptionDeeplab('xception_41', **args) + return model + + +@manager.BACKBONES.add_component +def Xception65_deeplab(**args): + model = XceptionDeeplab("xception_65", **args) + return model + + +def Xception71_deeplab(**args): + model = XceptionDeeplab("xception_71", **args) + return model diff --git a/dygraph/paddleseg/models/common/__init__.py b/dygraph/paddleseg/models/common/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9f30b50f2fc80c9effd59dbf3c134de66de04c44 --- /dev/null +++ b/dygraph/paddleseg/models/common/__init__.py @@ -0,0 +1,17 @@ +# -*- encoding: utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import layer_utils +from . 
import model_utils
\ No newline at end of file
diff --git a/dygraph/paddleseg/models/common/layer_utils.py b/dygraph/paddleseg/models/common/layer_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d41ebb130cbbca11feebfd87e030628ea44cd27
--- /dev/null
+++ b/dygraph/paddleseg/models/common/layer_utils.py
@@ -0,0 +1,143 @@
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2d
+from paddle.nn import SyncBatchNorm as BatchNorm
+from paddle.nn.layer import activation
+
+
+class ConvBnRelu(nn.Layer):
+    def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
+
+        super(ConvBnRelu, self).__init__()
+
+        self.conv = Conv2d(in_channels, out_channels, kernel_size, **kwargs)
+
+        self.batch_norm = BatchNorm(out_channels)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.batch_norm(x)
+        x = F.relu(x)
+        return x
+
+
+class ConvBn(nn.Layer):
+    def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
+
+        super(ConvBn, self).__init__()
+
+        self.conv = Conv2d(in_channels, out_channels, kernel_size, **kwargs)
+
+        self.batch_norm = BatchNorm(out_channels)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.batch_norm(x)
+        return x
+
+
+class ConvReluPool(nn.Layer):
+    def __init__(self, in_channels, out_channels):
+        super(ConvReluPool, self).__init__()
+        self.conv = Conv2d(
+            in_channels,
+            out_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            dilation=1)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = F.relu(x)
+        x = F.pool2d(x, pool_size=2, pool_type="max", pool_stride=2)
+        return x
+
+
+class DepthwiseConvBnRelu(nn.Layer):
+    def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
+        super(DepthwiseConvBnRelu, self).__init__()
+        self.depthwise_conv = ConvBn(
+            in_channels,
+            out_channels=in_channels,
+            kernel_size=kernel_size,
+            groups=in_channels,
+            **kwargs)
+        self.pointwise_conv = ConvBnRelu(
+            in_channels, out_channels, kernel_size=1, groups=1)
+
+    def forward(self, x):
+        x = self.depthwise_conv(x)
+        x = self.pointwise_conv(x)
+        return x
+
+
+class Activation(nn.Layer):
+    """
+    The wrapper of activations
+    For example:
+    >>> relu = Activation("relu")
+    >>> print(relu)
+
+    >>> sigmoid = Activation("sigmoid")
+    >>> print(sigmoid)
+
+    >>> not_exist_one = Activation("not_exist_one")
+    KeyError: "not_exist_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink',
+    'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax',
+    'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])"
+
+    Args:
+        act (str): the activation name in lowercase
+    """
+
+    def __init__(self, act=None):
+        super(Activation, self).__init__()
+
+        self._act = act
+        upper_act_names = activation.__all__
+        lower_act_names = [act.lower() for act in upper_act_names]
+        act_dict = dict(zip(lower_act_names, upper_act_names))
+
+        if act is not None:
+            if act in act_dict.keys():
+                act_name = act_dict[act]
+                self.act_func = getattr(activation, act_name)()
+            else:
+                raise KeyError("{} does not exist in the current {}".format(
+                    act, act_dict.keys()))
+
+    def forward(self, x):
+
+        if self._act is not None:
+            return self.act_func(x)
+        else:
+            return x
diff --git a/dygraph/paddleseg/models/common/model_utils.py b/dygraph/paddleseg/models/common/model_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7de39c8e77fad0021d3e910a9c02f3f6d774c32d
--- /dev/null
+++ b/dygraph/paddleseg/models/common/model_utils.py
@@ -0,0 +1,170 @@
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle.nn import SyncBatchNorm as BatchNorm
+
+from paddleseg.models.common import layer_utils
+
+
+class FCNHead(nn.Layer):
+    """
+    The FCNHead implementation used as an auxiliary layer
+
+    Args:
+        in_channels (int): the number of input channels
+        out_channels (int): the number of output channels
+    """
+
+    def __init__(self, in_channels, out_channels):
+        super(FCNHead, self).__init__()
+
+        inter_channels = in_channels // 4
+        self.conv_bn_relu = layer_utils.ConvBnRelu(
+            in_channels=in_channels,
+            out_channels=inter_channels,
+            kernel_size=3,
+            padding=1)
+
+        self.conv = nn.Conv2d(
+            in_channels=inter_channels,
+            out_channels=out_channels,
+            kernel_size=1)
+
+    def forward(self, x):
+        x = self.conv_bn_relu(x)
+        x = F.dropout(x, p=0.1)
+        x = self.conv(x)
+        return x
+
+
+class AuxLayer(nn.Layer):
+    """
+    The auxiliary layer implementation for auxiliary loss
+
+    Args:
+        in_channels (int): the number of input channels.
+        inter_channels (int): intermediate channels.
+        out_channels (int): the number of output channels, which is usually num_classes.
+        dropout_prob (float): the dropout ratio applied before the last conv. Default to 0.1.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 inter_channels,
+                 out_channels,
+                 dropout_prob=0.1):
+        super(AuxLayer, self).__init__()
+
+        self.conv_bn_relu = layer_utils.ConvBnRelu(
+            in_channels=in_channels,
+            out_channels=inter_channels,
+            kernel_size=3,
+            padding=1)
+
+        self.conv = nn.Conv2d(
+            in_channels=inter_channels,
+            out_channels=out_channels,
+            kernel_size=1)
+
+        self.dropout_prob = dropout_prob
+
+    def forward(self, x):
+        x = self.conv_bn_relu(x)
+        x = F.dropout(x, p=self.dropout_prob)
+        x = self.conv(x)
+        return x
+
+
+class PPModule(nn.Layer):
+    """
+    Pyramid pooling module
+
+    Args:
+        in_channels (int): the number of input channels to pyramid pooling module.
+
+        out_channels (int): the number of output channels after pyramid pooling module.
+
+        bin_sizes (tuple): the out size of pooled feature maps. Default to (1, 2, 3, 6).
+
+        dim_reduction (bool): whether to reduce the channel dimension after pooling. Default to True.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 bin_sizes=(1, 2, 3, 6),
+                 dim_reduction=True):
+        super(PPModule, self).__init__()
+        self.bin_sizes = bin_sizes
+
+        inter_channels = in_channels
+        if dim_reduction:
+            inter_channels = in_channels // len(bin_sizes)
+
+        # we use dimension reduction after pooling mentioned in original implementation.
+        self.stages = nn.LayerList([
+            self._make_stage(in_channels, inter_channels, size)
+            for size in bin_sizes
+        ])
+
+        self.conv_bn_relu2 = layer_utils.ConvBnRelu(
+            in_channels=in_channels + inter_channels * len(bin_sizes),
+            out_channels=out_channels,
+            kernel_size=3,
+            padding=1)
+
+    def _make_stage(self, in_channels, out_channels, size):
+        """
+        Create one pooling layer.
+
+        In our implementation, we adopt the same dimension reduction as the original paper that might be
+        slightly different from other implementations.
+
+        After pooling, the channels are reduced to 1/len(bin_sizes) immediately, while some other implementations
+        keep the channels unchanged.
+
+        Args:
+            in_channels (int): the number of input channels to pyramid pooling module.
+
+            out_channels (int): the number of output channels of the conv layer.
+
+            size (int): the out size of the pooled layer.
+
+        Returns:
+            conv (Layer): a 1x1 ConvBnRelu applied to the pooled feature map.
+        """
+
+        # this paddle version does not support the AdaptiveAvgPool2d layer, so the
+        # adaptive pooling is done with the functional API in forward instead.
+        # prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
+        conv = layer_utils.ConvBnRelu(
+            in_channels=in_channels, out_channels=out_channels, kernel_size=1)
+
+        return conv
+
+    def forward(self, input):
+        cat_layers = []
+        for i, stage in enumerate(self.stages):
+            size = self.bin_sizes[i]
+            # average pooling, as in the original PSPNet paper
+            x = F.adaptive_pool2d(
+                input, pool_size=(size, size), pool_type="avg")
+            x = stage(x)
+            x = F.resize_bilinear(x, out_shape=input.shape[2:])
+            cat_layers.append(x)
+        cat_layers = [input] + cat_layers[::-1]
+        cat = paddle.concat(cat_layers, axis=1)
+        out = self.conv_bn_relu2(cat)
+
+        return out
diff --git a/dygraph/paddleseg/models/deeplab.py b/dygraph/paddleseg/models/deeplab.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c7e0cb187808baa8c7543d8eda7773a53c5b0fc
--- /dev/null
+++ b/dygraph/paddleseg/models/deeplab.py
@@ -0,0 +1,287 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import paddle
+import paddle.nn.functional as F
+from paddle import nn
+from paddleseg.cvlibs import manager
+from paddleseg.models.common import layer_utils
+from paddleseg.utils import utils
+
+__all__ = ['DeepLabV3P', 'DeepLabV3']
+
+
+@manager.MODELS.add_component
+class DeepLabV3P(nn.Layer):
+    """
+    The DeepLabV3Plus implementation based on PaddlePaddle.
+
+    The original article refers to
+    "Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation"
+    Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, Hartwig Adam.
+    (https://arxiv.org/abs/1802.02611)
+
+    The DeepLabV3P consists of three main components: Backbone, ASPP and Decoder.
+
+    Args:
+        num_classes (int): the unique number of target classes.
+
+        backbone (paddle.nn.Layer): backbone network, currently supports Xception65, Resnet101_vd.
+
+        model_pretrained (str): the path of pretrained model.
+
+        output_stride (int): the ratio of input size and final feature size.
+            Support 16 or 8. Default to 16.
+
+        backbone_indices (tuple): two values in the tuple indicate the indices of backbone outputs.
+            the first index will be taken as a low-level feature in the Decoder component;
+            the second one will be taken as input of ASPP component.
+            Usually backbone consists of four downsampling stages, and returns an output of
+            each stage, so we set default (0, 3), which means taking feature map of the first
+            stage in backbone as low-level feature used in Decoder, and feature map of the fourth
+            stage as input of ASPP.
+
+        backbone_channels (tuple): the same length as "backbone_indices". It indicates the channels of corresponding index.
+
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 model_pretrained=None,
+                 backbone_indices=(0, 3),
+                 backbone_channels=(256, 2048),
+                 output_stride=16):
+
+        super(DeepLabV3P, self).__init__()
+
+        self.backbone = backbone
+        self.aspp = ASPP(output_stride, backbone_channels[1])
+        self.decoder = Decoder(num_classes, backbone_channels[0])
+        self.backbone_indices = backbone_indices
+        self.init_weight(model_pretrained)
+
+    def forward(self, input, label=None):
+
+        logit_list = []
+        _, feat_list = self.backbone(input)
+        low_level_feat = feat_list[self.backbone_indices[0]]
+        x = feat_list[self.backbone_indices[1]]
+        x = self.aspp(x)
+        logit = self.decoder(x, low_level_feat)
+        logit = F.resize_bilinear(logit, input.shape[2:])
+        logit_list.append(logit)
+
+        return logit_list
+
+    def init_weight(self, pretrained_model=None):
+        """
+        Initialize the parameters of model parts.
+        Args:
+            pretrained_model (str, optional): the path of pretrained model. Defaults to None.
+        """
+        if pretrained_model is not None:
+            if os.path.exists(pretrained_model):
+                utils.load_pretrained_model(self, pretrained_model)
+            else:
+                raise Exception('Pretrained model is not found: {}'.format(
+                    pretrained_model))
+
+
+@manager.MODELS.add_component
+class DeepLabV3(nn.Layer):
+    """
+    The DeepLabV3 implementation based on PaddlePaddle.
+
+    The original article refers to
+    "Rethinking Atrous Convolution for Semantic Image Segmentation"
+    Liang-Chieh Chen, George Papandreou, Florian Schroff, Hartwig Adam.
+    (https://arxiv.org/pdf/1706.05587.pdf)
+
+    Args:
+        Refer to DeepLabV3P above.
+
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 model_pretrained=None,
+                 backbone_indices=(3,),
+                 backbone_channels=(2048,),
+                 output_stride=16):
+
+        super(DeepLabV3, self).__init__()
+
+        self.backbone = backbone
+        self.aspp = ASPP(output_stride, backbone_channels[0])
+        self.cls = nn.Conv2d(
+            in_channels=backbone_channels[0],
+            out_channels=num_classes,
+            kernel_size=1)
+
+        self.backbone_indices = backbone_indices
+        self.init_weight(model_pretrained)
+
+    def forward(self, input, label=None):
+
+        logit_list = []
+        _, feat_list = self.backbone(input)
+        x = feat_list[self.backbone_indices[0]]
+        logit = self.cls(x)
+        logit = F.resize_bilinear(logit, input.shape[2:])
+        logit_list.append(logit)
+
+        return logit_list
+
+    def init_weight(self, pretrained_model=None):
+        """
+        Initialize the parameters of model parts.
+        Args:
+            pretrained_model (str, optional): the path of pretrained model. Defaults to None.
+        """
+        if pretrained_model is not None:
+            if os.path.exists(pretrained_model):
+                utils.load_pretrained_model(self, pretrained_model)
+            else:
+                raise Exception('Pretrained model is not found: {}'.format(
+                    pretrained_model))
+
+
+class ImageAverage(nn.Layer):
+    """
+    Global average pooling
+
+    Args:
+        in_channels (int): the number of input channels.
+
+    """
+
+    def __init__(self, in_channels):
+        super(ImageAverage, self).__init__()
+        self.conv_bn_relu = layer_utils.ConvBnRelu(
+            in_channels, out_channels=256, kernel_size=1)
+
+    def forward(self, input):
+        x = paddle.reduce_mean(input, dim=[2, 3], keep_dim=True)
+        x = self.conv_bn_relu(x)
+        x = F.resize_bilinear(x, out_shape=input.shape[2:])
+        return x
+
+
+class ASPP(nn.Layer):
+    """
+    The Atrous Spatial Pyramid Pooling (ASPP) module of the DeepLabV3P model.
+
+    Args:
+        output_stride (int): the ratio of input size and final feature size. Support 16 or 8.
+
+        in_channels (int): the number of input channels to the ASPP module.
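+
+    A shape sketch (illustrative): with output_stride=16 the dilation rates
+    are (6, 12, 18), and every branch keeps the spatial size of its input,
+    so for a [N, 2048, H/16, W/16] feature map:
+
+        aspp = ASPP(output_stride=16, in_channels=2048)
+        # image pooling + 1x1 branch + three dilated 3x3 branches,
+        # 256 channels each -> concat to [N, 1280, H/16, W/16]
+        # -> fused back to [N, 256, H/16, W/16] by a 1x1 conv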
+ + """ + + def __init__(self, output_stride, in_channels): + super(ASPP, self).__init__() + + if output_stride == 16: + aspp_ratios = (6, 12, 18) + elif output_stride == 8: + aspp_ratios = (12, 24, 36) + else: + raise NotImplementedError( + "Only support output_stride is 8 or 16, but received{}".format( + output_stride)) + + self.image_average = ImageAverage(in_channels=in_channels) + + # The first aspp using 1*1 conv + self.aspp1 = layer_utils.DepthwiseConvBnRelu( + in_channels=in_channels, out_channels=256, kernel_size=1) + + # The second aspp using 3*3 (separable) conv at dilated rate aspp_ratios[0] + self.aspp2 = layer_utils.DepthwiseConvBnRelu( + in_channels=in_channels, + out_channels=256, + kernel_size=3, + dilation=aspp_ratios[0], + padding=aspp_ratios[0]) + + # The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[1] + self.aspp3 = layer_utils.DepthwiseConvBnRelu( + in_channels=in_channels, + out_channels=256, + kernel_size=3, + dilation=aspp_ratios[1], + padding=aspp_ratios[1]) + + # The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[2] + self.aspp4 = layer_utils.DepthwiseConvBnRelu( + in_channels=in_channels, + out_channels=256, + kernel_size=3, + dilation=aspp_ratios[2], + padding=aspp_ratios[2]) + + # After concat op, using 1*1 conv + self.conv_bn_relu = layer_utils.ConvBnRelu( + in_channels=1280, out_channels=256, kernel_size=1) + + def forward(self, x): + + x1 = self.image_average(x) + x2 = self.aspp1(x) + x3 = self.aspp2(x) + x4 = self.aspp3(x) + x5 = self.aspp4(x) + x = paddle.concat([x1, x2, x3, x4, x5], axis=1) + + x = self.conv_bn_relu(x) + x = F.dropout(x, p=0.1) # dropout_prob + return x + + +class Decoder(nn.Layer): + """ + Decoder module of DeepLabV3P model + + Args: + num_classes (int): the number of classes. + + in_channels (int): the number of input channels in decoder module. + + """ + + def __init__(self, num_classes, in_channels): + super(Decoder, self).__init__() + + self.conv_bn_relu1 = layer_utils.ConvBnRelu( + in_channels=in_channels, out_channels=48, kernel_size=1) + + self.conv_bn_relu2 = layer_utils.DepthwiseConvBnRelu( + in_channels=304, out_channels=256, kernel_size=3, padding=1) + self.conv_bn_relu3 = layer_utils.DepthwiseConvBnRelu( + in_channels=256, out_channels=256, kernel_size=3, padding=1) + self.conv = nn.Conv2d( + in_channels=256, out_channels=num_classes, kernel_size=1) + + def forward(self, x, low_level_feat): + low_level_feat = self.conv_bn_relu1(low_level_feat) + x = F.resize_bilinear(x, low_level_feat.shape[2:]) + x = paddle.concat([x, low_level_feat], axis=1) + x = self.conv_bn_relu2(x) + x = self.conv_bn_relu3(x) + x = self.conv(x) + return x diff --git a/dygraph/paddleseg/models/fast_scnn.py b/dygraph/paddleseg/models/fast_scnn.py new file mode 100644 index 0000000000000000000000000000000000000000..434f083e99d5337a51b3581f906b0a1fc518676e --- /dev/null +++ b/dygraph/paddleseg/models/fast_scnn.py @@ -0,0 +1,340 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import paddle.nn.functional as F
+from paddle import nn
+from paddleseg.cvlibs import manager
+from paddleseg.models.common import layer_utils, model_utils
+from paddleseg.utils import utils
+
+
+@manager.MODELS.add_component
+class FastSCNN(nn.Layer):
+    """
+    The FastSCNN implementation based on PaddlePaddle.
+
+    As mentioned in the original paper, FastSCNN is a real-time segmentation algorithm (123.5fps)
+    even for high resolution images (1024x2048).
+
+    The original article refers to
+    Poudel, Rudra PK, et al. "Fast-scnn: Fast semantic segmentation network."
+    (https://arxiv.org/pdf/1902.04502.pdf)
+
+    Args:
+
+        num_classes (int): the unique number of target classes.
+
+        model_pretrained (str): the path of pretrained model. Default to None.
+
+        enable_auxiliary_loss (bool): a bool value indicating whether to add auxiliary loss.
+            if true, auxiliary loss will be added after LearningToDownsample module, where the weight is 0.4. Default to True.
+
+    """
+
+    def __init__(self,
+                 num_classes,
+                 model_pretrained=None,
+                 enable_auxiliary_loss=True):
+
+        super(FastSCNN, self).__init__()
+
+        self.learning_to_downsample = LearningToDownsample(32, 48, 64)
+        self.global_feature_extractor = GlobalFeatureExtractor(
+            64, [64, 96, 128], 128, 6, [3, 3, 3])
+        self.feature_fusion = FeatureFusionModule(64, 128, 128)
+        self.classifier = Classifier(128, num_classes)
+
+        if enable_auxiliary_loss:
+            self.auxlayer = model_utils.AuxLayer(64, 32, num_classes)
+
+        self.enable_auxiliary_loss = enable_auxiliary_loss
+
+        self.init_weight(model_pretrained)
+
+    def forward(self, input, label=None):
+
+        logit_list = []
+        higher_res_features = self.learning_to_downsample(input)
+        x = self.global_feature_extractor(higher_res_features)
+        x = self.feature_fusion(higher_res_features, x)
+        logit = self.classifier(x)
+        logit = F.resize_bilinear(logit, input.shape[2:])
+        logit_list.append(logit)
+
+        if self.enable_auxiliary_loss:
+            auxiliary_logit = self.auxlayer(higher_res_features)
+            auxiliary_logit = F.resize_bilinear(auxiliary_logit,
+                                                input.shape[2:])
+            logit_list.append(auxiliary_logit)
+
+        return logit_list
+
+    def init_weight(self, pretrained_model=None):
+        """
+        Initialize the parameters of model parts.
+        Args:
+            pretrained_model (str, optional): the path of pretrained model. Defaults to None.
+        """
+        if pretrained_model is not None:
+            if os.path.exists(pretrained_model):
+                utils.load_pretrained_model(self, pretrained_model)
+            else:
+                raise Exception('Pretrained model is not found: {}'.format(
+                    pretrained_model))
+
+
+class LearningToDownsample(nn.Layer):
+    """
+    Learning to downsample module.
+
+    This module consists of three downsampling blocks (one Conv and two separable Conv)
+
+    Args:
+        dw_channels1 (int): the input channels of the first sep conv. Default to 32.
+
+        dw_channels2 (int): the input channels of the second sep conv. Default to 48.
+
+        out_channels (int): the output channels of LearningToDownsample module. Default to 64.
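+
+    A quick shape check (illustrative; run under a dygraph guard):
+
+        import numpy as np
+        import paddle.fluid as fluid
+
+        with fluid.dygraph.guard():
+            ltd = LearningToDownsample(32, 48, 64)
+            x = fluid.dygraph.to_variable(
+                np.zeros((1, 3, 256, 512), dtype='float32'))
+            y = ltd(x)  # three stride-2 convs -> roughly 1/8 resolution
+            # y.shape == [1, 64, 32, 64]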
+ """ + + def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64): + super(LearningToDownsample, self).__init__() + + self.conv_bn_relu = layer_utils.ConvBnRelu( + in_channels=3, out_channels=dw_channels1, kernel_size=3, stride=2) + self.dsconv_bn_relu1 = layer_utils.DepthwiseConvBnRelu( + in_channels=dw_channels1, + out_channels=dw_channels2, + kernel_size=3, + stride=2, + padding=1) + self.dsconv_bn_relu2 = layer_utils.DepthwiseConvBnRelu( + in_channels=dw_channels2, + out_channels=out_channels, + kernel_size=3, + stride=2, + padding=1) + + def forward(self, x): + x = self.conv_bn_relu(x) + x = self.dsconv_bn_relu1(x) + x = self.dsconv_bn_relu2(x) + return x + + +class GlobalFeatureExtractor(nn.Layer): + """ + Global feature extractor module + + This module consists of three LinearBottleneck blocks (like inverted residual introduced by MobileNetV2) and + a PPModule (introduced by PSPNet). + + Args: + in_channels (int): the number of input channels to the module. Default to 64. + + block_channels (tuple): a tuple represents output channels of each bottleneck block. Default to (64, 96, 128). + + out_channels (int): the number of output channels of the module. Default to 128. + + expansion (int): the expansion factor in bottleneck. Default to 6. + + num_blocks (tuple): it indicates the repeat time of each bottleneck. Default to (3, 3, 3). + """ + + def __init__(self, + in_channels=64, + block_channels=(64, 96, 128), + out_channels=128, + expansion=6, + num_blocks=(3, 3, 3)): + super(GlobalFeatureExtractor, self).__init__() + + self.bottleneck1 = self._make_layer(LinearBottleneck, in_channels, + block_channels[0], num_blocks[0], + expansion, 2) + self.bottleneck2 = self._make_layer(LinearBottleneck, block_channels[0], + block_channels[1], num_blocks[1], + expansion, 2) + self.bottleneck3 = self._make_layer(LinearBottleneck, block_channels[1], + block_channels[2], num_blocks[2], + expansion, 1) + + self.ppm = model_utils.PPModule( + block_channels[2], out_channels, dim_reduction=True) + + def _make_layer(self, + block, + in_channels, + out_channels, + blocks, + expansion=6, + stride=1): + layers = [] + layers.append(block(in_channels, out_channels, expansion, stride)) + for i in range(1, blocks): + layers.append(block(out_channels, out_channels, expansion, 1)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.bottleneck1(x) + x = self.bottleneck2(x) + x = self.bottleneck3(x) + x = self.ppm(x) + return x + + +class LinearBottleneck(nn.Layer): + """ + Single bottleneck implementation. + + Args: + in_channels (int): the number of input channels to bottleneck block. + + out_channels (int): the number of output channels of bottleneck block. + + expansion (int). the expansion factor in bottleneck. Default to 6. + + stride (int). the stride used in depth-wise conv. 
+ """ + + def __init__(self, + in_channels, + out_channels, + expansion=6, + stride=2, + **kwargs): + super(LinearBottleneck, self).__init__() + + self.use_shortcut = stride == 1 and in_channels == out_channels + + expand_channels = in_channels * expansion + self.block = nn.Sequential( + # pw + layer_utils.ConvBnRelu( + in_channels=in_channels, + out_channels=expand_channels, + kernel_size=1, + bias_attr=False), + # dw + layer_utils.ConvBnRelu( + in_channels=expand_channels, + out_channels=expand_channels, + kernel_size=3, + stride=stride, + padding=1, + groups=expand_channels, + bias_attr=False), + # pw-linear + nn.Conv2d( + in_channels=expand_channels, + out_channels=out_channels, + kernel_size=1, + bias_attr=False), + nn.SyncBatchNorm(out_channels)) + + def forward(self, x): + out = self.block(x) + if self.use_shortcut: + out = x + out + return out + + +class FeatureFusionModule(nn.Layer): + """ + Feature Fusion Module Implememtation. + + This module fuses high-resolution feature and low-resolution feature. + + Args: + high_in_channels (int): the channels of high-resolution feature (output of LearningToDownsample). + + low_in_channels (int). the channels of low-resolution feature (output of GlobalFeatureExtractor). + + out_channels (int). the output channels of this module. + """ + + def __init__(self, high_in_channels, low_in_channels, out_channels): + super(FeatureFusionModule, self).__init__() + + # There only depth-wise conv is used WITHOUT point-wise conv + self.dwconv = layer_utils.ConvBnRelu( + in_channels=low_in_channels, + out_channels=out_channels, + kernel_size=3, + padding=1, + groups=128, + bias_attr=False) + + self.conv_low_res = nn.Sequential( + nn.Conv2d( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=1), nn.SyncBatchNorm(out_channels)) + + self.conv_high_res = nn.Sequential( + nn.Conv2d( + in_channels=high_in_channels, + out_channels=out_channels, + kernel_size=1), nn.SyncBatchNorm(out_channels)) + + self.relu = nn.ReLU(True) + + def forward(self, high_res_input, low_res_input): + low_res_input = F.resize_bilinear(input=low_res_input, scale=4) + low_res_input = self.dwconv(low_res_input) + low_res_input = self.conv_low_res(low_res_input) + + high_res_input = self.conv_high_res(high_res_input) + + x = high_res_input + low_res_input + + return self.relu(x) + + +class Classifier(nn.Layer): + """ + The Classifier module implemetation. + + This module consists of two depth-wsie conv and one conv. + + Args: + input_channels (int): the input channels to this module. + + num_classes (int). the unique number of target classes. + + """ + + def __init__(self, input_channels, num_classes): + super(Classifier, self).__init__() + + self.dsconv1 = layer_utils.DepthwiseConvBnRelu( + in_channels=input_channels, + out_channels=input_channels, + kernel_size=3, + padding=1) + + self.dsconv2 = layer_utils.DepthwiseConvBnRelu( + in_channels=input_channels, + out_channels=input_channels, + kernel_size=3, + padding=1) + + self.conv = nn.Conv2d( + in_channels=input_channels, out_channels=num_classes, kernel_size=1) + + def forward(self, x): + x = self.dsconv1(x) + x = self.dsconv2(x) + x = F.dropout(x, p=0.1) # dropout_prob + x = self.conv(x) + return x diff --git a/dygraph/paddleseg/models/fcn.py b/dygraph/paddleseg/models/fcn.py new file mode 100644 index 0000000000000000000000000000000000000000..87446e017d142aa15aa373b9c17976701576a387 --- /dev/null +++ b/dygraph/paddleseg/models/fcn.py @@ -0,0 +1,204 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import os
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.param_attr import ParamAttr
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
+from paddle.fluid.initializer import Normal
+from paddle.nn import SyncBatchNorm as BatchNorm
+
+from paddleseg.cvlibs import manager
+from paddleseg import utils
+from paddleseg.cvlibs import param_init
+from paddleseg.utils import logger
+
+__all__ = [
+    "fcn_hrnet_w18_small_v1", "fcn_hrnet_w18_small_v2", "fcn_hrnet_w18",
+    "fcn_hrnet_w30", "fcn_hrnet_w32", "fcn_hrnet_w40", "fcn_hrnet_w44",
+    "fcn_hrnet_w48", "fcn_hrnet_w60", "fcn_hrnet_w64"
+]
+
+
+@manager.MODELS.add_component
+class FCN(fluid.dygraph.Layer):
+    """
+    Fully Convolutional Networks for Semantic Segmentation.
+    https://arxiv.org/abs/1411.4038
+
+    Args:
+        num_classes (int): the unique number of target classes.
+
+        backbone (paddle.nn.Layer): backbone networks.
+
+        model_pretrained (str): the path of pretrained model.
+
+        backbone_indices (tuple): one value in the tuple indicates the index of the backbone
+            output to use. Default to (-1, ), i.e. the last output of the backbone.
+
+        backbone_channels (tuple): the same length as "backbone_indices". It indicates the channels of corresponding index.
+
+        channels (int): the output channels of the conv layer before the final classifier.
+            Defaults to the channels of the selected backbone output if None.
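+
+    A minimal wiring sketch (`hrnet` and `image` stand for a backbone
+    instance and an input tensor built elsewhere):
+
+        model = FCN(num_classes=19,
+                    backbone=hrnet,
+                    backbone_indices=(-1, ),
+                    backbone_channels=(270, ))
+        logit = model(image)[0]  # resized back to the input resolution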
+ """ + + def __init__(self, + num_classes, + backbone, + backbone_pretrained=None, + model_pretrained=None, + backbone_indices=(-1, ), + backbone_channels=(270, ), + channels=None): + super(FCN, self).__init__() + + self.num_classes = num_classes + self.backbone_pretrained = backbone_pretrained + self.model_pretrained = model_pretrained + self.backbone_indices = backbone_indices + if channels is None: + channels = backbone_channels[backbone_indices[0]] + + self.backbone = backbone + self.conv_last_2 = ConvBNLayer( + num_channels=backbone_channels[backbone_indices[0]], + num_filters=channels, + filter_size=1, + stride=1) + self.conv_last_1 = Conv2D( + num_channels=channels, + num_filters=self.num_classes, + filter_size=1, + stride=1, + padding=0) + if self.training: + self.init_weight() + + def forward(self, x): + input_shape = x.shape[2:] + fea_list = self.backbone(x) + x = fea_list[self.backbone_indices[0]] + x = self.conv_last_2(x) + logit = self.conv_last_1(x) + logit = fluid.layers.resize_bilinear(logit, input_shape) + return [logit] + + def init_weight(self): + params = self.parameters() + for param in params: + param_name = param.name + if 'batch_norm' in param_name: + if 'w_0' in param_name: + param_init.constant_init(param, value=1.0) + elif 'b_0' in param_name: + param_init.constant_init(param, value=0.0) + if 'conv' in param_name and 'w_0' in param_name: + param_init.normal_init(param, scale=0.001) + + if self.model_pretrained is not None: + if os.path.exists(self.model_pretrained): + utils.load_pretrained_model(self, self.model_pretrained) + else: + raise Exception('Pretrained model is not found: {}'.format( + self.model_pretrained)) + elif self.backbone_pretrained is not None: + if os.path.exists(self.backbone_pretrained): + utils.load_pretrained_model(self.backbone, + self.backbone_pretrained) + else: + raise Exception('Pretrained model is not found: {}'.format( + self.backbone_pretrained)) + else: + logger.warning('No pretrained model to load, train from scratch') + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act="relu"): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + bias_attr=False) + self._batch_norm = BatchNorm(num_filters) + self.act = act + + def forward(self, input): + y = self._conv(input) + y = self._batch_norm(y) + if self.act == 'relu': + y = fluid.layers.relu(y) + return y + + +@manager.MODELS.add_component +def fcn_hrnet_w18_small_v1(*args, **kwargs): + return FCN(backbone='HRNet_W18_Small_V1', backbone_channels=(240), **kwargs) + + +@manager.MODELS.add_component +def fcn_hrnet_w18_small_v2(*args, **kwargs): + return FCN(backbone='HRNet_W18_Small_V2', backbone_channels=(270), **kwargs) + + +@manager.MODELS.add_component +def fcn_hrnet_w18(*args, **kwargs): + return FCN(backbone='HRNet_W18', backbone_channels=(270), **kwargs) + + +@manager.MODELS.add_component +def fcn_hrnet_w30(*args, **kwargs): + return FCN(backbone='HRNet_W30', backbone_channels=(450), **kwargs) + + +@manager.MODELS.add_component +def fcn_hrnet_w32(*args, **kwargs): + return FCN(backbone='HRNet_W32', backbone_channels=(480), **kwargs) + + +@manager.MODELS.add_component +def fcn_hrnet_w40(*args, **kwargs): + return FCN(backbone='HRNet_W40', backbone_channels=(600), **kwargs) + + +@manager.MODELS.add_component +def fcn_hrnet_w44(*args, 
+    return FCN(backbone='HRNet_W44', backbone_channels=(660, ), **kwargs)
+
+
+@manager.MODELS.add_component
+def fcn_hrnet_w48(*args, **kwargs):
+    return FCN(backbone='HRNet_W48', backbone_channels=(720, ), **kwargs)
+
+
+@manager.MODELS.add_component
+def fcn_hrnet_w60(*args, **kwargs):
+    return FCN(backbone='HRNet_W60', backbone_channels=(900, ), **kwargs)
+
+
+@manager.MODELS.add_component
+def fcn_hrnet_w64(*args, **kwargs):
+    return FCN(backbone='HRNet_W64', backbone_channels=(960, ), **kwargs)
diff --git a/dygraph/paddleseg/models/gcnet.py b/dygraph/paddleseg/models/gcnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..97a70d13f6c1f53a6123425f42db1315385d61d1
--- /dev/null
+++ b/dygraph/paddleseg/models/gcnet.py
@@ -0,0 +1,205 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import paddle
+import paddle.nn.functional as F
+from paddle import nn
+from paddleseg.cvlibs import manager
+from paddleseg.models.common import layer_utils, model_utils
+from paddleseg.utils import utils
+
+
+@manager.MODELS.add_component
+class GCNet(nn.Layer):
+    """
+    The GCNet implementation based on PaddlePaddle.
+
+    The original article refers to
+    Cao, Yue, et al. "GCNet: Non-local networks meet squeeze-excitation networks and beyond."
+    (https://arxiv.org/pdf/1904.11492.pdf)
+
+    Args:
+
+        num_classes (int): the unique number of target classes.
+
+        backbone (paddle.nn.Layer): backbone network, currently supports ResNet50/101.
+
+        model_pretrained (str): the path of pretrained model. Default to None.
+
+        backbone_indices (tuple): two values in the tuple indicate the indices of backbone outputs.
+            the first index will be taken as a deep-supervision feature in auxiliary layer;
+            the second one will be taken as input of GlobalContextBlock. Usually backbone
+            consists of four downsampling stages, and returns an output of each stage, so we
+            set default (2, 3), which means taking feature map of the third stage (res4b22)
+            and the fourth stage (res5c) in backbone.
+
+        backbone_channels (tuple): the same length as "backbone_indices". It indicates the channels of corresponding index.
+
+        gc_channels (int): input channels to Global Context Block. Default to 512.
+
+        ratio (float): it indicates the ratio of attention channels to gc_channels. Default to 1/4.
+
+        enable_auxiliary_loss (bool): a bool value indicating whether to add auxiliary loss. Default to True.
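+
+    A minimal wiring sketch (constructor names follow this repo; the
+    numbers are illustrative):
+
+        backbone = ResNet101_vd()
+        model = GCNet(num_classes=19, backbone=backbone)
+        # forward returns [logit, auxiliary_logit] when enable_auxiliary_loss is True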
+ + """ + + def __init__(self, + num_classes, + backbone, + model_pretrained=None, + backbone_indices=(2, 3), + backbone_channels=(1024, 2048), + gc_channels=512, + ratio=1 / 4, + enable_auxiliary_loss=True, + pretrained_model=None): + + super(GCNet, self).__init__() + + self.backbone = backbone + + in_channels = backbone_channels[1] + self.conv_bn_relu1 = layer_utils.ConvBnRelu( + in_channels=in_channels, + out_channels=gc_channels, + kernel_size=3, + padding=1) + + self.gc_block = GlobalContextBlock(in_channels=gc_channels, ratio=ratio) + + self.conv_bn_relu2 = layer_utils.ConvBnRelu( + in_channels=gc_channels, + out_channels=gc_channels, + kernel_size=3, + padding=1) + + self.conv_bn_relu3 = layer_utils.ConvBnRelu( + in_channels=in_channels + gc_channels, + out_channels=gc_channels, + kernel_size=3, + padding=1) + + self.conv = nn.Conv2d( + in_channels=gc_channels, out_channels=num_classes, kernel_size=1) + + if enable_auxiliary_loss: + self.auxlayer = model_utils.AuxLayer( + in_channels=backbone_channels[0], + inter_channels=backbone_channels[0] // 4, + out_channels=num_classes) + + self.backbone_indices = backbone_indices + self.enable_auxiliary_loss = enable_auxiliary_loss + + self.init_weight(model_pretrained) + + def forward(self, input, label=None): + + logit_list = [] + _, feat_list = self.backbone(input) + x = feat_list[self.backbone_indices[1]] + + output = self.conv_bn_relu1(x) + output = self.gc_block(output) + output = self.conv_bn_relu2(output) + + output = paddle.concat([x, output], axis=1) + output = self.conv_bn_relu3(output) + + output = F.dropout(output, p=0.1) # dropout_prob + logit = self.conv(output) + logit = F.resize_bilinear(logit, input.shape[2:]) + logit_list.append(logit) + + if self.enable_auxiliary_loss: + low_level_feat = feat_list[self.backbone_indices[0]] + auxiliary_logit = self.auxlayer(low_level_feat) + auxiliary_logit = F.resize_bilinear(auxiliary_logit, + input.shape[2:]) + logit_list.append(auxiliary_logit) + + return logit_list + + def init_weight(self, pretrained_model=None): + """ + Initialize the parameters of model parts. + Args: + pretrained_model ([str], optional): the path of pretrained model. Defaults to None. + """ + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self, pretrained_model) + else: + raise Exception('Pretrained model is not found: {}'.format( + pretrained_model)) + + +class GlobalContextBlock(nn.Layer): + """ + Global Context Block implementation. + + Args: + in_channels (int): input channels of Global Context Block + ratio (float): the channels of attention map. 
+ """ + + def __init__(self, in_channels, ratio): + super(GlobalContextBlock, self).__init__() + + self.conv_mask = nn.Conv2d( + in_channels=in_channels, out_channels=1, kernel_size=1) + # current paddle version does not support Softmax class + # self.softmax = layer_utils.Activation("softmax", dim=2) + + inter_channels = int(in_channels * ratio) + self.channel_add_conv = nn.Sequential( + nn.Conv2d( + in_channels=in_channels, + out_channels=inter_channels, + kernel_size=1), + nn.LayerNorm(normalized_shape=[inter_channels, 1, 1]), nn.ReLU(), + nn.Conv2d( + in_channels=inter_channels, + out_channels=in_channels, + kernel_size=1)) + + def global_context_block(self, x): + batch, channel, height, width = x.shape + + # [N, C, H * W] + input_x = paddle.reshape(x, shape=[batch, channel, height * width]) + # [N, 1, C, H * W] + input_x = paddle.unsqueeze(input_x, axis=1) + # [N, 1, H, W] + context_mask = self.conv_mask(x) + # [N, 1, H * W] + context_mask = paddle.reshape( + context_mask, shape=[batch, 1, height * width]) + context_mask = F.softmax(context_mask) + # [N, 1, H * W, 1] + context_mask = paddle.unsqueeze(context_mask, axis=-1) + # [N, 1, C, 1] + context = paddle.matmul(input_x, context_mask) + # [N, C, 1, 1] + context = paddle.reshape(context, shape=[batch, channel, 1, 1]) + + return context + + def forward(self, x): + context = self.global_context_block(x) + channel_add_term = self.channel_add_conv(context) + out = x + channel_add_term + return out diff --git a/dygraph/paddleseg/models/losses/__init__.py b/dygraph/paddleseg/models/losses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f58a9fe1dccce025fa5ee9dec8887fbfc3b9deb8 --- /dev/null +++ b/dygraph/paddleseg/models/losses/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .cross_entroy_loss import CrossEntropyLoss diff --git a/dygraph/paddleseg/models/losses/cross_entroy_loss.py b/dygraph/paddleseg/models/losses/cross_entroy_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..c9f49bcd4e84527ea812a608c3fb1e29de6416aa --- /dev/null +++ b/dygraph/paddleseg/models/losses/cross_entroy_loss.py @@ -0,0 +1,141 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+import paddle.fluid as fluid
+
+from paddleseg.cvlibs import manager
+
+
+@manager.LOSSES.add_component
+class CrossEntropyLoss(nn.Layer):
+    """
+    Implements the cross entropy loss function.
+
+    Args:
+        ignore_index (int64): Specifies a target value that is ignored
+            and does not contribute to the input gradient. Default ``255``.
+    """
+
+    def __init__(self, ignore_index=255):
+        super(CrossEntropyLoss, self).__init__()
+        self.ignore_index = ignore_index
+        self.EPS = 1e-5
+
+    def forward(self, logit, label):
+        """
+        Forward computation.
+        Args:
+            logit (Tensor): logit tensor, the data type is float32, float64. Shape is
+                (N, C), where C is number of classes, and if shape is more than 2D, this
+                is (N, C, D1, D2,..., Dk), k >= 1.
+            label (Variable): label tensor, the data type is int64. Shape is (N), where each
+                value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is
+                (N, D1, D2,..., Dk), k >= 1.
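+
+        A worked example of the normalization used below (illustrative):
+        with ignore_index=255 and half of the pixels labeled 255, mean(mask)
+        is 0.5, so dividing mean(loss * mask) by (0.5 + EPS) recovers the
+        average over the valid pixels only.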
+ """ + if len(label.shape) != len(logit.shape): + label = paddle.unsqueeze(label, 1) + + # logit = paddle.transpose(logit, [0, 2, 3, 1]) + # label = paddle.transpose(label, [0, 2, 3, 1]) + # loss = F.softmax_with_cross_entropy( + # logit, label, ignore_index=self.ignore_index, axis=-1) + # loss = paddle.reduce_mean(loss) + + # mask = label != self.ignore_index + # mask = paddle.cast(mask, 'float32') + # avg_loss = loss / (paddle.mean(mask) + self.EPS) + + # label.stop_gradient = True + # mask.stop_gradient = True + # return avg_loss + + logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) + label = fluid.layers.transpose(label, [0, 2, 3, 1]) + mask = label != self.ignore_index + mask = fluid.layers.cast(mask, 'float32') + loss, probs = fluid.layers.softmax_with_cross_entropy( + logit, + label, + ignore_index=self.ignore_index, + return_softmax=True, + axis=-1) + + loss = loss * mask + avg_loss = fluid.layers.mean(loss) / ( + fluid.layers.mean(mask) + self.EPS) + + label.stop_gradient = True + mask.stop_gradient = True + return avg_loss diff --git a/dygraph/paddleseg/models/ocrnet.py b/dygraph/paddleseg/models/ocrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..78dfd136d7aaf15aed50f598c66ddbf72ac1e242 --- /dev/null +++ b/dygraph/paddleseg/models/ocrnet.py @@ -0,0 +1,215 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import paddle.fluid as fluid +from paddle.fluid.dygraph import Sequential, Conv2D + +from paddleseg.cvlibs import manager +from paddleseg.models.common.layer_utils import ConvBnRelu +from paddleseg import utils + + +class SpatialGatherBlock(fluid.dygraph.Layer): + def forward(self, pixels, regions): + n, c, h, w = pixels.shape + _, k, _, _ = regions.shape + + # pixels: from (n, c, h, w) to (n, h*w, c) + pixels = fluid.layers.reshape(pixels, (n, c, h * w)) + pixels = fluid.layers.transpose(pixels, (0, 2, 1)) + + # regions: from (n, k, h, w) to (n, k, h*w) + regions = fluid.layers.reshape(regions, (n, k, h * w)) + regions = fluid.layers.softmax(regions, axis=2) + + # feats: from (n, k, c) to (n, c, k, 1) + feats = fluid.layers.matmul(regions, pixels) + feats = fluid.layers.transpose(feats, (0, 2, 1)) + feats = fluid.layers.unsqueeze(feats, axes=[-1]) + + return feats + + +class SpatialOCRModule(fluid.dygraph.Layer): + def __init__(self, + in_channels, + key_channels, + out_channels, + dropout_rate=0.1): + super(SpatialOCRModule, self).__init__() + + self.attention_block = ObjectAttentionBlock(in_channels, key_channels) + self.dropout_rate = dropout_rate + self.conv1x1 = Conv2D(2 * in_channels, out_channels, 1) + + def forward(self, pixels, regions): + context = self.attention_block(pixels, regions) + feats = fluid.layers.concat([context, pixels], axis=1) + + feats = self.conv1x1(feats) + feats = fluid.layers.dropout(feats, self.dropout_rate) + + return feats + + +class ObjectAttentionBlock(fluid.dygraph.Layer): + def __init__(self, in_channels, key_channels): + super(ObjectAttentionBlock, self).__init__() + + self.in_channels = in_channels + self.key_channels = key_channels + + self.f_pixel = Sequential( + ConvBnRelu(in_channels, key_channels, 1), + ConvBnRelu(key_channels, key_channels, 1)) + + self.f_object = Sequential( + ConvBnRelu(in_channels, key_channels, 1), + ConvBnRelu(key_channels, key_channels, 1)) + + self.f_down = ConvBnRelu(in_channels, key_channels, 1) + + self.f_up = ConvBnRelu(key_channels, in_channels, 1) + + def forward(self, x, proxy): + n, _, h, w = x.shape + + # query : from (n, c1, h1, w1) to (n, h1*w1, key_channels) + query = self.f_pixel(x) + query = fluid.layers.reshape(query, (n, self.key_channels, -1)) + query = fluid.layers.transpose(query, (0, 2, 1)) + + # key : from (n, c2, h2, w2) to (n, key_channels, h2*w2) + key = self.f_object(proxy) + key = fluid.layers.reshape(key, (n, self.key_channels, -1)) + + # value : from (n, c2, h2, w2) to (n, h2*w2, key_channels) + value = self.f_down(proxy) + value = fluid.layers.reshape(value, (n, self.key_channels, -1)) + value = fluid.layers.transpose(value, (0, 2, 1)) + + # sim_map (n, h1*w1, h2*w2) + sim_map = fluid.layers.matmul(query, key) + sim_map = (self.key_channels**-.5) * sim_map + sim_map = fluid.layers.softmax(sim_map, axis=-1) + + # context from (n, h1*w1, key_channels) to (n , out_channels, h1, w1) + context = fluid.layers.matmul(sim_map, value) + context = fluid.layers.transpose(context, (0, 2, 1)) + context = fluid.layers.reshape(context, (n, self.key_channels, h, w)) + context = self.f_up(context) + + return context + + +@manager.MODELS.add_component +class OCRNet(fluid.dygraph.Layer): + def __init__(self, + num_classes, + backbone, + model_pretrained=None, + in_channels=None, + ocr_mid_channels=512, + ocr_key_channels=256, + ignore_index=255): + super(OCRNet, self).__init__() + + self.ignore_index = ignore_index + self.num_classes = num_classes + self.EPS = 1e-5 + + self.backbone = backbone + 
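+        # OCR pipeline, shape-wise: the backbone gives feats (n, C, h, w);
+        # aux_head predicts soft_regions (n, K, h, w) and conv3x3_ocr produces
+        # pixels (n, ocr_mid_channels, h, w). spatial_gather pools pixels with
+        # the region weights into K descriptors (n, ocr_mid_channels, K, 1),
+        # and spatial_ocr attends pixels to those descriptors, returning
+        # object-contextual features (n, ocr_mid_channels, h, w) for cls_head.
+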
self.spatial_gather = SpatialGatherBlock() + self.spatial_ocr = SpatialOCRModule(ocr_mid_channels, ocr_key_channels, + ocr_mid_channels) + self.conv3x3_ocr = ConvBnRelu( + in_channels, ocr_mid_channels, 3, padding=1) + self.cls_head = Conv2D(ocr_mid_channels, self.num_classes, 1) + + self.aux_head = Sequential( + ConvBnRelu(in_channels, in_channels, 3, padding=1), + Conv2D(in_channels, self.num_classes, 1)) + + self.init_weight(model_pretrained) + + def forward(self, x, label=None): + feats = self.backbone(x) + + soft_regions = self.aux_head(feats) + pixels = self.conv3x3_ocr(feats) + + object_regions = self.spatial_gather(pixels, soft_regions) + ocr = self.spatial_ocr(pixels, object_regions) + + logit = self.cls_head(ocr) + logit = fluid.layers.resize_bilinear(logit, x.shape[2:]) + + if self.training: + soft_regions = fluid.layers.resize_bilinear(soft_regions, + x.shape[2:]) + cls_loss = self._get_loss(logit, label) + aux_loss = self._get_loss(soft_regions, label) + return cls_loss + 0.4 * aux_loss + + score_map = fluid.layers.softmax(logit, axis=1) + score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1]) + pred = fluid.layers.argmax(score_map, axis=3) + pred = fluid.layers.unsqueeze(pred, axes=[3]) + return pred, score_map + + def init_weight(self, pretrained_model=None): + """ + Initialize the parameters of model parts. + Args: + pretrained_model ([str], optional): the path of pretrained model.. Defaults to None. + """ + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self, pretrained_model) + else: + raise Exception('Pretrained model is not found: {}'.format( + pretrained_model)) + + def _get_loss(self, logit, label): + """ + compute forward loss of the model + + Args: + logit (tensor): the logit of model output + label (tensor): ground truth + + Returns: + avg_loss (tensor): forward loss + """ + logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) + label = fluid.layers.transpose(label, [0, 2, 3, 1]) + mask = label != self.ignore_index + mask = fluid.layers.cast(mask, 'float32') + loss, probs = fluid.layers.softmax_with_cross_entropy( + logit, + label, + ignore_index=self.ignore_index, + return_softmax=True, + axis=-1) + + loss = loss * mask + avg_loss = fluid.layers.mean(loss) / ( + fluid.layers.mean(mask) + self.EPS) + + label.stop_gradient = True + mask.stop_gradient = True + + return avg_loss diff --git a/dygraph/paddleseg/models/pspnet.py b/dygraph/paddleseg/models/pspnet.py new file mode 100644 index 0000000000000000000000000000000000000000..764749ce09f4618420d142d1955cf52d9aa5c258 --- /dev/null +++ b/dygraph/paddleseg/models/pspnet.py @@ -0,0 +1,123 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
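+
+# With the config system introduced in this change, models registered through
+# manager.MODELS are typically built from YAML. A hypothetical entry (the
+# backbone name below is a placeholder, not checked against the registry):
+#
+#   model:
+#     type: PSPNet
+#     num_classes: 19
+#     backbone:
+#       type: ResNet101_vd
+#     enable_auxiliary_loss: True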
+
+import os
+
+import paddle.nn.functional as F
+from paddle import nn
+from paddleseg.cvlibs import manager
+from paddleseg.models.common import model_utils
+from paddleseg.utils import utils
+
+
+@manager.MODELS.add_component
+class PSPNet(nn.Layer):
+    """
+    The PSPNet implementation based on PaddlePaddle.
+
+    The original article refers to
+    Zhao, Hengshuang, et al. "Pyramid scene parsing network."
+    Proceedings of the IEEE conference on computer vision and pattern recognition. 2017.
+    (https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf)
+
+    Args:
+        num_classes (int): the unique number of target classes.
+
+        backbone (Paddle.nn.Layer): backbone network, currently supports Resnet50/101.
+
+        model_pretrained (str): the path of pretrained model. Default to None.
+
+        backbone_indices (tuple): two values in the tuple indicate the indices of backbone outputs.
+            The first index will be taken as a deep-supervision feature in the auxiliary layer;
+            the second one will be taken as input of the Pyramid Pooling Module (PPModule).
+            Usually the backbone consists of four downsampling stages and returns an output of
+            each stage. We therefore set the default to (2, 3), which takes the feature map of
+            the third stage (res4b22) and the feature map of the fourth stage (res5c) as input of PPModule.
+
+        backbone_channels (tuple): the same length as "backbone_indices". It indicates the channels of the corresponding indices.
+
+        pp_out_channels (int): output channels after the Pyramid Pooling Module. Default to 1024.
+
+        bin_sizes (tuple): the output sizes of pooled feature maps. Default to (1, 2, 3, 6).
+
+        enable_auxiliary_loss (bool): a bool value that indicates whether to add auxiliary loss. Default to True.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 model_pretrained=None,
+                 backbone_indices=(2, 3),
+                 backbone_channels=(1024, 2048),
+                 pp_out_channels=1024,
+                 bin_sizes=(1, 2, 3, 6),
+                 enable_auxiliary_loss=True):
+
+        super(PSPNet, self).__init__()
+
+        self.backbone = backbone
+        self.backbone_indices = backbone_indices
+
+        self.psp_module = model_utils.PPModule(
+            in_channels=backbone_channels[1],
+            out_channels=pp_out_channels,
+            bin_sizes=bin_sizes)
+
+        self.conv = nn.Conv2d(
+            in_channels=pp_out_channels,
+            out_channels=num_classes,
+            kernel_size=1)
+
+        if enable_auxiliary_loss:
+            self.fcn_head = model_utils.FCNHead(
+                in_channels=backbone_channels[0], out_channels=num_classes)
+
+        self.enable_auxiliary_loss = enable_auxiliary_loss
+
+        self.init_weight(model_pretrained)
+
+    def forward(self, input, label=None):
+
+        logit_list = []
+        _, feat_list = self.backbone(input)
+
+        x = feat_list[self.backbone_indices[1]]
+        x = self.psp_module(x)
+        x = F.dropout(x, p=0.1)  # dropout_prob
+        logit = self.conv(x)
+        logit = F.resize_bilinear(logit, input.shape[2:])
+        logit_list.append(logit)
+
+        if self.enable_auxiliary_loss:
+            auxiliary_feat = feat_list[self.backbone_indices[0]]
+            auxiliary_logit = self.fcn_head(auxiliary_feat)
+            auxiliary_logit = F.resize_bilinear(auxiliary_logit,
+                                                input.shape[2:])
+            logit_list.append(auxiliary_logit)
+
+        return logit_list
+
+    def init_weight(self, pretrained_model=None):
+        """
+        Initialize the parameters of model parts.
+
+        Args:
+            pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
+ """ + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self, pretrained_model) + else: + raise Exception('Pretrained model is not found: {}'.format( + pretrained_model)) diff --git a/dygraph/paddleseg/models/unet.py b/dygraph/paddleseg/models/unet.py new file mode 100644 index 0000000000000000000000000000000000000000..f7bd847cff52accdaeacdeadf861f4350d338700 --- /dev/null +++ b/dygraph/paddleseg/models/unet.py @@ -0,0 +1,203 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import paddle.fluid as fluid +from paddle.fluid.dygraph import Conv2D, Pool2D +from paddle.nn import SyncBatchNorm as BatchNorm + +from paddleseg.cvlibs import manager +from paddleseg import utils + + +class UNet(fluid.dygraph.Layer): + """ + U-Net: Convolutional Networks for Biomedical Image Segmentation. + https://arxiv.org/abs/1505.04597 + + Args: + num_classes (int): the unique number of target classes. + pretrained_model (str): the path of pretrained model. + ignore_index (int): the value of ground-truth mask would be ignored while computing loss or doing evaluation. Default 255. + """ + + def __init__(self, num_classes, model_pretrained=None, ignore_index=255): + super(UNet, self).__init__() + self.encode = UnetEncoder() + self.decode = UnetDecode() + self.get_logit = GetLogit(64, num_classes) + self.ignore_index = ignore_index + self.EPS = 1e-5 + + self.init_weight(model_pretrained) + + def forward(self, x, label=None): + encode_data, short_cuts = self.encode(x) + decode_data = self.decode(encode_data, short_cuts) + logit = self.get_logit(decode_data) + if self.training: + return self._get_loss(logit, label) + else: + score_map = fluid.layers.softmax(logit, axis=1) + score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1]) + pred = fluid.layers.argmax(score_map, axis=3) + pred = fluid.layers.unsqueeze(pred, axes=[3]) + return pred, score_map + + def init_weight(self, pretrained_model=None): + """ + Initialize the parameters of model parts. + Args: + pretrained_model ([str], optional): the path of pretrained model. Defaults to None. 
+ """ + if pretrained_model is not None: + if os.path.exists(pretrained_model): + utils.load_pretrained_model(self, pretrained_model) + else: + raise Exception('Pretrained model is not found: {}'.format( + pretrained_model)) + + def _get_loss(self, logit, label): + logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) + label = fluid.layers.transpose(label, [0, 2, 3, 1]) + mask = label != self.ignore_index + mask = fluid.layers.cast(mask, 'float32') + loss, probs = fluid.layers.softmax_with_cross_entropy( + logit, + label, + ignore_index=self.ignore_index, + return_softmax=True, + axis=-1) + + loss = loss * mask + avg_loss = fluid.layers.mean(loss) / ( + fluid.layers.mean(mask) + self.EPS) + + label.stop_gradient = True + mask.stop_gradient = True + return avg_loss + + +class UnetEncoder(fluid.dygraph.Layer): + def __init__(self): + super(UnetEncoder, self).__init__() + self.double_conv = DoubleConv(3, 64) + self.down1 = Down(64, 128) + self.down2 = Down(128, 256) + self.down3 = Down(256, 512) + self.down4 = Down(512, 512) + + def forward(self, x): + short_cuts = [] + x = self.double_conv(x) + short_cuts.append(x) + x = self.down1(x) + short_cuts.append(x) + x = self.down2(x) + short_cuts.append(x) + x = self.down3(x) + short_cuts.append(x) + x = self.down4(x) + return x, short_cuts + + +class UnetDecode(fluid.dygraph.Layer): + def __init__(self): + super(UnetDecode, self).__init__() + self.up1 = Up(512, 256) + self.up2 = Up(256, 128) + self.up3 = Up(128, 64) + self.up4 = Up(64, 64) + + def forward(self, x, short_cuts): + x = self.up1(x, short_cuts[3]) + x = self.up2(x, short_cuts[2]) + x = self.up3(x, short_cuts[1]) + x = self.up4(x, short_cuts[0]) + return x + + +class DoubleConv(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters): + super(DoubleConv, self).__init__() + self.conv0 = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=1, + padding=1) + self.bn0 = BatchNorm(num_filters) + self.conv1 = Conv2D( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=1, + padding=1) + self.bn1 = BatchNorm(num_filters) + + def forward(self, x): + x = self.conv0(x) + x = self.bn0(x) + x = fluid.layers.relu(x) + x = self.conv1(x) + x = self.bn1(x) + x = fluid.layers.relu(x) + return x + + +class Down(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters): + super(Down, self).__init__() + self.max_pool = Pool2D( + pool_size=2, pool_type='max', pool_stride=2, pool_padding=0) + self.double_conv = DoubleConv(num_channels, num_filters) + + def forward(self, x): + x = self.max_pool(x) + x = self.double_conv(x) + return x + + +class Up(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters): + super(Up, self).__init__() + self.double_conv = DoubleConv(2 * num_channels, num_filters) + + def forward(self, x, short_cut): + short_cut_shape = fluid.layers.shape(short_cut) + x = fluid.layers.resize_bilinear(x, short_cut_shape[2:]) + x = fluid.layers.concat([x, short_cut], axis=1) + x = self.double_conv(x) + return x + + +class GetLogit(fluid.dygraph.Layer): + def __init__(self, num_channels, num_classes): + super(GetLogit, self).__init__() + self.conv = Conv2D( + num_channels=num_channels, + num_filters=num_classes, + filter_size=3, + stride=1, + padding=1) + + def forward(self, x): + x = self.conv(x) + return x + + +@manager.MODELS.add_component +def unet(*args, **kwargs): + return UNet(*args, **kwargs) diff --git a/dygraph/paddleseg/transforms/__init__.py b/dygraph/paddleseg/transforms/__init__.py 
new file mode 100644 index 0000000000000000000000000000000000000000..8f1d5ae80aeb1eb77ac672b1cbcfedcbfbd643c4 --- /dev/null +++ b/dygraph/paddleseg/transforms/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .transforms import * +from . import functional diff --git a/dygraph/paddleseg/transforms/functional.py b/dygraph/paddleseg/transforms/functional.py new file mode 100644 index 0000000000000000000000000000000000000000..6d5a9b10db15edb05692c8aa4249912652e0a745 --- /dev/null +++ b/dygraph/paddleseg/transforms/functional.py @@ -0,0 +1,99 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import cv2 +import numpy as np +from PIL import Image, ImageEnhance + + +def normalize(im, mean, std): + im = im.astype(np.float32, copy=False) / 255.0 + im -= mean + im /= std + return im + + +def permute(im): + im = np.transpose(im, (2, 0, 1)) + return im + + +def resize(im, target_size=608, interp=cv2.INTER_LINEAR): + if isinstance(target_size, list) or isinstance(target_size, tuple): + w = target_size[0] + h = target_size[1] + else: + w = target_size + h = target_size + im = cv2.resize(im, (w, h), interpolation=interp) + return im + + +def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR): + value = max(im.shape[0], im.shape[1]) + scale = float(long_size) / float(value) + resized_width = int(round(im.shape[1] * scale)) + resized_height = int(round(im.shape[0] * scale)) + + im = cv2.resize( + im, (resized_width, resized_height), interpolation=interpolation) + return im + + +def horizontal_flip(im): + if len(im.shape) == 3: + im = im[:, ::-1, :] + elif len(im.shape) == 2: + im = im[:, ::-1] + return im + + +def vertical_flip(im): + if len(im.shape) == 3: + im = im[::-1, :, :] + elif len(im.shape) == 2: + im = im[::-1, :] + return im + + +def brightness(im, brightness_lower, brightness_upper): + brightness_delta = np.random.uniform(brightness_lower, brightness_upper) + im = ImageEnhance.Brightness(im).enhance(brightness_delta) + return im + + +def contrast(im, contrast_lower, contrast_upper): + contrast_delta = np.random.uniform(contrast_lower, contrast_upper) + im = ImageEnhance.Contrast(im).enhance(contrast_delta) + return im + + +def saturation(im, saturation_lower, saturation_upper): + saturation_delta = np.random.uniform(saturation_lower, saturation_upper) + im = ImageEnhance.Color(im).enhance(saturation_delta) + return im + + 
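+# The three enhancers above operate on a PIL.Image with a factor drawn
+# uniformly from (lower, upper); a factor of 1.0 leaves the image unchanged.
+# A minimal sketch (values are illustrative):
+#
+#   from PIL import Image
+#   import numpy as np
+#   pil_im = Image.fromarray(np.full((64, 64, 3), 128, dtype='uint8'))
+#   out = brightness(pil_im, 0.5, 1.5)  # random factor in [0.5, 1.5)
+
+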
+
+def hue(im, hue_lower, hue_upper):
+    hue_delta = np.random.uniform(hue_lower, hue_upper)
+    im = np.array(im.convert('HSV'))
+    im[:, :, 0] = im[:, :, 0] + hue_delta
+    im = Image.fromarray(im, mode='HSV').convert('RGB')
+    return im
+
+
+def rotate(im, rotate_lower, rotate_upper):
+    rotate_delta = np.random.uniform(rotate_lower, rotate_upper)
+    im = im.rotate(int(rotate_delta))
+    return im
diff --git a/dygraph/paddleseg/transforms/transforms.py b/dygraph/paddleseg/transforms/transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..4693d429d5b00236224c78165c2ef8dbf1ed088e
--- /dev/null
+++ b/dygraph/paddleseg/transforms/transforms.py
@@ -0,0 +1,576 @@
+# coding: utf8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import random
+from collections import OrderedDict
+
+import numpy as np
+from PIL import Image
+import cv2
+
+from .functional import *
+from paddleseg.cvlibs import manager
+
+
+@manager.TRANSFORMS.add_component
+class Compose:
+    def __init__(self, transforms, to_rgb=True):
+        if not isinstance(transforms, list):
+            raise TypeError('The transforms must be a list!')
+        if len(transforms) < 1:
+            raise ValueError('The length of transforms '
+                             'must be equal or larger than 1!')
+        self.transforms = transforms
+        self.to_rgb = to_rgb
+
+    def __call__(self, im, im_info=None, label=None):
+        if im_info is None:
+            im_info = list()
+        if isinstance(im, str):
+            # Check the result of imread before converting the dtype, so a
+            # bad path raises a readable error instead of an AttributeError.
+            im_path = im
+            im = cv2.imread(im_path)
+            if im is None:
+                raise ValueError(
+                    "Can't read the image file {}!".format(im_path))
+            im = im.astype('float32')
+        if isinstance(label, str):
+            label = np.asarray(Image.open(label))
+        if im is None:
+            raise ValueError('The image is None!')
+        if self.to_rgb:
+            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+
+        for op in self.transforms:
+            outputs = op(im, im_info, label)
+            im = outputs[0]
+            if len(outputs) >= 2:
+                im_info = outputs[1]
+            if len(outputs) == 3:
+                label = outputs[2]
+        im = permute(im)
+        return (im, im_info, label)
+
+
+@manager.TRANSFORMS.add_component
+class RandomHorizontalFlip:
+    def __init__(self, prob=0.5):
+        self.prob = prob
+
+    def __call__(self, im, im_info=None, label=None):
+        if random.random() < self.prob:
+            im = horizontal_flip(im)
+            if label is not None:
+                label = horizontal_flip(label)
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+@manager.TRANSFORMS.add_component
+class RandomVerticalFlip:
+    def __init__(self, prob=0.1):
+        self.prob = prob
+
+    def __call__(self, im, im_info=None, label=None):
+        if random.random() < self.prob:
+            im = vertical_flip(im)
+            if label is not None:
+                label = vertical_flip(label)
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+@manager.TRANSFORMS.add_component
+class Resize:
+    # The interpolation mode
+    interp_dict = {
+        'NEAREST': cv2.INTER_NEAREST,
+        'LINEAR': cv2.INTER_LINEAR,
+        'CUBIC': cv2.INTER_CUBIC,
+        'AREA': cv2.INTER_AREA,
+        'LANCZOS4': cv2.INTER_LANCZOS4
+    }
+
+    def __init__(self, target_size=512,
interp='LINEAR'): + self.interp = interp + if not (interp == "RANDOM" or interp in self.interp_dict): + raise ValueError("interp should be one of {}".format( + self.interp_dict.keys())) + if isinstance(target_size, list) or isinstance(target_size, tuple): + if len(target_size) != 2: + raise TypeError( + 'when target is list or tuple, it should include 2 elements, but it is {}' + .format(target_size)) + elif not isinstance(target_size, int): + raise TypeError( + "Type of target_size is invalid. Must be Integer or List or tuple, now is {}" + .format(type(target_size))) + + self.target_size = target_size + + def __call__(self, im, im_info=None, label=None): + if im_info is None: + im_info = list() + im_info.append(('resize', im.shape[:2])) + if not isinstance(im, np.ndarray): + raise TypeError("Resize: image type is not numpy.") + if len(im.shape) != 3: + raise ValueError('Resize: image is not 3-dimensional.') + if self.interp == "RANDOM": + interp = random.choice(list(self.interp_dict.keys())) + else: + interp = self.interp + im = resize(im, self.target_size, self.interp_dict[interp]) + if label is not None: + label = resize(label, self.target_size, cv2.INTER_NEAREST) + + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +@manager.TRANSFORMS.add_component +class ResizeByLong: + def __init__(self, long_size): + self.long_size = long_size + + def __call__(self, im, im_info=None, label=None): + if im_info is None: + im_info = list() + + im_info.append(('resize', im.shape[:2])) + im = resize_long(im, self.long_size) + if label is not None: + label = resize_long(label, self.long_size, cv2.INTER_NEAREST) + + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +@manager.TRANSFORMS.add_component +class ResizeRangeScaling: + def __init__(self, min_value=400, max_value=600): + if min_value > max_value: + raise ValueError('min_value must be less than max_value, ' + 'but they are {} and {}.'.format( + min_value, max_value)) + self.min_value = min_value + self.max_value = max_value + + def __call__(self, im, im_info=None, label=None): + if self.min_value == self.max_value: + random_size = self.max_value + else: + random_size = int( + np.random.uniform(self.min_value, self.max_value) + 0.5) + im = resize_long(im, random_size, cv2.INTER_LINEAR) + if label is not None: + label = resize_long(label, random_size, cv2.INTER_NEAREST) + + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +@manager.TRANSFORMS.add_component +class ResizeStepScaling: + def __init__(self, + min_scale_factor=0.75, + max_scale_factor=1.25, + scale_step_size=0.25): + if min_scale_factor > max_scale_factor: + raise ValueError( + 'min_scale_factor must be less than max_scale_factor, ' + 'but they are {} and {}.'.format(min_scale_factor, + max_scale_factor)) + self.min_scale_factor = min_scale_factor + self.max_scale_factor = max_scale_factor + self.scale_step_size = scale_step_size + + def __call__(self, im, im_info=None, label=None): + if self.min_scale_factor == self.max_scale_factor: + scale_factor = self.min_scale_factor + + elif self.scale_step_size == 0: + scale_factor = np.random.uniform(self.min_scale_factor, + self.max_scale_factor) + + else: + num_steps = int((self.max_scale_factor - self.min_scale_factor) / + self.scale_step_size + 1) + scale_factors = np.linspace(self.min_scale_factor, + self.max_scale_factor, + num_steps).tolist() + np.random.shuffle(scale_factors) + scale_factor = scale_factors[0] + w = int(round(scale_factor * 
im.shape[1])) + h = int(round(scale_factor * im.shape[0])) + + im = resize(im, (w, h), cv2.INTER_LINEAR) + if label is not None: + label = resize(label, (w, h), cv2.INTER_NEAREST) + + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +@manager.TRANSFORMS.add_component +class Normalize: + def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]): + self.mean = mean + self.std = std + if not (isinstance(self.mean, list) and isinstance(self.std, list)): + raise ValueError("{}: input type is invalid.".format(self)) + from functools import reduce + if reduce(lambda x, y: x * y, self.std) == 0: + raise ValueError('{}: std is invalid!'.format(self)) + + def __call__(self, im, im_info=None, label=None): + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + im = normalize(im, mean, std) + + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +@manager.TRANSFORMS.add_component +class Padding: + def __init__(self, + target_size, + im_padding_value=[127.5, 127.5, 127.5], + label_padding_value=255): + if isinstance(target_size, list) or isinstance(target_size, tuple): + if len(target_size) != 2: + raise ValueError( + 'when target is list or tuple, it should include 2 elements, but it is {}' + .format(target_size)) + elif not isinstance(target_size, int): + raise TypeError( + "Type of target_size is invalid. Must be Integer or List or tuple, now is {}" + .format(type(target_size))) + self.target_size = target_size + self.im_padding_value = im_padding_value + self.label_padding_value = label_padding_value + + def __call__(self, im, im_info=None, label=None): + if im_info is None: + im_info = list() + im_info.append(('padding', im.shape[:2])) + + im_height, im_width = im.shape[0], im.shape[1] + if isinstance(self.target_size, int): + target_height = self.target_size + target_width = self.target_size + else: + target_height = self.target_size[1] + target_width = self.target_size[0] + pad_height = target_height - im_height + pad_width = target_width - im_width + if pad_height < 0 or pad_width < 0: + raise ValueError( + 'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})' + .format(im_width, im_height, target_width, target_height)) + else: + im = cv2.copyMakeBorder( + im, + 0, + pad_height, + 0, + pad_width, + cv2.BORDER_CONSTANT, + value=self.im_padding_value) + if label is not None: + label = cv2.copyMakeBorder( + label, + 0, + pad_height, + 0, + pad_width, + cv2.BORDER_CONSTANT, + value=self.label_padding_value) + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +@manager.TRANSFORMS.add_component +class RandomPaddingCrop: + def __init__(self, + crop_size=512, + im_padding_value=[127.5, 127.5, 127.5], + label_padding_value=255): + if isinstance(crop_size, list) or isinstance(crop_size, tuple): + if len(crop_size) != 2: + raise ValueError( + 'when crop_size is list or tuple, it should include 2 elements, but it is {}' + .format(crop_size)) + elif not isinstance(crop_size, int): + raise TypeError( + "Type of crop_size is invalid. 
Must be Integer or List or tuple, now is {}"
+                .format(type(crop_size)))
+        self.crop_size = crop_size
+        self.im_padding_value = im_padding_value
+        self.label_padding_value = label_padding_value
+
+    def __call__(self, im, im_info=None, label=None):
+        if isinstance(self.crop_size, int):
+            crop_width = self.crop_size
+            crop_height = self.crop_size
+        else:
+            crop_width = self.crop_size[0]
+            crop_height = self.crop_size[1]
+
+        img_height = im.shape[0]
+        img_width = im.shape[1]
+
+        if img_height == crop_height and img_width == crop_width:
+            if label is None:
+                return (im, im_info)
+            else:
+                return (im, im_info, label)
+        else:
+            pad_height = max(crop_height - img_height, 0)
+            pad_width = max(crop_width - img_width, 0)
+            if pad_height > 0 or pad_width > 0:
+                im = cv2.copyMakeBorder(
+                    im,
+                    0,
+                    pad_height,
+                    0,
+                    pad_width,
+                    cv2.BORDER_CONSTANT,
+                    value=self.im_padding_value)
+                if label is not None:
+                    label = cv2.copyMakeBorder(
+                        label,
+                        0,
+                        pad_height,
+                        0,
+                        pad_width,
+                        cv2.BORDER_CONSTANT,
+                        value=self.label_padding_value)
+                img_height = im.shape[0]
+                img_width = im.shape[1]
+
+            if crop_height > 0 and crop_width > 0:
+                h_off = np.random.randint(img_height - crop_height + 1)
+                w_off = np.random.randint(img_width - crop_width + 1)
+
+                im = im[h_off:(crop_height + h_off), w_off:(
+                    w_off + crop_width), :]
+                if label is not None:
+                    label = label[h_off:(crop_height + h_off), w_off:(
+                        w_off + crop_width)]
+            if label is None:
+                return (im, im_info)
+            else:
+                return (im, im_info, label)
+
+
+@manager.TRANSFORMS.add_component
+class RandomBlur:
+    def __init__(self, prob=0.1):
+        self.prob = prob
+
+    def __call__(self, im, im_info=None, label=None):
+        if self.prob <= 0:
+            n = 0
+        elif self.prob >= 1:
+            n = 1
+        else:
+            n = int(1.0 / self.prob)
+        if n > 0:
+            if np.random.randint(0, n) == 0:
+                radius = np.random.randint(3, 10)
+                if radius % 2 != 1:
+                    radius = radius + 1
+                if radius > 9:
+                    radius = 9
+                im = cv2.GaussianBlur(im, (radius, radius), 0, 0)
+
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
+
+
+@manager.TRANSFORMS.add_component
+class RandomRotation:
+    def __init__(self,
+                 max_rotation=15,
+                 im_padding_value=[127.5, 127.5, 127.5],
+                 label_padding_value=255):
+        self.max_rotation = max_rotation
+        self.im_padding_value = im_padding_value
+        self.label_padding_value = label_padding_value
+
+    def __call__(self, im, im_info=None, label=None):
+        if self.max_rotation > 0:
+            (h, w) = im.shape[:2]
+            do_rotation = np.random.uniform(-self.max_rotation,
+                                            self.max_rotation)
+            pc = (w // 2, h // 2)
+            r = cv2.getRotationMatrix2D(pc, do_rotation, 1.0)
+            cos = np.abs(r[0, 0])
+            sin = np.abs(r[0, 1])
+
+            nw = int((h * sin) + (w * cos))
+            nh = int((h * cos) + (w * sin))
+
+            (cx, cy) = pc
+            r[0, 2] += (nw / 2) - cx
+            r[1, 2] += (nh / 2) - cy
+            dsize = (nw, nh)
+            im = cv2.warpAffine(
+                im,
+                r,
+                dsize=dsize,
+                flags=cv2.INTER_LINEAR,
+                borderMode=cv2.BORDER_CONSTANT,
+                borderValue=self.im_padding_value)
+            # Rotate the label only when one is provided.
+            if label is not None:
+                label = cv2.warpAffine(
+                    label,
+                    r,
+                    dsize=dsize,
+                    flags=cv2.INTER_NEAREST,
+                    borderMode=cv2.BORDER_CONSTANT,
+                    borderValue=self.label_padding_value)
+
+        if label is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label)
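+
+
+# The expanded canvas in RandomRotation follows the bounding-box identity for
+# a rotation by angle t: new_w = h*|sin t| + w*|cos t| and
+# new_h = h*|cos t| + w*|sin t|; the translation added to r keeps the rotated
+# image centered on the new canvas.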
for i in range(0, 10): + area = img_height * img_width + target_area = area * np.random.uniform(self.min_scale, 1.0) + aspectRatio = np.random.uniform(self.aspect_ratio, + 1.0 / self.aspect_ratio) + + dw = int(np.sqrt(target_area * 1.0 * aspectRatio)) + dh = int(np.sqrt(target_area * 1.0 / aspectRatio)) + if (np.random.randint(10) < 5): + tmp = dw + dw = dh + dh = tmp + + if (dh < img_height and dw < img_width): + h1 = np.random.randint(0, img_height - dh) + w1 = np.random.randint(0, img_width - dw) + + im = im[h1:(h1 + dh), w1:(w1 + dw), :] + label = label[h1:(h1 + dh), w1:(w1 + dw)] + im = cv2.resize( + im, (img_width, img_height), + interpolation=cv2.INTER_LINEAR) + label = cv2.resize( + label, (img_width, img_height), + interpolation=cv2.INTER_NEAREST) + break + if label is None: + return (im, im_info) + else: + return (im, im_info, label) + + +@manager.TRANSFORMS.add_component +class RandomDistort: + def __init__(self, + brightness_range=0.5, + brightness_prob=0.5, + contrast_range=0.5, + contrast_prob=0.5, + saturation_range=0.5, + saturation_prob=0.5, + hue_range=18, + hue_prob=0.5): + self.brightness_range = brightness_range + self.brightness_prob = brightness_prob + self.contrast_range = contrast_range + self.contrast_prob = contrast_prob + self.saturation_range = saturation_range + self.saturation_prob = saturation_prob + self.hue_range = hue_range + self.hue_prob = hue_prob + + def __call__(self, im, im_info=None, label=None): + brightness_lower = 1 - self.brightness_range + brightness_upper = 1 + self.brightness_range + contrast_lower = 1 - self.contrast_range + contrast_upper = 1 + self.contrast_range + saturation_lower = 1 - self.saturation_range + saturation_upper = 1 + self.saturation_range + hue_lower = -self.hue_range + hue_upper = self.hue_range + ops = [brightness, contrast, saturation, hue] + random.shuffle(ops) + params_dict = { + 'brightness': { + 'brightness_lower': brightness_lower, + 'brightness_upper': brightness_upper + }, + 'contrast': { + 'contrast_lower': contrast_lower, + 'contrast_upper': contrast_upper + }, + 'saturation': { + 'saturation_lower': saturation_lower, + 'saturation_upper': saturation_upper + }, + 'hue': { + 'hue_lower': hue_lower, + 'hue_upper': hue_upper + } + } + prob_dict = { + 'brightness': self.brightness_prob, + 'contrast': self.contrast_prob, + 'saturation': self.saturation_prob, + 'hue': self.hue_prob + } + im = im.astype('uint8') + im = Image.fromarray(im) + for id in range(4): + params = params_dict[ops[id].__name__] + prob = prob_dict[ops[id].__name__] + params['im'] = im + if np.random.uniform(0, 1) < prob: + im = ops[id](**params) + im = np.asarray(im).astype('float32') + if label is None: + return (im, im_info) + else: + return (im, im_info, label) diff --git a/dygraph/paddleseg/utils/__init__.py b/dygraph/paddleseg/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a22f9e5ec0ff32a5e42b6c2d7d6bed14a56994a1 --- /dev/null +++ b/dygraph/paddleseg/utils/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from . import logger +from . import download +from .metrics import ConfusionMatrix +from .utils import * +from .timer import Timer, calculate_eta +from .get_environ_info import get_environ_info +from .config import Config diff --git a/dygraph/paddleseg/utils/config.py b/dygraph/paddleseg/utils/config.py new file mode 100644 index 0000000000000000000000000000000000000000..270cabba6281980e79dfd1735756c49169e938ae --- /dev/null +++ b/dygraph/paddleseg/utils/config.py @@ -0,0 +1,241 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import codecs +import os +from typing import Any, Callable + +import yaml +import paddle.fluid as fluid + +import paddleseg.cvlibs.manager as manager + + +class Config(object): + ''' + Training config. + + Args: + path(str) : the path of config file, supports yaml format only + ''' + + def __init__(self, path: str): + if not os.path.exists(path): + raise FileNotFoundError('File {} does not exist'.format(path)) + + if path.endswith('yml') or path.endswith('yaml'): + dic = self._parse_from_yaml(path) + print(dic) + self._build(dic) + else: + raise RuntimeError('Config file should in yaml format!') + + def _update_dic(self, dic, base_dic): + """ + update config from dic based base_dic + """ + base_dic = base_dic.copy() + for key, val in dic.items(): + if isinstance(val, dict) and key in base_dic: + base_dic[key] = self._update_dic(val, base_dic[key]) + else: + base_dic[key] = val + dic = base_dic + return dic + + def _parse_from_yaml(self, path: str): + '''Parse a yaml file and build config''' + with codecs.open(path, 'r', 'utf-8') as file: + dic = yaml.load(file, Loader=yaml.FullLoader) + if '_base_' in dic: + cfg_dir = os.path.dirname(path) + base_path = dic.pop('_base_') + base_path = os.path.join(cfg_dir, base_path) + base_dic = self._parse_from_yaml(base_path) + dic = self._update_dic(dic, base_dic) + return dic + + def _build(self, dic: dict): + '''Build config from dictionary''' + dic = dic.copy() + + self._batch_size = dic.get('batch_size', 1) + self._iters = dic.get('iters') + + if 'model' not in dic: + raise RuntimeError() + self._model_cfg = dic['model'] + self._model = None + + self._train_dataset = dic.get('train_dataset') + self._val_dataset = dic.get('val_dataset') + + self._learning_rate_cfg = dic.get('learning_rate', {}) + self._learning_rate = self._learning_rate_cfg.get('value') + self._decay = self._learning_rate_cfg.get('decay', { + 'type': 'poly', + 'power': 0.9 + }) + + self._loss_cfg = dic.get('loss', {}) + self._losses = None + + self._optimizer_cfg = dic.get('optimizer', {}) + + def update(self, + learning_rate: float = None, + batch_size: int = None, + iters: int = None): + '''Update config''' + if learning_rate: + self._learning_rate = learning_rate + + if batch_size: + self._batch_size = batch_size + + if iters: + self._iters = iters + + @property + def batch_size(self) 
-> int: + return self._batch_size + + @property + def iters(self) -> int: + if not self._iters: + raise RuntimeError('No iters specified in the configuration file.') + return self._iters + + @property + def learning_rate(self) -> float: + if not self._learning_rate: + raise RuntimeError( + 'No learning rate specified in the configuration file.') + + if self.decay_type == 'poly': + lr = self._learning_rate + args = self.decay_args + args.setdefault('decay_steps', self.iters) + return fluid.layers.polynomial_decay(lr, **args) + else: + raise RuntimeError('Only poly decay support.') + + @property + def optimizer(self) -> fluid.optimizer.Optimizer: + if self.optimizer_type == 'sgd': + lr = self.learning_rate + args = self.optimizer_args + args.setdefault('momentum', 0.9) + return fluid.optimizer.Momentum( + lr, parameter_list=self.model.parameters(), **args) + else: + raise RuntimeError('Only sgd optimizer support.') + + @property + def optimizer_type(self) -> str: + otype = self._optimizer_cfg.get('type') + if not otype: + raise RuntimeError( + 'No optimizer type specified in the configuration file.') + return otype + + @property + def optimizer_args(self) -> dict: + args = self._optimizer_cfg.copy() + args.pop('type') + return args + + @property + def decay_type(self) -> str: + return self._decay['type'] + + @property + def decay_args(self) -> dict: + args = self._decay.copy() + args.pop('type') + return args + + @property + def loss(self) -> list: + if not self._losses: + args = self._loss_cfg.copy() + self._losses = dict() + for key, val in args.items(): + if key == 'types': + self._losses['types'] = [] + for item in args['types']: + self._losses['types'].append(self._load_object(item)) + else: + self._losses[key] = val + if len(self._losses['coef']) != len(self._losses['types']): + raise RuntimeError( + 'The length of coef should equal to types in loss config: {} != {}.' 
+ .format( + len(self._losses['coef']), len(self._losses['types']))) + return self._losses + + @property + def model(self) -> Callable: + if not self._model: + self._model = self._load_object(self._model_cfg) + return self._model + + @property + def train_dataset(self) -> Any: + if not self._train_dataset: + return None + return self._load_object(self._train_dataset) + + @property + def val_dataset(self) -> Any: + if not self._val_dataset: + return None + return self._load_object(self._val_dataset) + + def _load_component(self, com_name: str) -> Any: + com_list = [ + manager.MODELS, manager.BACKBONES, manager.DATASETS, + manager.TRANSFORMS, manager.LOSSES + ] + + for com in com_list: + if com_name in com.components_dict: + return com[com_name] + else: + raise RuntimeError( + 'The specified component was not found {}.'.format(com_name)) + + def _load_object(self, cfg: dict) -> Any: + cfg = cfg.copy() + if 'type' not in cfg: + raise RuntimeError('No object information in {}.'.format(cfg)) + + component = self._load_component(cfg.pop('type')) + + params = {} + for key, val in cfg.items(): + if self._is_meta_type(val): + params[key] = self._load_object(val) + elif isinstance(val, list): + params[key] = [ + self._load_object(item) + if self._is_meta_type(item) else item for item in val + ] + else: + params[key] = val + + return component(**params) + + def _is_meta_type(self, item: Any) -> bool: + return isinstance(item, dict) and 'type' in item diff --git a/dygraph/paddleseg/utils/download.py b/dygraph/paddleseg/utils/download.py new file mode 100644 index 0000000000000000000000000000000000000000..7bf6dd096a4b33587b47bed127673d8fe09aefbb --- /dev/null +++ b/dygraph/paddleseg/utils/download.py @@ -0,0 +1,135 @@ +import os +import sys +import time +import requests +import tarfile +import zipfile +import shutil +import functools + +lasttime = time.time() +FLUSH_INTERVAL = 0.1 + + +def progress(str, end=False): + global lasttime + if end: + str += "\n" + lasttime = 0 + if time.time() - lasttime >= FLUSH_INTERVAL: + sys.stdout.write("\r%s" % str) + lasttime = time.time() + sys.stdout.flush() + + +def _download_file(url, savepath, print_progress): + r = requests.get(url, stream=True) + total_length = r.headers.get('content-length') + + if total_length is None: + with open(savepath, 'wb') as f: + shutil.copyfileobj(r.raw, f) + else: + with open(savepath, 'wb') as f: + dl = 0 + total_length = int(total_length) + starttime = time.time() + if print_progress: + print("Downloading %s" % os.path.basename(savepath)) + for data in r.iter_content(chunk_size=4096): + dl += len(data) + f.write(data) + if print_progress: + done = int(50 * dl / total_length) + progress("[%-50s] %.2f%%" % + ('=' * done, float(100 * dl) / total_length)) + if print_progress: + progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True) + + +def _uncompress_file_zip(filepath, extrapath): + files = zipfile.ZipFile(filepath, 'r') + filelist = files.namelist() + rootpath = filelist[0] + total_num = len(filelist) + for index, file in enumerate(filelist): + files.extract(file, extrapath) + yield total_num, index, rootpath + files.close() + yield total_num, index, rootpath + + +def _uncompress_file_tar(filepath, extrapath, mode="r:gz"): + files = tarfile.open(filepath, mode) + filelist = files.getnames() + total_num = len(filelist) + rootpath = filelist[0] + for index, file in enumerate(filelist): + files.extract(file, extrapath) + yield total_num, index, rootpath + files.close() + yield total_num, index, rootpath + + +def 
_uncompress_file(filepath, extrapath, delete_file, print_progress): + if print_progress: + print("Uncompress %s" % os.path.basename(filepath)) + + if filepath.endswith("zip"): + handler = _uncompress_file_zip + elif filepath.endswith("tgz"): + handler = _uncompress_file_tar + else: + handler = functools.partial(_uncompress_file_tar, mode="r") + + for total_num, index, rootpath in handler(filepath, extrapath): + if print_progress: + done = int(50 * float(index) / total_num) + progress( + "[%-50s] %.2f%%" % ('=' * done, float(100 * index) / total_num)) + if print_progress: + progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True) + + if delete_file: + os.remove(filepath) + + return rootpath + + +def download_file_and_uncompress(url, + savepath=None, + extrapath=None, + extraname=None, + print_progress=True, + cover=False, + delete_file=True): + if savepath is None: + savepath = "." + + if extrapath is None: + extrapath = "." + + savename = url.split("/")[-1] + savepath = os.path.join(savepath, savename) + savename = ".".join(savename.split(".")[:-1]) + savename = os.path.join(extrapath, savename) + extraname = savename if extraname is None else os.path.join( + extrapath, extraname) + + if cover: + if os.path.exists(savepath): + shutil.rmtree(savepath) + if os.path.exists(savename): + shutil.rmtree(savename) + if os.path.exists(extraname): + shutil.rmtree(extraname) + + if not os.path.exists(extraname): + if not os.path.exists(savename): + if not os.path.exists(savepath): + _download_file(url, savepath, print_progress) + savename = _uncompress_file(savepath, extrapath, delete_file, + print_progress) + savename = os.path.join(extrapath, savename) + shutil.move(savename, extraname) + return extraname diff --git a/dygraph/paddleseg/utils/get_environ_info.py b/dygraph/paddleseg/utils/get_environ_info.py new file mode 100644 index 0000000000000000000000000000000000000000..7d789f4d60e875fd11514fa13b901885be7b0024 --- /dev/null +++ b/dygraph/paddleseg/utils/get_environ_info.py @@ -0,0 +1,118 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +from collections import OrderedDict +import subprocess +import glob + +import paddle +import paddle.fluid as fluid +import cv2 + +IS_WINDOWS = sys.platform == 'win32' + + +def _find_cuda_home(): + '''Finds the CUDA install path. It refers to the implementation of + pytorch . 
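+    Search order: the CUDA_HOME / CUDA_PATH environment variables, then the
+    directory two levels above the nvcc binary found on PATH, then a
+    platform-default install location.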
+ ''' + # Guess #1 + cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH') + if cuda_home is None: + # Guess #2 + try: + which = 'where' if IS_WINDOWS else 'which' + nvcc = subprocess.check_output([which, + 'nvcc']).decode().rstrip('\r\n') + cuda_home = os.path.dirname(os.path.dirname(nvcc)) + except Exception: + # Guess #3 + if IS_WINDOWS: + cuda_homes = glob.glob( + 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*') + if len(cuda_homes) == 0: + cuda_home = '' + else: + cuda_home = cuda_homes[0] + else: + cuda_home = '/usr/local/cuda' + if not os.path.exists(cuda_home): + cuda_home = None + return cuda_home + + +def _get_nvcc_info(cuda_home): + if cuda_home is not None and os.path.isdir(cuda_home): + try: + nvcc = os.path.join(cuda_home, 'bin/nvcc') + nvcc = subprocess.check_output( + "{} -V".format(nvcc), shell=True).decode() + nvcc = nvcc.strip().split('\n')[-1] + except subprocess.SubprocessError: + nvcc = "Not Available" + return nvcc + + +def _get_gpu_info(): + try: + gpu_info = subprocess.check_output(['nvidia-smi', + '-L']).decode().strip() + gpu_info = gpu_info.split('\n') + for i in range(len(gpu_info)): + gpu_info[i] = ' '.join(gpu_info[i].split(' ')[:4]) + except: + gpu_info = ' Can not get GPU information. Please make sure CUDA have been installed successfully.' + return gpu_info + + +def get_environ_info(): + """collect environment information""" + env_info = {} + env_info['System Platform'] = sys.platform + if env_info['System Platform'] == 'linux': + try: + lsb_v = subprocess.check_output(['lsb_release', + '-v']).decode().strip() + lsb_v = lsb_v.replace('\t', ' ') + lsb_d = subprocess.check_output(['lsb_release', + '-d']).decode().strip() + lsb_d = lsb_d.replace('\t', ' ') + env_info['LSB'] = [lsb_v, lsb_d] + except: + pass + + env_info['Python'] = sys.version.replace('\n', '') + + compiled_with_cuda = paddle.fluid.is_compiled_with_cuda() + env_info['Paddle compiled with cuda'] = compiled_with_cuda + + if compiled_with_cuda: + cuda_home = _find_cuda_home() + env_info['NVCC'] = _get_nvcc_info(cuda_home) + gpu_nums = fluid.core.get_cuda_device_count() + env_info['GPUs used'] = gpu_nums + env_info['CUDA_VISIBLE_DEVICES'] = os.environ.get( + 'CUDA_VISIBLE_DEVICES') + env_info['GPU'] = _get_gpu_info() + + gcc = subprocess.check_output(['gcc', '--version']).decode() + gcc = gcc.strip().split('\n')[0] + env_info['GCC'] = gcc + + env_info['PaddlePaddle'] = paddle.__version__ + env_info['OpenCV'] = cv2.__version__ + + return env_info diff --git a/dygraph/paddleseg/utils/logger.py b/dygraph/paddleseg/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..015948f65090e40895f6d4a72a75a11f2b155447 --- /dev/null +++ b/dygraph/paddleseg/utils/logger.py @@ -0,0 +1,50 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
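+#
+# Rank-aware logging helpers: only local rank 0 prints. Illustrative call
+# (the timestamp below is made up):
+#
+#   from paddleseg.utils import logger
+#   logger.info("start training")
+#   # 2020-08-01 10:00:00 [INFO]    start training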
+ +import time +import os +import sys + +from paddle.fluid.dygraph.parallel import ParallelEnv + +levels = {0: 'ERROR', 1: 'WARNING', 2: 'INFO', 3: 'DEBUG'} +log_level = 2 + + +def log(level=2, message=""): + if ParallelEnv().local_rank == 0: + current_time = time.time() + time_array = time.localtime(current_time) + current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array) + if log_level >= level: + print( + "{} [{}]\t{}".format(current_time, levels[level], + message).encode("utf-8").decode("latin1")) + sys.stdout.flush() + + +def debug(message=""): + log(level=3, message=message) + + +def info(message=""): + log(level=2, message=message) + + +def warning(message=""): + log(level=1, message=message) + + +def error(message=""): + log(level=0, message=message) diff --git a/dygraph/paddleseg/utils/metrics.py b/dygraph/paddleseg/utils/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..b107cbd57a936fb909086567fc8b703fb86963b7 --- /dev/null +++ b/dygraph/paddleseg/utils/metrics.py @@ -0,0 +1,144 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import numpy as np +from scipy.sparse import csr_matrix + + +class ConfusionMatrix(object): + """ + Confusion Matrix for segmentation evaluation + """ + + def __init__(self, num_classes=2, streaming=False): + self.confusion_matrix = np.zeros([num_classes, num_classes], + dtype='int64') + self.num_classes = num_classes + self.streaming = streaming + + def calculate(self, pred, label, ignore=None): + # If not in streaming mode, clear matrix everytime when call `calculate` + if not self.streaming: + self.zero_matrix() + + label = np.transpose(label, (0, 2, 3, 1)) + ignore = np.transpose(ignore, (0, 2, 3, 1)) + mask = np.array(ignore) == 1 + + label = np.asarray(label)[mask] + pred = np.asarray(pred)[mask] + one = np.ones_like(pred) + # Accumuate ([row=label, col=pred], 1) into sparse matrix + spm = csr_matrix((one, (label, pred)), + shape=(self.num_classes, self.num_classes)) + spm = spm.todense() + self.confusion_matrix += spm + + def zero_matrix(self): + """ Clear confusion matrix """ + self.confusion_matrix = np.zeros([self.num_classes, self.num_classes], + dtype='int64') + + def mean_iou(self): + iou_list = [] + avg_iou = 0 + # TODO: use numpy sum axis api to simpliy + vji = np.zeros(self.num_classes, dtype=int) + vij = np.zeros(self.num_classes, dtype=int) + for j in range(self.num_classes): + v_j = 0 + for i in range(self.num_classes): + v_j += self.confusion_matrix[j][i] + vji[j] = v_j + + for i in range(self.num_classes): + v_i = 0 + for j in range(self.num_classes): + v_i += self.confusion_matrix[j][i] + vij[i] = v_i + + for c in range(self.num_classes): + total = vji[c] + vij[c] - self.confusion_matrix[c][c] + if total == 0: + iou = 0 + else: + iou = float(self.confusion_matrix[c][c]) / total + avg_iou += iou + iou_list.append(iou) + avg_iou = float(avg_iou) / float(self.num_classes) + return np.array(iou_list), avg_iou + + def 
accuracy(self): + total = self.confusion_matrix.sum() + total_right = 0 + for c in range(self.num_classes): + total_right += self.confusion_matrix[c][c] + if total == 0: + avg_acc = 0 + else: + avg_acc = float(total_right) / total + + vij = np.zeros(self.num_classes, dtype=int) + for i in range(self.num_classes): + v_i = 0 + for j in range(self.num_classes): + v_i += self.confusion_matrix[j][i] + vij[i] = v_i + + acc_list = [] + for c in range(self.num_classes): + if vij[c] == 0: + acc = 0 + else: + acc = self.confusion_matrix[c][c] / float(vij[c]) + acc_list.append(acc) + return np.array(acc_list), avg_acc + + def kappa(self): + vji = np.zeros(self.num_classes) + vij = np.zeros(self.num_classes) + for j in range(self.num_classes): + v_j = 0 + for i in range(self.num_classes): + v_j += self.confusion_matrix[j][i] + vji[j] = v_j + + for i in range(self.num_classes): + v_i = 0 + for j in range(self.num_classes): + v_i += self.confusion_matrix[j][i] + vij[i] = v_i + + total = self.confusion_matrix.sum() + + # avoid spillovers + # TODO: is it reasonable to hard code 10000.0? + total = float(total) / 10000.0 + vji = vji / 10000.0 + vij = vij / 10000.0 + + tp = 0 + tc = 0 + for c in range(self.num_classes): + tp += vji[c] * vij[c] + tc += self.confusion_matrix[c][c] + + tc = tc / 10000.0 + pe = tp / (total * total) + po = tc / total + + kappa = (po - pe) / (1 - pe) + return kappa diff --git a/dygraph/paddleseg/utils/timer.py b/dygraph/paddleseg/utils/timer.py new file mode 100644 index 0000000000000000000000000000000000000000..4ebbddc9a154de4a36d6b6d9b437e14382031c49 --- /dev/null +++ b/dygraph/paddleseg/utils/timer.py @@ -0,0 +1,60 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
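+#
+# Illustrative use of the helpers below (numbers are made up):
+#
+#   timer = Timer()
+#   timer.start()
+#   # ... run one training iteration ...
+#   eta = calculate_eta(remaining_step=1000, speed=timer.elapsed_time())
+#   # -> an "HH:MM:SS" string such as "00:05:00"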
+
+import time
+
+
+class Timer(object):
+    """ A simple timer for measuring elapsed time. """
+
+    def __init__(self):
+        self._start_time = 0.0
+        self._end_time = 0.0
+        self._elapsed_time = 0.0
+        self._is_running = False
+
+    def start(self):
+        self._is_running = True
+        self._start_time = time.time()
+
+    def restart(self):
+        self.start()
+
+    def stop(self):
+        self._is_running = False
+        self._end_time = time.time()
+
+    def elapsed_time(self):
+        # Check the running state first so a stopped timer does not
+        # overwrite its recorded end time.
+        if not self.is_running:
+            return 0.0
+        self._end_time = time.time()
+        self._elapsed_time = self._end_time - self._start_time
+        return self._elapsed_time
+
+    @property
+    def is_running(self):
+        return self._is_running
+
+
+def calculate_eta(remaining_step, speed):
+    """ Format the estimated remaining time as HH:MM:SS, where `speed`
+    is the time consumed per step in seconds. """
+    if remaining_step < 0:
+        remaining_step = 0
+    remaining_time = int(remaining_step * speed)
+    result = "{:0>2}:{:0>2}:{:0>2}"
+    arr = []
+    for i in range(2, -1, -1):
+        arr.append(int(remaining_time / 60**i))
+        remaining_time %= 60**i
+    return result.format(*arr)
diff --git a/dygraph/paddleseg/utils/utils.py b/dygraph/paddleseg/utils/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0b7d87169a76a196926e7f9e2017ebd42a5605ad
--- /dev/null
+++ b/dygraph/paddleseg/utils/utils.py
@@ -0,0 +1,141 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import numpy as np
+import math
+import cv2
+import paddle.fluid as fluid
+
+from . import logger
+
+
+def seconds_to_hms(seconds):
+    h = math.floor(seconds / 3600)
+    m = math.floor((seconds - h * 3600) / 60)
+    s = int(seconds - h * 3600 - m * 60)
+    hms_str = "{}:{}:{}".format(h, m, s)
+    return hms_str
+
+
+def load_pretrained_model(model, pretrained_model):
+    if pretrained_model is not None:
+        logger.info('Load pretrained model from {}'.format(pretrained_model))
+        if os.path.exists(pretrained_model):
+            ckpt_path = os.path.join(pretrained_model, 'model')
+            try:
+                para_state_dict, _ = fluid.load_dygraph(ckpt_path)
+            except Exception:
+                # Fall back to loading a static-graph checkpoint.
+                para_state_dict = fluid.load_program_state(pretrained_model)
+
+            model_state_dict = model.state_dict()
+            keys = model_state_dict.keys()
+            num_params_loaded = 0
+            for k in keys:
+                if k not in para_state_dict:
+                    logger.warning("{} is not in the pretrained model".format(k))
+                elif list(para_state_dict[k].shape) != list(
+                        model_state_dict[k].shape):
+                    logger.warning(
+                        "[SKIP] Shape of pretrained params {} doesn't match. (Pretrained: {}, Actual: {})"
+                        .format(k, para_state_dict[k].shape,
+                                model_state_dict[k].shape))
+                else:
+                    model_state_dict[k] = para_state_dict[k]
+                    num_params_loaded += 1
+            model.set_dict(model_state_dict)
+            logger.info("{}/{} variables loaded.".format(
+                num_params_loaded, len(model_state_dict)))
+
+        else:
+            raise ValueError(
+                'The pretrained model directory is not found: {}'.format(
+                    pretrained_model))
+    else:
+        logger.warning('No pretrained model to load, training from scratch')
+
+
+def resume(model, optimizer, resume_model):
+    if resume_model is not None:
+        logger.info('Resume model from {}'.format(resume_model))
+        if os.path.exists(resume_model):
+            resume_model = os.path.normpath(resume_model)
+            ckpt_path = os.path.join(resume_model, 'model')
+            para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path)
+            model.set_dict(para_state_dict)
+            optimizer.set_dict(opti_state_dict)
+            epoch = resume_model.split('_')[-1]
+            if epoch.isdigit():
+                epoch = int(epoch)
+            return epoch
+        else:
+            raise ValueError(
+                'The resume model directory is not found: {}'.format(
+                    resume_model))
+    else:
+        logger.info('No model needs to be resumed')
+
+
+def visualize(image, result, save_dir=None, weight=0.6):
+    """
+    Convert a segmentation result to a color image and optionally save it.
+    Args:
+        image: the path of the input image
+        result: the predicted label map of the image
+        save_dir: the directory for saving the visualization
+        weight: the blending weight of the input image; the result weight is (1 - weight)
+    """
+    color_map = get_color_map_list(256)
+    color_map = np.array(color_map).astype("uint8")
+    # Use OpenCV LUT for color mapping.
+    c1 = cv2.LUT(result, color_map[:, 0])
+    c2 = cv2.LUT(result, color_map[:, 1])
+    c3 = cv2.LUT(result, color_map[:, 2])
+    pseudo_img = np.dstack((c1, c2, c3))
+
+    im = cv2.imread(image)
+    vis_result = cv2.addWeighted(im, weight, pseudo_img, 1 - weight, 0)
+
+    if save_dir is not None:
+        if not os.path.exists(save_dir):
+            os.makedirs(save_dir)
+        image_name = os.path.split(image)[-1]
+        out_path = os.path.join(save_dir, image_name)
+        cv2.imwrite(out_path, vis_result)
+    else:
+        return vis_result
+
+
+def get_color_map_list(num_classes):
+    """ Returns the color map for visualizing the segmentation mask,
+    which supports an arbitrary number of classes.
+    Args:
+        num_classes: the number of classes
+    Returns:
+        the color map
+    """
+    num_classes += 1
+    color_map = num_classes * [0, 0, 0]
+    for i in range(0, num_classes):
+        # Spread the three lowest bits of the class id across the bit
+        # planes of R, G and B (the PASCAL VOC color map scheme).
+        j = 0
+        lab = i
+        while lab:
+            color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
+            color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
+            color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
+            j += 1
+            lab >>= 3
+    color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
+    color_map = color_map[1:]
+    return color_map
diff --git a/dygraph/train.py b/dygraph/train.py
index 789cdf451d300128aa3341af980dba7664726878..1e56fce70e9e157ca45bf92d7444a327dd2a3951 100644
--- a/dygraph/train.py
+++ b/dygraph/train.py
@@ -17,12 +17,10 @@ import argparse
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.parallel import ParallelEnv
 
-import dygraph
-from dygraph.cvlibs import manager
-from dygraph.utils import get_environ_info
-from dygraph.utils import logger
-from dygraph.utils import Config
-from dygraph.core import train
+import paddleseg
+from paddleseg.cvlibs import manager
+from paddleseg.utils import get_environ_info, logger, Config
+from paddleseg.core import train
 
 
 def parse_args():
diff --git a/dygraph/val.py b/dygraph/val.py
index f4b7d6399c155d629add1888131c0d6bf7430421..6dcf040777aeab35fd742870979b68eac8d5d85b 100644
--- a/dygraph/val.py
+++ b/dygraph/val.py
@@ -17,11 +17,10 @@ import argparse
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.parallel import ParallelEnv
 
-import dygraph
-from dygraph.cvlibs import manager
-from dygraph.utils import get_environ_info
-from dygraph.utils import Config
-from dygraph.core import evaluate
+import paddleseg
+from paddleseg.cvlibs import manager
+from paddleseg.utils import get_environ_info, Config
+from paddleseg.core import evaluate
 
 
 def parse_args():
diff --git a/slim/quantization/eval_quant.py b/slim/quantization/eval_quant.py
index e309858f0deecc53783d6fcc58dfb94ef4014fd6..89d1465e30b89f9e3ce65b16f017cb53d1dff039 100644
--- a/slim/quantization/eval_quant.py
+++ b/slim/quantization/eval_quant.py
@@ -109,7 +109,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
         test_prog, startup_prog, phase=ModelPhase.EVAL)
 
     data_loader.set_sample_generator(
-        data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE)
+        data_generator, drop_last=False, batch_size=1)
 
     # Get device environment
     places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
@@ -142,6 +142,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
     fluid.io.load_persistables(exe, ckpt_dir, main_program=test_prog)
     if kwargs['convert']:
         test_prog = convert(test_prog, place, config)
+    compiled_test_prog = fluid.CompiledProgram(test_prog)
     # Use streaming confusion matrix to calculate mean_iou
     np.set_printoptions(
         precision=4, suppress=True, linewidth=160, floatmode="fixed")
@@ -157,7 +158,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
     try:
         step += 1
         loss, pred, grts, masks = exe.run(
-            test_prog, fetch_list=fetch_list, return_numpy=True)
+            compiled_test_prog, fetch_list=fetch_list, return_numpy=True)
         loss = np.mean(np.array(loss))
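
Below is a minimal usage sketch of the utilities added in this patch (illustrative only, not part of the patch itself). It exercises `ConfusionMatrix` from `paddleseg/utils/metrics.py` and `calculate_eta` from `paddleseg/utils/timer.py` on dummy data, assuming NCHW-shaped `label`/`ignore` arrays and a `pred` array already laid out like the transposed label, which is the layout `calculate` expects.

    import numpy as np
    from paddleseg.utils.metrics import ConfusionMatrix
    from paddleseg.utils.timer import Timer, calculate_eta

    # Streaming mode accumulates counts across calls to `calculate`.
    cm = ConfusionMatrix(num_classes=2, streaming=True)
    label = np.random.randint(0, 2, (1, 1, 4, 4))   # NCHW ground truth
    ignore = np.ones_like(label)                    # 1 marks valid pixels
    pred = np.random.randint(0, 2, (1, 4, 4, 1))    # laid out like the transposed label
    cm.calculate(pred, label, ignore)
    iou_per_class, miou = cm.mean_iou()
    acc_per_class, acc = cm.accuracy()

    # ETA from measured per-step time; `speed` is seconds per step.
    timer = Timer()
    timer.start()
    # ... run one evaluation step here ...
    print(calculate_eta(remaining_step=100, speed=timer.elapsed_time()))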