diff --git a/dygraph/core/__init__.py b/dygraph/core/__init__.py deleted file mode 100644 index 202629f542f40a2741cb12022adb10d7a56861b5..0000000000000000000000000000000000000000 --- a/dygraph/core/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .train import train -from .val import evaluate -from .infer import infer - -__all__ = ['train', 'evaluate', 'infer'] diff --git a/dygraph/core/infer.py b/dygraph/core/infer.py deleted file mode 100644 index 499890d216c173f4361ae7e5f18027add8cfb2a6..0000000000000000000000000000000000000000 --- a/dygraph/core/infer.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
def mkdir(path):
    """Create the parent directory of ``path`` if it does not exist yet.

    Args:
        path (str): Path of a file that is about to be written. Only the
            directory part is created; the file itself is never touched.
    """
    sub_dir = os.path.dirname(path)
    # A bare file name has an empty dirname and os.makedirs('') would raise,
    # so there is nothing to create in that case.
    if sub_dir and not os.path.exists(sub_dir):
        # exist_ok closes the race between the check above and the creation.
        os.makedirs(sub_dir, exist_ok=True)


def infer(model, test_dataset=None, model_dir=None, save_dir='output'):
    """Run prediction over ``test_dataset`` and write visualizations to disk.

    The parameters stored under ``<model_dir>/model`` are loaded into
    ``model``, every sample is predicted, the prediction is mapped back to
    the original image size by undoing the recorded transforms, and two
    images are written per sample: a blended overlay below
    ``<save_dir>/added`` and the plain prediction below
    ``<save_dir>/prediction``.

    Args:
        model: Network to run; it is called as ``model(im)`` and its first
            return value is used as the prediction.
        test_dataset: Iterable yielding ``(im, im_info, im_path)``; it must
            also expose ``dataset_root``.
        model_dir (str): Directory that contains the saved ``model`` files.
        save_dir (str): Root directory of the written images.

    Raises:
        Exception: If ``im_info`` contains an operation other than
            ``'resize'`` or ``'padding'``.
    """
    ckpt_path = os.path.join(model_dir, 'model')
    para_state_dict, _ = fluid.load_dygraph(ckpt_path)
    model.set_dict(para_state_dict)
    model.eval()

    added_saved_dir = os.path.join(save_dir, 'added')
    pred_saved_dir = os.path.join(save_dir, 'prediction')

    logger.info("Start to predict...")
    for im, im_info, im_path in tqdm.tqdm(test_dataset):
        im = to_variable(im)
        pred, _ = model(im)
        pred = pred.numpy()
        pred = np.squeeze(pred).astype('uint8')
        # Undo the preprocessing steps in reverse order.
        for info in im_info[::-1]:
            if info[0] == 'resize':
                h, w = info[1][0], info[1][1]
                # The third positional parameter of cv2.resize is `dst`, so
                # the interpolation flag has to be passed by keyword. Class
                # ids must not be blended, hence nearest neighbour.
                pred = cv2.resize(
                    pred, (w, h), interpolation=cv2.INTER_NEAREST)
            elif info[0] == 'padding':
                h, w = info[1][0], info[1][1]
                pred = pred[0:h, 0:w]
            else:
                raise Exception("Unexpected info '{}' in im_info".format(
                    info[0]))

        # Path of the image relative to the dataset root.
        im_file = im_path.replace(test_dataset.dataset_root, '')
        if im_file.startswith('/'):
            im_file = im_file[1:]

        # save added image
        added_image = utils.visualize(im_path, pred, weight=0.6)
        added_image_path = os.path.join(added_saved_dir, im_file)
        mkdir(added_image_path)
        cv2.imwrite(added_image_path, added_image)

        # save prediction
        pred_im = utils.visualize(im_path, pred, weight=0.0)
        pred_saved_path = os.path.join(pred_saved_dir, im_file)
        mkdir(pred_saved_path)
        cv2.imwrite(pred_saved_path, pred_im)
def check_logits_losses(logits, losses):
    """Ensure that exactly one loss is configured per model output.

    Args:
        logits (list): Outputs returned by the model.
        losses (dict): Loss configuration; ``losses['types']`` holds one
            loss callable per logit.

    Raises:
        RuntimeError: If the number of logits and of loss types differ.
    """
    len_logits = len(logits)
    len_losses = len(losses['types'])
    if len_logits != len_losses:
        raise RuntimeError(
            'The length of logits should equal to the types of loss config: {} != {}.'
            .format(len_logits, len_losses))


def loss_computation(logits, label, losses):
    """Return the weighted sum of all configured losses.

    Args:
        logits (list): Outputs returned by the model.
        label: Ground truth; its last two dimensions are the target size.
        losses (dict): ``losses['types'][i]`` is called as
            ``loss(logit, label)`` and weighted by ``losses['coef'][i]``.

    Returns:
        The accumulated loss (``0`` when ``logits`` is empty).

    Raises:
        RuntimeError: If the number of logits and of loss types differ.
    """
    check_logits_losses(logits, losses)
    loss = 0
    for i, logit in enumerate(logits):
        # Bring an output of a different spatial size to the label's size.
        if logit.shape[-2:] != label.shape[-2:]:
            logit = F.resize_bilinear(logit, label.shape[-2:])
        loss_i = losses['types'][i](logit, label)
        loss += losses['coef'][i] * loss_i
    return loss


def train(model,
          train_dataset,
          places=None,
          eval_dataset=None,
          optimizer=None,
          save_dir='output',
          iters=10000,
          batch_size=2,
          resume_model=None,
          save_interval_iters=1000,
          log_iters=10,
          num_classes=None,
          num_workers=8,
          use_vdl=False,
          losses=None,
          ignore_index=255):
    """Train ``model`` for ``iters`` iterations.

    Args:
        model: Network to train; called as ``model(images)``.
        train_dataset: Dataset the training batches are drawn from.
        places: Forwarded to the ``DataLoader``.
        eval_dataset: If not None, the model is evaluated on it after each
            checkpoint and the best weights are kept in
            ``<save_dir>/best_model``.
        optimizer: Optimizer used for the parameter updates.
        save_dir (str): Directory for checkpoints and VisualDL logs.
        iters (int): Iteration number at which training stops.
        batch_size (int): Batch size handed to the batch sampler.
        resume_model: If not None, passed to ``resume`` whose return value
            becomes the starting iteration.
        save_interval_iters (int): Checkpoint interval in iterations.
        log_iters (int): Logging interval in iterations.
        num_classes: Forwarded to ``evaluate``.
        num_workers (int): Number of data loader workers.
        use_vdl (bool): Whether to write scalars with VisualDL.
        losses (dict): Loss configuration, see ``loss_computation``.
        ignore_index (int): Forwarded to ``evaluate``.

    Raises:
        RuntimeError: If iterations remain to be run but the batch sampler
            provides no batch, or if logits and loss types do not match.
    """
    nranks = ParallelEnv().nranks

    start_iter = 0
    if resume_model is not None:
        start_iter = resume(model, optimizer, resume_model)

    if not os.path.isdir(save_dir):
        # A regular file of that name is in the way of the directory.
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        strategy = fluid.dygraph.prepare_context()
        ddp_model = fluid.dygraph.DataParallel(model, strategy)

    batch_sampler = DistributedBatchSampler(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    loader = DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        places=places,
        num_workers=num_workers,
        return_list=True,
    )

    if use_vdl:
        from visualdl import LogWriter
        log_writer = LogWriter(save_dir)

    timer = Timer()
    avg_loss = 0.0
    iters_per_epoch = len(batch_sampler)
    # Without a single batch the outer loop below could never advance.
    if iters_per_epoch == 0 and start_iter < iters:
        raise RuntimeError(
            'The batch sampler yields no batch (batch_size={}); training '
            'cannot make progress.'.format(batch_size))
    best_mean_iou = -1.0
    best_model_iter = -1
    train_reader_cost = 0.0
    train_batch_cost = 0.0
    timer.start()

    cur_iter = start_iter
    while cur_iter < iters:
        for data in loader:
            cur_iter += 1
            if cur_iter > iters:
                break
            train_reader_cost += timer.elapsed_time()
            images = data[0]
            labels = data[1].astype('int64')
            if nranks > 1:
                logits = ddp_model(images)
                loss = loss_computation(logits, labels, losses)
                # apply_collective_grads sum grads over multiple gpus.
                loss = ddp_model.scale_loss(loss)
                loss.backward()
                ddp_model.apply_collective_grads()
            else:
                logits = model(images)
                loss = loss_computation(logits, labels, losses)
                loss.backward()
            optimizer.minimize(loss)
            model.clear_gradients()
            avg_loss += loss.numpy()[0]
            lr = optimizer.current_step_lr()
            train_batch_cost += timer.elapsed_time()

            if cur_iter % log_iters == 0 and ParallelEnv().local_rank == 0:
                avg_loss /= log_iters
                avg_train_reader_cost = train_reader_cost / log_iters
                avg_train_batch_cost = train_batch_cost / log_iters
                train_reader_cost = 0.0
                train_batch_cost = 0.0
                remain_iters = iters - cur_iter
                eta = calculate_eta(remain_iters, avg_train_batch_cost)
                logger.info(
                    "[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.4f} | ETA {}"
                    .format((cur_iter - 1) // iters_per_epoch + 1, cur_iter,
                            iters, avg_loss * nranks, lr,
                            avg_train_batch_cost, avg_train_reader_cost, eta))
                if use_vdl:
                    log_writer.add_scalar('Train/loss', avg_loss * nranks,
                                          cur_iter)
                    log_writer.add_scalar('Train/lr', lr, cur_iter)
                    log_writer.add_scalar('Train/batch_cost',
                                          avg_train_batch_cost, cur_iter)
                    log_writer.add_scalar('Train/reader_cost',
                                          avg_train_reader_cost, cur_iter)
                avg_loss = 0.0

            if (cur_iter % save_interval_iters == 0
                    or cur_iter == iters) and ParallelEnv().local_rank == 0:
                current_save_dir = os.path.join(save_dir,
                                                "iter_{}".format(cur_iter))
                if not os.path.isdir(current_save_dir):
                    os.makedirs(current_save_dir)
                # Model and optimizer state are both saved under the same
                # 'model' prefix.
                fluid.save_dygraph(model.state_dict(),
                                   os.path.join(current_save_dir, 'model'))
                fluid.save_dygraph(optimizer.state_dict(),
                                   os.path.join(current_save_dir, 'model'))

                if eval_dataset is not None:
                    mean_iou, avg_acc = evaluate(
                        model,
                        eval_dataset,
                        model_dir=current_save_dir,
                        num_classes=num_classes,
                        ignore_index=ignore_index,
                        iter_id=cur_iter)
                    if mean_iou > best_mean_iou:
                        best_mean_iou = mean_iou
                        best_model_iter = cur_iter
                        best_model_dir = os.path.join(save_dir, "best_model")
                        fluid.save_dygraph(
                            model.state_dict(),
                            os.path.join(best_model_dir, 'model'))
                    # `{:.4f}` (precision), not `{:4f}` (minimum width).
                    logger.info(
                        'Current evaluated best model in eval_dataset is iter_{}, miou={:.4f}'
                        .format(best_model_iter, best_mean_iou))

                    if use_vdl:
                        log_writer.add_scalar('Evaluate/mIoU', mean_iou,
                                              cur_iter)
                        log_writer.add_scalar('Evaluate/aAcc', avg_acc,
                                              cur_iter)
                    # evaluate() switched the model to eval mode.
                    model.train()
            timer.restart()
    if use_vdl:
        log_writer.close()
def evaluate(model,
             eval_dataset=None,
             model_dir=None,
             num_classes=None,
             ignore_index=255,
             iter_id=None):
    """Evaluate the checkpoint in ``model_dir`` on ``eval_dataset``.

    The parameters stored under ``<model_dir>/model`` are loaded into
    ``model``. Every sample is predicted, the prediction is mapped back to
    the label's size by undoing the recorded transforms, and the result is
    accumulated in a streaming confusion matrix.

    Args:
        model: Network to evaluate; the first element of ``model(im)`` is
            reduced with ``argmax`` over axis 1.
        eval_dataset: Sized iterable yielding ``(im, im_info, label)``.
        model_dir (str): Directory that contains the saved ``model`` files.
        num_classes: Number of classes of the confusion matrix.
        ignore_index (int): Label value excluded from the statistics.
        iter_id: Training iteration, only used in the debug log.

    Returns:
        tuple: ``(miou, macc)`` as reported by the confusion matrix.

    Raises:
        Exception: If ``im_info`` contains an operation other than
            ``'resize'`` or ``'padding'``.
    """
    ckpt_path = os.path.join(model_dir, 'model')
    para_state_dict, _ = fluid.load_dygraph(ckpt_path)
    model.set_dict(para_state_dict)
    model.eval()

    total_iters = len(eval_dataset)
    conf_mat = ConfusionMatrix(num_classes, streaming=True)

    logger.info(
        "Start to evaluating(total_samples={}, total_iters={})...".format(
            len(eval_dataset), total_iters))
    timer = Timer()
    timer.start()
    for idx, (im, im_info, label) in tqdm.tqdm(
            enumerate(eval_dataset), total=total_iters):
        im = to_variable(im)
        logits = model(im)
        pred = paddle.argmax(logits[0], axis=1)
        pred = pred.numpy().astype('float32')
        pred = np.squeeze(pred)
        # Undo the preprocessing steps in reverse order.
        for info in im_info[::-1]:
            if info[0] == 'resize':
                h, w = info[1][0], info[1][1]
                # The third positional parameter of cv2.resize is `dst`, so
                # the interpolation flag has to be passed by keyword. Class
                # ids must not be blended, hence nearest neighbour.
                pred = cv2.resize(
                    pred, (w, h), interpolation=cv2.INTER_NEAREST)
            elif info[0] == 'padding':
                h, w = info[1][0], info[1][1]
                pred = pred[0:h, 0:w]
            else:
                raise Exception("Unexpected info '{}' in im_info".format(
                    info[0]))
        pred = pred[np.newaxis, :, :, np.newaxis]
        pred = pred.astype('int64')
        # True where the label is NOT ignore_index.
        # NOTE(review): handed over as `ignore=`; presumably
        # ConfusionMatrix.calculate treats it as a keep-mask - confirm.
        mask = label != ignore_index

        conf_mat.calculate(pred=pred, label=label, ignore=mask)
        _, iou = conf_mat.mean_iou()

        time_iter = timer.elapsed_time()
        remain_iter = total_iters - idx - 1
        # `{:.4f}` (precision), not `{:4f}` (minimum width).
        logger.debug(
            "[EVAL] iter_id={}, iter={}/{}, iou={:.4f}, sec/iter={:.4f} | ETA {}"
            .format(iter_id, idx + 1, total_iters, iou, time_iter,
                    calculate_eta(remain_iter, time_iter)))
        timer.restart()

    category_iou, miou = conf_mat.mean_iou()
    category_acc, macc = conf_mat.accuracy()
    logger.info("[EVAL] #Images={} mAcc={:.4f} mIoU={:.4f}".format(
        len(eval_dataset), macc, miou))
    logger.info("[EVAL] Category IoU: " + str(category_iou))
    logger.info("[EVAL] Category Acc: " + str(category_acc))
    logger.info("[EVAL] Kappa:{:.4f} ".format(conf_mat.kappa()))
    return miou, macc
class ComponentManager:
    """
    Registry that maps the ``__name__`` of a class or function to the object.

    Components are registered one at a time or as a sequence:
    >>> model_manager = ComponentManager()
    >>> class AlexNet: ...
    >>> class ResNet: ...
    >>> model_manager.add_component(AlexNet)
    >>> model_manager.add_component([ResNet])
    >>> print(model_manager.components_dict)
    output: {'AlexNet': <class '__main__.AlexNet'>, 'ResNet': <class '__main__.ResNet'>}

    ``add_component`` returns its argument, so it also works as a decorator:
    >>> model_manager = ComponentManager()
    >>> @model_manager.add_component
    ... class AlexNet: ...
    >>> @model_manager.add_component
    ... class ResNet: ...
    >>> print(model_manager.components_dict)
    output: {'AlexNet': <class '__main__.AlexNet'>, 'ResNet': <class '__main__.ResNet'>}
    """

    def __init__(self):
        self._components_dict = {}

    def __len__(self):
        return len(self._components_dict)

    def __repr__(self):
        registered_names = list(self._components_dict)
        return "{}:{}".format(type(self).__name__, registered_names)

    def __getitem__(self, item):
        if item in self._components_dict:
            return self._components_dict[item]
        raise KeyError("{} does not exist in the current {}".format(
            item, self))

    @property
    def components_dict(self):
        return self._components_dict

    def _add_single_component(self, component):
        """
        Register one component under its ``__name__``.

        Args:
            component (function | class): the component to register

        Returns:
            None

        Raises:
            TypeError: if ``component`` is neither a class nor a function
            KeyError: if a component of the same name is registered already
        """
        is_supported = inspect.isclass(component) or inspect.isfunction(
            component)
        if not is_supported:
            raise TypeError(
                "Expect class/function type, but received {}".format(
                    type(component)))

        name = component.__name__
        if name in self._components_dict:
            raise KeyError("{} exists already!".format(name))

        # The internal name of the component is its key.
        self._components_dict[name] = component

    def add_component(self, components):
        """
        Register a single component or every component of a sequence.

        Args:
            components (function | class | list | tuple): what to register

        Returns:
            The ``components`` argument, unchanged.
        """
        if isinstance(components, Sequence):
            pending = components
        else:
            pending = (components, )

        for entry in pending:
            self._add_single_component(entry)

        return components


MODELS = ComponentManager()
BACKBONES = ComponentManager()
DATASETS = ComponentManager()
TRANSFORMS = ComponentManager()
LOSSES = ComponentManager()
def constant_init(param, **kwargs):
    """Initialize ``param`` in place with ``fluid.initializer.Constant``.

    Args:
        param: Parameter to initialize; its ``block`` attribute is handed to
            the initializer as well.
        **kwargs: Forwarded unchanged to ``fluid.initializer.Constant``.
    """
    fluid.initializer.Constant(**kwargs)(param, param.block)


def normal_init(param, **kwargs):
    """Initialize ``param`` in place with ``fluid.initializer.Normal``.

    Args:
        param: Parameter to initialize; its ``block`` attribute is handed to
            the initializer as well.
        **kwargs: Forwarded unchanged to ``fluid.initializer.Normal``.
    """
    fluid.initializer.Normal(**kwargs)(param, param.block)
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
URL = "http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip"


@manager.DATASETS.add_component
class ADE20K(Dataset):
    """ADE20K dataset `http://sceneparsing.csail.mit.edu/`.

    Args:
        dataset_root: The dataset directory.
        mode: Which part of dataset to use. It is one of ('train', 'val');
            the comparison is case-insensitive. Default: 'train'.
        transforms: Transforms for image.
        download: Whether to download dataset if `dataset_root` is None.

    Raises:
        Exception: If ``mode`` is unsupported, ``transforms`` is None,
            ``dataset_root`` does not exist, or ``dataset_root`` is None
            while ``download`` is disabled.
    """

    def __init__(self,
                 dataset_root=None,
                 mode='train',
                 transforms=None,
                 download=True):
        # Validate on the raw arguments: once wrapped in Compose the
        # transforms can no longer be None, and the mode has to be compared
        # in the same normalized form it is validated in.
        normalized_mode = mode.lower()
        if normalized_mode not in ('train', 'val'):
            raise Exception(
                "`mode` should be one of ('train', 'val') in ADE20K dataset, but got {}."
                .format(mode))

        if transforms is None:
            raise Exception("`transforms` is necessary, but it is None.")

        self.dataset_root = dataset_root
        self.transforms = Compose(transforms)
        self.mode = normalized_mode
        self.file_list = list()
        self.num_classes = 150

        if self.dataset_root is None:
            if not download:
                raise Exception(
                    "`dataset_root` not set and auto download disabled.")
            self.dataset_root = download_file_and_uncompress(
                url=URL,
                savepath=DATA_HOME,
                extrapath=DATA_HOME,
                extraname='ADEChallengeData2016')
        elif not os.path.exists(self.dataset_root):
            raise Exception('there is not `dataset_root`: {}.'.format(
                self.dataset_root))

        if normalized_mode == 'train':
            img_dir = os.path.join(self.dataset_root, 'images/training')
            grt_dir = os.path.join(self.dataset_root, 'annotations/training')
        else:
            img_dir = os.path.join(self.dataset_root, 'images/validation')
            grt_dir = os.path.join(self.dataset_root, 'annotations/validation')

        # os.listdir returns entries in arbitrary order; sort so that an
        # index always refers to the same sample.
        for img_file in sorted(os.listdir(img_dir)):
            img_path = os.path.join(img_dir, img_file)
            # The annotation shares the image's name but is stored as .png.
            grt_path = os.path.join(grt_dir, img_file.replace('.jpg', '.png'))
            self.file_list.append([img_path, grt_path])

    def __getitem__(self, idx):
        image_path, grt_path = self.file_list[idx]
        if self.mode == 'test':
            im, im_info, _ = self.transforms(im=image_path)
            im = im[np.newaxis, ...]
            return im, im_info, image_path
        elif self.mode == 'val':
            im, im_info, _ = self.transforms(im=image_path)
            im = im[np.newaxis, ...]
            label = np.asarray(Image.open(grt_path))
            # Shift the stored label ids down by one.
            # NOTE(review): for unsigned 8-bit labels the value 0 presumably
            # wraps around to 255 and is then ignored - confirm.
            label = label - 1
            label = label[np.newaxis, np.newaxis, :, :]
            return im, im_info, label
        else:
            im, im_info, label = self.transforms(im=image_path, label=grt_path)
            label = label - 1
            return im, label


@manager.DATASETS.add_component
class Cityscapes(Dataset):
    """Cityscapes dataset `https://www.cityscapes-dataset.com/`.
    The folder structure is as follow:
    cityscapes
    |
    |--leftImg8bit
    |  |--train
    |  |--val
    |  |--test
    |
    |--gtFine
    |  |--train
    |  |--val
    |  |--test
    Make sure there are **labelTrainIds.png in gtFine directory. If not,
    please run the Cityscapes conversion script in tools.

    Args:
        dataset_root: Cityscapes dataset directory.
        mode: Which part of dataset to use. It is one of
            ('train', 'val', 'test'); the comparison is case-insensitive.
            Default: 'train'.
        transforms: Transforms for image.

    Raises:
        Exception: If ``mode`` is unsupported, ``transforms`` is None, or
            the expected folders are missing.
    """

    def __init__(self, dataset_root, transforms=None, mode='train'):
        # Validate on the raw arguments: once wrapped in Compose the
        # transforms can no longer be None.
        normalized_mode = mode.lower()
        if normalized_mode not in ('train', 'val', 'test'):
            raise Exception(
                "mode should be 'train', 'val' or 'test', but got {}.".format(
                    mode))

        if transforms is None:
            raise Exception("`transforms` is necessary, but it is None.")

        self.dataset_root = dataset_root
        self.transforms = Compose(transforms)
        self.file_list = list()
        self.mode = normalized_mode
        self.num_classes = 19

        structure_error = (
            "The dataset is not Found or the folder structure is nonconfoumance."
        )
        # Check the root before joining paths onto it: os.path.join(None, ..)
        # would fail with a TypeError instead of the intended message.
        if self.dataset_root is None or not os.path.isdir(self.dataset_root):
            raise Exception(structure_error)
        img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
        grt_dir = os.path.join(self.dataset_root, 'gtFine')
        if not os.path.isdir(img_dir) or not os.path.isdir(grt_dir):
            raise Exception(structure_error)

        grt_files = sorted(
            glob.glob(
                os.path.join(grt_dir, normalized_mode, '*',
                             '*_gtFine_labelTrainIds.png')))
        img_files = sorted(
            glob.glob(
                os.path.join(img_dir, normalized_mode, '*',
                             '*_leftImg8bit.png')))

        # Images and labels are paired by their position in sorted order;
        # zip stops at the shorter of the two lists.
        self.file_list = [[img_path, grt_path]
                          for img_path, grt_path in zip(img_files, grt_files)]
@manager.DATASETS.add_component
class Dataset(fluid.io.Dataset):
    """Pass in a custom dataset that conforms to the format.

    Args:
        dataset_root: The dataset directory.
        num_classes: Number of classes.
        mode: Which part of dataset to use. It is one of
            ('train', 'val', 'test'); the comparison is case-insensitive.
            Default: 'train'.
        train_list: The train dataset file. When mode is 'train', train_list
            is necessary. The contents of train_list file are as follow:
            image1.jpg ground_truth1.png
            image2.jpg ground_truth2.png
        val_list: The evaluation dataset file. When mode is 'val', val_list
            is necessary. The contents is the same as train_list.
        test_list: The test dataset file. When mode is 'test', test_list is
            necessary. The annotation file is not necessary in test_list
            file.
        separator: The separator of dataset list. Default: ' '.
        transforms: Transforms for image.

    Raises:
        Exception: If ``mode`` is unsupported, ``transforms`` is None,
            ``dataset_root`` or the list file of the selected mode is
            missing, or a line of a train/val list does not hold exactly two
            items.
    """

    def __init__(self,
                 dataset_root,
                 num_classes,
                 mode='train',
                 train_list=None,
                 val_list=None,
                 test_list=None,
                 separator=' ',
                 transforms=None):
        # Validate on the raw arguments: once wrapped in Compose the
        # transforms can no longer be None, and a mixed-case mode must not
        # pass validation only to be treated as 'test' further down.
        normalized_mode = mode.lower()
        if normalized_mode not in ('train', 'val', 'test'):
            raise Exception(
                "mode should be 'train', 'val' or 'test', but got {}.".format(
                    mode))

        if transforms is None:
            raise Exception("`transforms` is necessary, but it is None.")

        self.dataset_root = dataset_root
        self.transforms = Compose(transforms)
        self.file_list = list()
        self.mode = normalized_mode
        self.num_classes = num_classes

        if not os.path.exists(self.dataset_root):
            raise Exception('there is not `dataset_root`: {}.'.format(
                self.dataset_root))

        # Select the list file that belongs to the requested mode.
        list_by_mode = {
            'train': train_list,
            'val': val_list,
            'test': test_list,
        }
        file_list = list_by_mode[normalized_mode]
        if file_list is None:
            raise Exception(
                'When `mode` is "{0}", `{0}_list` is necessary, but it is None.'
                .format(normalized_mode))
        if not os.path.exists(file_list):
            raise Exception('`{}_list` is not found: {}'.format(
                normalized_mode, file_list))

        with open(file_list, 'r') as f:
            for line in f:
                items = line.strip().split(separator)
                if len(items) != 2:
                    # Only a test list may omit the annotation.
                    if normalized_mode in ('train', 'val'):
                        raise Exception(
                            "File list format incorrect! In training or evaluation task it should be"
                            " image_name{}label_name\\n".format(separator))
                    image_path = os.path.join(self.dataset_root, items[0])
                    grt_path = None
                else:
                    image_path = os.path.join(self.dataset_root, items[0])
                    grt_path = os.path.join(self.dataset_root, items[1])
                self.file_list.append([image_path, grt_path])

    def __getitem__(self, idx):
        image_path, grt_path = self.file_list[idx]
        if self.mode == 'test':
            # Test samples carry the image path instead of a label.
            im, im_info, _ = self.transforms(im=image_path)
            im = im[np.newaxis, ...]
            return im, im_info, image_path
        elif self.mode == 'val':
            # The label is read untransformed; im_info lets the caller map
            # the prediction back to the label's size.
            im, im_info, _ = self.transforms(im=image_path)
            im = im[np.newaxis, ...]
            label = np.asarray(Image.open(grt_path))
            label = label[np.newaxis, np.newaxis, :, :]
            return im, im_info, label
        else:
            im, im_info, label = self.transforms(im=image_path, label=grt_path)
            return im, label

    def __len__(self):
        return len(self.file_list)
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip"


@manager.DATASETS.add_component
class OpticDiscSeg(Dataset):
    """Optic disc segmentation dataset, downloaded from ``URL`` on demand.

    Args:
        dataset_root: The dataset directory. It has to contain
            ``train_list.txt``, ``val_list.txt`` or ``test_list.txt``
            depending on ``mode``.
        transforms: Transforms for image.
        mode: Which part of dataset to use. It is one of
            ('train', 'val', 'test'); the comparison is case-insensitive.
            Default: 'train'.
        download: Whether to download dataset if `dataset_root` is None.

    Raises:
        Exception: If ``mode`` is unsupported, ``transforms`` is None,
            ``dataset_root`` does not exist, ``dataset_root`` is None while
            ``download`` is disabled, or a line of a train/val list does not
            hold exactly two items.
    """

    def __init__(self,
                 dataset_root=None,
                 transforms=None,
                 mode='train',
                 download=True):
        # Validate on the raw arguments: once wrapped in Compose the
        # transforms can no longer be None, and a mixed-case mode must not
        # pass validation only to be treated as 'test' further down.
        normalized_mode = mode.lower()
        if normalized_mode not in ('train', 'val', 'test'):
            raise Exception(
                "`mode` should be 'train', 'val' or 'test', but got {}.".format(
                    mode))

        if transforms is None:
            raise Exception("`transforms` is necessary, but it is None.")

        self.dataset_root = dataset_root
        self.transforms = Compose(transforms)
        self.file_list = list()
        self.mode = normalized_mode
        self.num_classes = 2

        if self.dataset_root is None:
            if not download:
                raise Exception(
                    "`data_root` not set and auto download disabled.")
            self.dataset_root = download_file_and_uncompress(
                url=URL, savepath=DATA_HOME, extrapath=DATA_HOME)
        elif not os.path.exists(self.dataset_root):
            raise Exception('there is not `dataset_root`: {}.'.format(
                self.dataset_root))

        # Each split has its own list file inside the dataset directory.
        file_list = os.path.join(self.dataset_root,
                                 '{}_list.txt'.format(normalized_mode))

        with open(file_list, 'r') as f:
            for line in f:
                items = line.strip().split()
                if len(items) != 2:
                    # Only the test list may omit the annotation.
                    if normalized_mode in ('train', 'val'):
                        raise Exception(
                            "File list format incorrect! It should be"
                            " image_name label_name\\n")
                    image_path = os.path.join(self.dataset_root, items[0])
                    grt_path = None
                else:
                    image_path = os.path.join(self.dataset_root, items[0])
                    grt_path = os.path.join(self.dataset_root, items[1])
                self.file_list.append([image_path, grt_path])
- """ - - def __init__(self, - dataset_root=None, - mode='train', - transforms=None, - download=True): - self.dataset_root = dataset_root - self.transforms = Compose(transforms) - self.mode = mode - self.file_list = list() - self.num_classes = 21 - - if mode.lower() not in ['train', 'trainval', 'trainaug', 'val']: - raise Exception( - "`mode` should be one of ('train', 'trainval', 'trainaug', 'val') in PascalVOC dataset, but got {}." - .format(mode)) - - if self.transforms is None: - raise Exception("`transforms` is necessary, but it is None.") - - if self.dataset_root is None: - if not download: - raise Exception( - "`dataset_root` not set and auto download disabled.") - self.dataset_root = download_file_and_uncompress( - url=URL, - savepath=DATA_HOME, - extrapath=DATA_HOME, - extraname='VOCdevkit') - elif not os.path.exists(self.dataset_root): - raise Exception('there is not `dataset_root`: {}.'.format( - self.dataset_root)) - - image_set_dir = os.path.join(self.dataset_root, 'VOC2012', 'ImageSets', - 'Segmentation') - if mode == 'train': - file_list = os.path.join(image_set_dir, 'train.txt') - elif mode == 'val': - file_list = os.path.join(image_set_dir, 'val.txt') - elif mode == 'trainval': - file_list = os.path.join(image_set_dir, 'trainval.txt') - elif mode == 'trainaug': - file_list = os.path.join(image_set_dir, 'train.txt') - file_list_aug = os.path.join(image_set_dir, 'aug.txt') - - if not os.path.exists(file_list_aug): - raise Exception( - "When `mode` is 'trainaug', Pascal Voc dataset should be augmented, " - "Please make sure voc_augment.py has been properly run when using this mode." 
- ) - - img_dir = os.path.join(self.dataset_root, 'VOC2012', 'JPEGImages') - grt_dir = os.path.join(self.dataset_root, 'VOC2012', - 'SegmentationClass') - grt_dir_aug = os.path.join(self.dataset_root, 'VOC2012', - 'SegmentationClassAug') - - with open(file_list, 'r') as f: - for line in f: - line = line.strip() - image_path = os.path.join(img_dir, ''.join([line, '.jpg'])) - grt_path = os.path.join(grt_dir, ''.join([line, '.png'])) - self.file_list.append([image_path, grt_path]) - if mode == 'trainaug': - with open(file_list_aug, 'r') as f: - for line in f: - line = line.strip() - image_path = os.path.join(img_dir, ''.join([line, '.jpg'])) - grt_path = os.path.join(grt_dir_aug, ''.join([line, - '.png'])) - self.file_list.append([image_path, grt_path]) diff --git a/dygraph/models/__init__.py b/dygraph/models/__init__.py deleted file mode 100644 index f3a62e3b39c80b47bb4d50e54f7dae4018cd2d32..0000000000000000000000000000000000000000 --- a/dygraph/models/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from .architectures import * -from .losses import * -from .unet import UNet -from .deeplab import * -from .fcn import * -from .pspnet import * -from .ocrnet import * diff --git a/dygraph/models/architectures/__init__.py b/dygraph/models/architectures/__init__.py deleted file mode 100644 index 730c8f97b44c4f85429e200ebf43d13b7439c5cf..0000000000000000000000000000000000000000 --- a/dygraph/models/architectures/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from . import layer_utils -from .hrnet import * -from .resnet_vd import * -from .xception_deeplab import * -from .mobilenetv3 import * diff --git a/dygraph/models/architectures/hrnet.py b/dygraph/models/architectures/hrnet.py deleted file mode 100644 index dda718f5315efb45753aed4f13ea28d4e322e60a..0000000000000000000000000000000000000000 --- a/dygraph/models/architectures/hrnet.py +++ /dev/null @@ -1,850 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -import os - -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear -from paddle.fluid.initializer import Normal -from paddle.nn import SyncBatchNorm as BatchNorm - -from dygraph.cvlibs import manager -from dygraph.utils import utils -from dygraph.cvlibs import param_init - -__all__ = [ - "HRNet_W18_Small_V1", "HRNet_W18_Small_V2", "HRNet_W18", "HRNet_W30", - "HRNet_W32", "HRNet_W40", "HRNet_W44", "HRNet_W48", "HRNet_W60", "HRNet_W64" -] - - -class HRNet(fluid.dygraph.Layer): - """ - HRNet:Deep High-Resolution Representation Learning for Visual Recognition - https://arxiv.org/pdf/1908.07919.pdf. - - Args: - backbone_pretrained (str): the path of pretrained model. - stage1_num_modules (int): number of modules for stage1. Default 1. - stage1_num_blocks (list): number of blocks per module for stage1. Default [4]. - stage1_num_channels (list): number of channels per branch for stage1. Default [64]. - stage2_num_modules (int): number of modules for stage2. Default 1. - stage2_num_blocks (list): number of blocks per module for stage2. Default [4, 4] - stage2_num_channels (list): number of channels per branch for stage2. Default [18, 36]. - stage3_num_modules (int): number of modules for stage3. Default 4. - stage3_num_blocks (list): number of blocks per module for stage3. Default [4, 4, 4] - stage3_num_channels (list): number of channels per branch for stage3. Default [18, 36, 72]. 
- stage4_num_modules (int): number of modules for stage4. Default 3. - stage4_num_blocks (list): number of blocks per module for stage4. Default [4, 4, 4, 4] - stage4_num_channels (list): number of channels per branch for stage4. Default [18, 36, 72. 144]. - has_se (bool): whether to use Squeeze-and-Excitation module. Default False. - """ - - def __init__(self, - stage1_num_modules=1, - stage1_num_blocks=[4], - stage1_num_channels=[64], - stage2_num_modules=1, - stage2_num_blocks=[4, 4], - stage2_num_channels=[18, 36], - stage3_num_modules=4, - stage3_num_blocks=[4, 4, 4], - stage3_num_channels=[18, 36, 72], - stage4_num_modules=3, - stage4_num_blocks=[4, 4, 4, 4], - stage4_num_channels=[18, 36, 72, 144], - has_se=False): - super(HRNet, self).__init__() - - self.stage1_num_modules = stage1_num_modules - self.stage1_num_blocks = stage1_num_blocks - self.stage1_num_channels = stage1_num_channels - self.stage2_num_modules = stage2_num_modules - self.stage2_num_blocks = stage2_num_blocks - self.stage2_num_channels = stage2_num_channels - self.stage3_num_modules = stage3_num_modules - self.stage3_num_blocks = stage3_num_blocks - self.stage3_num_channels = stage3_num_channels - self.stage4_num_modules = stage4_num_modules - self.stage4_num_blocks = stage4_num_blocks - self.stage4_num_channels = stage4_num_channels - self.has_se = has_se - - self.conv_layer1_1 = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=3, - stride=2, - act='relu', - name="layer1_1") - - self.conv_layer1_2 = ConvBNLayer( - num_channels=64, - num_filters=64, - filter_size=3, - stride=2, - act='relu', - name="layer1_2") - - self.la1 = Layer1( - num_channels=64, - num_blocks=self.stage1_num_blocks[0], - num_filters=self.stage1_num_channels[0], - has_se=has_se, - name="layer2") - - self.tr1 = TransitionLayer( - in_channels=[self.stage1_num_channels[0] * 4], - out_channels=self.stage2_num_channels, - name="tr1") - - self.st2 = Stage( - num_channels=self.stage2_num_channels, - 
num_modules=self.stage2_num_modules, - num_blocks=self.stage2_num_blocks, - num_filters=self.stage2_num_channels, - has_se=self.has_se, - name="st2") - - self.tr2 = TransitionLayer( - in_channels=self.stage2_num_channels, - out_channels=self.stage3_num_channels, - name="tr2") - self.st3 = Stage( - num_channels=self.stage3_num_channels, - num_modules=self.stage3_num_modules, - num_blocks=self.stage3_num_blocks, - num_filters=self.stage3_num_channels, - has_se=self.has_se, - name="st3") - - self.tr3 = TransitionLayer( - in_channels=self.stage3_num_channels, - out_channels=self.stage4_num_channels, - name="tr3") - self.st4 = Stage( - num_channels=self.stage4_num_channels, - num_modules=self.stage4_num_modules, - num_blocks=self.stage4_num_blocks, - num_filters=self.stage4_num_channels, - has_se=self.has_se, - name="st4") - - def forward(self, x, label=None, mode='train'): - input_shape = x.shape[2:] - conv1 = self.conv_layer1_1(x) - conv2 = self.conv_layer1_2(conv1) - - la1 = self.la1(conv2) - - tr1 = self.tr1([la1]) - st2 = self.st2(tr1) - - tr2 = self.tr2(st2) - st3 = self.st3(tr2) - - tr3 = self.tr3(st3) - st4 = self.st4(tr3) - - x0_h, x0_w = st4[0].shape[2:] - x1 = fluid.layers.resize_bilinear(st4[1], out_shape=(x0_h, x0_w)) - x2 = fluid.layers.resize_bilinear(st4[2], out_shape=(x0_h, x0_w)) - x3 = fluid.layers.resize_bilinear(st4[3], out_shape=(x0_h, x0_w)) - x = fluid.layers.concat([st4[0], x1, x2, x3], axis=1) - - return [x] - - -class ConvBNLayer(fluid.dygraph.Layer): - def __init__(self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - act="relu", - name=None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - bias_attr=False) - self._batch_norm = BatchNorm(num_filters) - self.act = act - - def forward(self, input): - y = self._conv(input) - y = self._batch_norm(y) - if 
self.act == 'relu': - y = fluid.layers.relu(y) - return y - - -class Layer1(fluid.dygraph.Layer): - def __init__(self, - num_channels, - num_filters, - num_blocks, - has_se=False, - name=None): - super(Layer1, self).__init__() - - self.bottleneck_block_list = [] - - for i in range(num_blocks): - bottleneck_block = self.add_sublayer( - "bb_{}_{}".format(name, i + 1), - BottleneckBlock( - num_channels=num_channels if i == 0 else num_filters * 4, - num_filters=num_filters, - has_se=has_se, - stride=1, - downsample=True if i == 0 else False, - name=name + '_' + str(i + 1))) - self.bottleneck_block_list.append(bottleneck_block) - - def forward(self, input): - conv = input - for block_func in self.bottleneck_block_list: - conv = block_func(conv) - return conv - - -class TransitionLayer(fluid.dygraph.Layer): - def __init__(self, in_channels, out_channels, name=None): - super(TransitionLayer, self).__init__() - - num_in = len(in_channels) - num_out = len(out_channels) - self.conv_bn_func_list = [] - for i in range(num_out): - residual = None - if i < num_in: - if in_channels[i] != out_channels[i]: - residual = self.add_sublayer( - "transition_{}_layer_{}".format(name, i + 1), - ConvBNLayer( - num_channels=in_channels[i], - num_filters=out_channels[i], - filter_size=3, - name=name + '_layer_' + str(i + 1))) - else: - residual = self.add_sublayer( - "transition_{}_layer_{}".format(name, i + 1), - ConvBNLayer( - num_channels=in_channels[-1], - num_filters=out_channels[i], - filter_size=3, - stride=2, - name=name + '_layer_' + str(i + 1))) - self.conv_bn_func_list.append(residual) - - def forward(self, input): - outs = [] - for idx, conv_bn_func in enumerate(self.conv_bn_func_list): - if conv_bn_func is None: - outs.append(input[idx]) - else: - if idx < len(input): - outs.append(conv_bn_func(input[idx])) - else: - outs.append(conv_bn_func(input[-1])) - return outs - - -class Branches(fluid.dygraph.Layer): - def __init__(self, - num_blocks, - in_channels, - out_channels, - 
has_se=False, - name=None): - super(Branches, self).__init__() - - self.basic_block_list = [] - - for i in range(len(out_channels)): - self.basic_block_list.append([]) - for j in range(num_blocks[i]): - in_ch = in_channels[i] if j == 0 else out_channels[i] - basic_block_func = self.add_sublayer( - "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1), - BasicBlock( - num_channels=in_ch, - num_filters=out_channels[i], - has_se=has_se, - name=name + '_branch_layer_' + str(i + 1) + '_' + - str(j + 1))) - self.basic_block_list[i].append(basic_block_func) - - def forward(self, inputs): - outs = [] - for idx, input in enumerate(inputs): - conv = input - for basic_block_func in self.basic_block_list[idx]: - conv = basic_block_func(conv) - outs.append(conv) - return outs - - -class BottleneckBlock(fluid.dygraph.Layer): - def __init__(self, - num_channels, - num_filters, - has_se, - stride=1, - downsample=False, - name=None): - super(BottleneckBlock, self).__init__() - - self.has_se = has_se - self.downsample = downsample - - self.conv1 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act="relu", - name=name + "_conv1", - ) - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act="relu", - name=name + "_conv2") - self.conv3 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_conv3") - - if self.downsample: - self.conv_down = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_downsample") - - if self.has_se: - self.se = SELayer( - num_channels=num_filters * 4, - num_filters=num_filters * 4, - reduction_ratio=16, - name=name + '_fc') - - def forward(self, input): - residual = input - conv1 = self.conv1(input) - conv2 = self.conv2(conv1) - conv3 = self.conv3(conv2) - - if self.downsample: - residual = self.conv_down(input) - - if 
self.has_se: - conv3 = self.se(conv3) - - y = fluid.layers.elementwise_add(x=conv3, y=residual, act="relu") - return y - - -class BasicBlock(fluid.dygraph.Layer): - def __init__(self, - num_channels, - num_filters, - stride=1, - has_se=False, - downsample=False, - name=None): - super(BasicBlock, self).__init__() - - self.has_se = has_se - self.downsample = downsample - - self.conv1 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act="relu", - name=name + "_conv1") - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=1, - act=None, - name=name + "_conv2") - - if self.downsample: - self.conv_down = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - act="relu", - name=name + "_downsample") - - if self.has_se: - self.se = SELayer( - num_channels=num_filters, - num_filters=num_filters, - reduction_ratio=16, - name=name + '_fc') - - def forward(self, input): - residual = input - conv1 = self.conv1(input) - conv2 = self.conv2(conv1) - - if self.downsample: - residual = self.conv_down(input) - - if self.has_se: - conv2 = self.se(conv2) - - y = fluid.layers.elementwise_add(x=conv2, y=residual, act="relu") - return y - - -class SELayer(fluid.dygraph.Layer): - def __init__(self, num_channels, num_filters, reduction_ratio, name=None): - super(SELayer, self).__init__() - - self.pool2d_gap = Pool2D(pool_type='avg', global_pooling=True) - - self._num_channels = num_channels - - med_ch = int(num_channels / reduction_ratio) - stdv = 1.0 / math.sqrt(num_channels * 1.0) - self.squeeze = Linear( - num_channels, - med_ch, - act="relu", - param_attr=ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name=name + "_sqz_weights"), - bias_attr=ParamAttr(name=name + '_sqz_offset')) - - stdv = 1.0 / math.sqrt(med_ch * 1.0) - self.excitation = Linear( - med_ch, - num_filters, - act="sigmoid", - param_attr=ParamAttr( - 
initializer=fluid.initializer.Uniform(-stdv, stdv), - name=name + "_exc_weights"), - bias_attr=ParamAttr(name=name + '_exc_offset')) - - def forward(self, input): - pool = self.pool2d_gap(input) - pool = fluid.layers.reshape(pool, shape=[-1, self._num_channels]) - squeeze = self.squeeze(pool) - excitation = self.excitation(squeeze) - excitation = fluid.layers.reshape( - excitation, shape=[-1, self._num_channels, 1, 1]) - out = input * excitation - return out - - -class Stage(fluid.dygraph.Layer): - def __init__(self, - num_channels, - num_modules, - num_blocks, - num_filters, - has_se=False, - multi_scale_output=True, - name=None): - super(Stage, self).__init__() - - self._num_modules = num_modules - - self.stage_func_list = [] - for i in range(num_modules): - if i == num_modules - 1 and not multi_scale_output: - stage_func = self.add_sublayer( - "stage_{}_{}".format(name, i + 1), - HighResolutionModule( - num_channels=num_channels, - num_blocks=num_blocks, - num_filters=num_filters, - has_se=has_se, - multi_scale_output=False, - name=name + '_' + str(i + 1))) - else: - stage_func = self.add_sublayer( - "stage_{}_{}".format(name, i + 1), - HighResolutionModule( - num_channels=num_channels, - num_blocks=num_blocks, - num_filters=num_filters, - has_se=has_se, - name=name + '_' + str(i + 1))) - - self.stage_func_list.append(stage_func) - - def forward(self, input): - out = input - for idx in range(self._num_modules): - out = self.stage_func_list[idx](out) - return out - - -class HighResolutionModule(fluid.dygraph.Layer): - def __init__(self, - num_channels, - num_blocks, - num_filters, - has_se=False, - multi_scale_output=True, - name=None): - super(HighResolutionModule, self).__init__() - - self.branches_func = Branches( - num_blocks=num_blocks, - in_channels=num_channels, - out_channels=num_filters, - has_se=has_se, - name=name) - - self.fuse_func = FuseLayers( - in_channels=num_filters, - out_channels=num_filters, - multi_scale_output=multi_scale_output, - 
name=name) - - def forward(self, input): - out = self.branches_func(input) - out = self.fuse_func(out) - return out - - -class FuseLayers(fluid.dygraph.Layer): - def __init__(self, - in_channels, - out_channels, - multi_scale_output=True, - name=None): - super(FuseLayers, self).__init__() - - self._actual_ch = len(in_channels) if multi_scale_output else 1 - self._in_channels = in_channels - - self.residual_func_list = [] - for i in range(self._actual_ch): - for j in range(len(in_channels)): - residual_func = None - if j > i: - residual_func = self.add_sublayer( - "residual_{}_layer_{}_{}".format(name, i + 1, j + 1), - ConvBNLayer( - num_channels=in_channels[j], - num_filters=out_channels[i], - filter_size=1, - stride=1, - act=None, - name=name + '_layer_' + str(i + 1) + '_' + - str(j + 1))) - self.residual_func_list.append(residual_func) - elif j < i: - pre_num_filters = in_channels[j] - for k in range(i - j): - if k == i - j - 1: - residual_func = self.add_sublayer( - "residual_{}_layer_{}_{}_{}".format( - name, i + 1, j + 1, k + 1), - ConvBNLayer( - num_channels=pre_num_filters, - num_filters=out_channels[i], - filter_size=3, - stride=2, - act=None, - name=name + '_layer_' + str(i + 1) + '_' + - str(j + 1) + '_' + str(k + 1))) - pre_num_filters = out_channels[i] - else: - residual_func = self.add_sublayer( - "residual_{}_layer_{}_{}_{}".format( - name, i + 1, j + 1, k + 1), - ConvBNLayer( - num_channels=pre_num_filters, - num_filters=out_channels[j], - filter_size=3, - stride=2, - act="relu", - name=name + '_layer_' + str(i + 1) + '_' + - str(j + 1) + '_' + str(k + 1))) - pre_num_filters = out_channels[j] - self.residual_func_list.append(residual_func) - - def forward(self, input): - outs = [] - residual_func_idx = 0 - for i in range(self._actual_ch): - residual = input[i] - residual_shape = residual.shape[-2:] - for j in range(len(self._in_channels)): - if j > i: - y = self.residual_func_list[residual_func_idx](input[j]) - residual_func_idx += 1 - - y = 
fluid.layers.resize_bilinear( - input=y, out_shape=residual_shape) - residual = fluid.layers.elementwise_add( - x=residual, y=y, act=None) - elif j < i: - y = input[j] - for k in range(i - j): - y = self.residual_func_list[residual_func_idx](y) - residual_func_idx += 1 - - residual = fluid.layers.elementwise_add( - x=residual, y=y, act=None) - - layer_helper = LayerHelper(self.full_name(), act='relu') - residual = layer_helper.append_activation(residual) - outs.append(residual) - - return outs - - -class LastClsOut(fluid.dygraph.Layer): - def __init__(self, - num_channel_list, - has_se, - num_filters_list=[32, 64, 128, 256], - name=None): - super(LastClsOut, self).__init__() - - self.func_list = [] - for idx in range(len(num_channel_list)): - func = self.add_sublayer( - "conv_{}_conv_{}".format(name, idx + 1), - BottleneckBlock( - num_channels=num_channel_list[idx], - num_filters=num_filters_list[idx], - has_se=has_se, - downsample=True, - name=name + 'conv_' + str(idx + 1))) - self.func_list.append(func) - - def forward(self, inputs): - outs = [] - for idx, input in enumerate(inputs): - out = self.func_list[idx](input) - outs.append(out) - return outs - - -@manager.BACKBONES.add_component -def HRNet_W18_Small_V1(**kwargs): - model = HRNet( - stage1_num_modules=1, - stage1_num_blocks=[1], - stage1_num_channels=[32], - stage2_num_modules=1, - stage2_num_blocks=[2, 2], - stage2_num_channels=[16, 32], - stage3_num_modules=1, - stage3_num_blocks=[2, 2, 2], - stage3_num_channels=[16, 32, 64], - stage4_num_modules=1, - stage4_num_blocks=[2, 2, 2, 2], - stage4_num_channels=[16, 32, 64, 128], - **kwargs) - return model - - -@manager.BACKBONES.add_component -def HRNet_W18_Small_V2(**kwargs): - model = HRNet( - stage1_num_modules=1, - stage1_num_blocks=[2], - stage1_num_channels=[64], - stage2_num_modules=1, - stage2_num_blocks=[2, 2], - stage2_num_channels=[18, 36], - stage3_num_modules=1, - stage3_num_blocks=[2, 2, 2], - stage3_num_channels=[18, 36, 72], - 
stage4_num_modules=1, - stage4_num_blocks=[2, 2, 2, 2], - stage4_num_channels=[18, 36, 72, 144], - **kwargs) - return model - - -@manager.BACKBONES.add_component -def HRNet_W18(**kwargs): - model = HRNet( - stage1_num_modules=1, - stage1_num_blocks=[4], - stage1_num_channels=[64], - stage2_num_modules=1, - stage2_num_blocks=[4, 4], - stage2_num_channels=[18, 36], - stage3_num_modules=4, - stage3_num_blocks=[4, 4, 4], - stage3_num_channels=[18, 36, 72], - stage4_num_modules=3, - stage4_num_blocks=[4, 4, 4, 4], - stage4_num_channels=[18, 36, 72, 144], - **kwargs) - return model - - -@manager.BACKBONES.add_component -def HRNet_W30(**kwargs): - model = HRNet( - stage1_num_modules=1, - stage1_num_blocks=[4], - stage1_num_channels=[64], - stage2_num_modules=1, - stage2_num_blocks=[4, 4], - stage2_num_channels=[30, 60], - stage3_num_modules=4, - stage3_num_blocks=[4, 4, 4], - stage3_num_channels=[30, 60, 120], - stage4_num_modules=3, - stage4_num_blocks=[4, 4, 4, 4], - stage4_num_channels=[30, 60, 120, 240], - **kwargs) - return model - - -@manager.BACKBONES.add_component -def HRNet_W32(**kwargs): - model = HRNet( - stage1_num_modules=1, - stage1_num_blocks=[4], - stage1_num_channels=[64], - stage2_num_modules=1, - stage2_num_blocks=[4, 4], - stage2_num_channels=[32, 64], - stage3_num_modules=4, - stage3_num_blocks=[4, 4, 4], - stage3_num_channels=[32, 64, 128], - stage4_num_modules=3, - stage4_num_blocks=[4, 4, 4, 4], - stage4_num_channels=[32, 64, 128, 256], - **kwargs) - return model - - -@manager.BACKBONES.add_component -def HRNet_W40(**kwargs): - model = HRNet( - stage1_num_modules=1, - stage1_num_blocks=[4], - stage1_num_channels=[64], - stage2_num_modules=1, - stage2_num_blocks=[4, 4], - stage2_num_channels=[40, 80], - stage3_num_modules=4, - stage3_num_blocks=[4, 4, 4], - stage3_num_channels=[40, 80, 160], - stage4_num_modules=3, - stage4_num_blocks=[4, 4, 4, 4], - stage4_num_channels=[40, 80, 160, 320], - **kwargs) - return model - - 
-@manager.BACKBONES.add_component -def HRNet_W44(**kwargs): - model = HRNet( - stage1_num_modules=1, - stage1_num_blocks=[4], - stage1_num_channels=[64], - stage2_num_modules=1, - stage2_num_blocks=[4, 4], - stage2_num_channels=[44, 88], - stage3_num_modules=4, - stage3_num_blocks=[4, 4, 4], - stage3_num_channels=[44, 88, 176], - stage4_num_modules=3, - stage4_num_blocks=[4, 4, 4, 4], - stage4_num_channels=[44, 88, 176, 352], - **kwargs) - return model - - -@manager.BACKBONES.add_component -def HRNet_W48(**kwargs): - model = HRNet( - stage1_num_modules=1, - stage1_num_blocks=[4], - stage1_num_channels=[64], - stage2_num_modules=1, - stage2_num_blocks=[4, 4], - stage2_num_channels=[48, 96], - stage3_num_modules=4, - stage3_num_blocks=[4, 4, 4], - stage3_num_channels=[48, 96, 192], - stage4_num_modules=3, - stage4_num_blocks=[4, 4, 4, 4], - stage4_num_channels=[48, 96, 192, 384], - **kwargs) - return model - - -@manager.BACKBONES.add_component -def HRNet_W60(**kwargs): - model = HRNet( - stage1_num_modules=1, - stage1_num_blocks=[4], - stage1_num_channels=[64], - stage2_num_modules=1, - stage2_num_blocks=[4, 4], - stage2_num_channels=[60, 120], - stage3_num_modules=4, - stage3_num_blocks=[4, 4, 4], - stage3_num_channels=[60, 120, 240], - stage4_num_modules=3, - stage4_num_blocks=[4, 4, 4, 4], - stage4_num_channels=[60, 120, 240, 480], - **kwargs) - return model - - -@manager.BACKBONES.add_component -def HRNet_W64(**kwargs): - model = HRNet( - stage1_num_modules=1, - stage1_num_blocks=[4], - stage1_num_channels=[64], - stage2_num_modules=1, - stage2_num_blocks=[4, 4], - stage2_num_channels=[64, 128], - stage3_num_modules=4, - stage3_num_blocks=[4, 4, 4], - stage3_num_channels=[64, 128, 256], - stage4_num_modules=3, - stage4_num_blocks=[4, 4, 4, 4], - stage4_num_channels=[64, 128, 256, 512], - **kwargs) - return model diff --git a/dygraph/models/architectures/layer_utils.py b/dygraph/models/architectures/layer_utils.py deleted file mode 100644 index 
a9842f188276b6347f4f2ced100ff8c6c00f2715..0000000000000000000000000000000000000000 --- a/dygraph/models/architectures/layer_utils.py +++ /dev/null @@ -1,162 +0,0 @@ -# -*- encoding: utf-8 -*- -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.nn.functional as F -from paddle import fluid -from paddle.fluid import dygraph -from paddle.fluid.dygraph import Conv2D -from paddle.nn import SyncBatchNorm as BatchNorm -from paddle.nn.layer import activation - - -class ConvBnRelu(dygraph.Layer): - def __init__(self, - num_channels, - num_filters, - filter_size, - using_sep_conv=False, - **kwargs): - - super(ConvBnRelu, self).__init__() - - if using_sep_conv: - self.conv = DepthwiseConvBnRelu(num_channels, - num_filters, - filter_size, - **kwargs) - else: - - self.conv = Conv2D(num_channels, - num_filters, - filter_size, - **kwargs) - - self.batch_norm = BatchNorm(num_filters) - - def forward(self, x): - x = self.conv(x) - x = self.batch_norm(x) - x = F.relu(x) - return x - - -class ConvBn(dygraph.Layer): - def __init__(self, num_channels, num_filters, filter_size, **kwargs): - super(ConvBn, self).__init__() - self.conv = Conv2D(num_channels, - num_filters, - filter_size, - **kwargs) - self.batch_norm = BatchNorm(num_filters) - - def forward(self, x): - x = self.conv(x) - x = self.batch_norm(x) - return x - - -class ConvReluPool(dygraph.Layer): - def __init__(self, num_channels, num_filters): - 
super(ConvReluPool, self).__init__() - self.conv = Conv2D(num_channels, - num_filters, - filter_size=3, - stride=1, - padding=1, - dilation=1) - - def forward(self, x): - x = self.conv(x) - x = F.relu(x) - x = fluid.layers.pool2d(x, pool_size=2, pool_type="max", pool_stride=2) - return x - - -class ConvBnReluUpsample(dygraph.Layer): - def __init__(self, num_channels, num_filters): - super(ConvBnReluUpsample, self).__init__() - self.conv_bn_relu = ConvBnRelu(num_channels, num_filters) - - def forward(self, x, upsample_scale=2): - x = self.conv_bn_relu(x) - new_shape = [x.shape[2] * upsample_scale, x.shape[3] * upsample_scale] - x = fluid.layers.resize_bilinear(x, new_shape) - return x - - -class DepthwiseConvBnRelu(dygraph.Layer): - def __init__(self, - num_channels, - num_filters, - filter_size, - **kwargs): - super(DepthwiseConvBnRelu, self).__init__() - self.depthwise_conv = ConvBn(num_channels, - num_filters=num_channels, - filter_size=filter_size, - groups=num_channels, - use_cudnn=False, - **kwargs) - self.piontwise_conv = ConvBnRelu(num_channels, - num_filters, - filter_size=1, - groups=1) - - def forward(self, x): - x = self.depthwise_conv(x) - x = self.piontwise_conv(x) - return x - - -class Activation(fluid.dygraph.Layer): - """ - The wrapper of activations - For example: - >>> relu = Activation("relu") - >>> print(relu) - - >>> sigmoid = Activation("sigmoid") - >>> print(sigmoid) - - >>> not_exit_one = Activation("not_exit_one") - KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', - 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', - 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" - - Args: - act (str): the activation name in lowercase - """ - - def __init__(self, act=None): - super(Activation, self).__init__() - - self._act = act - upper_act_names = activation.__all__ - lower_act_names = [act.lower() for act in upper_act_names] 
- act_dict = dict(zip(lower_act_names, upper_act_names)) - - if act is not None: - if act in act_dict.keys(): - act_name = act_dict[act] - self.act_func = eval("activation.{}()".format(act_name)) - else: - raise KeyError("{} does not exist in the current {}".format(act, act_dict.keys())) - - def forward(self, x): - - if self._act is not None: - return self.act_func(x) - else: - return x \ No newline at end of file diff --git a/dygraph/models/architectures/mobilenetv3.py b/dygraph/models/architectures/mobilenetv3.py deleted file mode 100644 index 07805c1b806d18f47d96b8ae1a35c734625f67b3..0000000000000000000000000000000000000000 --- a/dygraph/models/architectures/mobilenetv3.py +++ /dev/null @@ -1,452 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import os - -import numpy as np -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout -from paddle.nn import SyncBatchNorm as BatchNorm - -from dygraph.models.architectures import layer_utils -from dygraph.cvlibs import manager -from dygraph.utils import utils - -__all__ = [ - "MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5", - "MobileNetV3_small_x0_75", "MobileNetV3_small_x1_0", - "MobileNetV3_small_x1_25", "MobileNetV3_large_x0_35", - "MobileNetV3_large_x0_5", "MobileNetV3_large_x0_75", - "MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25" -] - - -def make_divisible(v, divisor=8, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -def get_padding_same(kernel_size, dilation_rate): - """ - SAME padding implementation given kernel_size and dilation_rate. 
- The calculation formula as following: - (F-(k+(k -1)*(r-1))+2*p)/s + 1 = F_new - where F: a feature map - k: kernel size, r: dilation rate, p: padding value, s: stride - F_new: new feature map - Args: - kernel_size (int) - dilation_rate (int) - - Returns: - padding_same (int): padding value - """ - k = kernel_size - r = dilation_rate - padding_same = (k + (k - 1) * (r - 1) - 1) // 2 - - return padding_same - - -class MobileNetV3(fluid.dygraph.Layer): - def __init__(self, - backbone_pretrained=None, - scale=1.0, - model_name="small", - class_dim=1000, - output_stride=None, - **kwargs): - super(MobileNetV3, self).__init__() - - inplanes = 16 - if model_name == "large": - self.cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, False, "relu", 1], - [3, 64, 24, False, "relu", 2], - [3, 72, 24, False, "relu", 1], # output 1 -> out_index=2 - [5, 72, 40, True, "relu", 2], - [5, 120, 40, True, "relu", 1], - [5, 120, 40, True, "relu", 1], # output 2 -> out_index=5 - [3, 240, 80, False, "hard_swish", 2], - [3, 200, 80, False, "hard_swish", 1], - [3, 184, 80, False, "hard_swish", 1], - [3, 184, 80, False, "hard_swish", 1], - [3, 480, 112, True, "hard_swish", 1], - [3, 672, 112, True, "hard_swish", - 1], # output 3 -> out_index=11 - [5, 672, 160, True, "hard_swish", 2], - [5, 960, 160, True, "hard_swish", 1], - [5, 960, 160, True, "hard_swish", - 1], # output 3 -> out_index=14 - ] - self.out_indices = [2, 5, 11, 14] - - self.cls_ch_squeeze = 960 - self.cls_ch_expand = 1280 - elif model_name == "small": - self.cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, True, "relu", 2], # output 1 -> out_index=0 - [3, 72, 24, False, "relu", 2], - [3, 88, 24, False, "relu", 1], # output 2 -> out_index=3 - [5, 96, 40, True, "hard_swish", 2], - [5, 240, 40, True, "hard_swish", 1], - [5, 240, 40, True, "hard_swish", 1], - [5, 120, 48, True, "hard_swish", 1], - [5, 144, 48, True, "hard_swish", 1], # output 3 -> out_index=7 - [5, 288, 96, True, "hard_swish", 2], - [5, 576, 96, True, "hard_swish", 
1], - [5, 576, 96, True, "hard_swish", 1], # output 4 -> out_index=10 - ] - self.out_indices = [0, 3, 7, 10] - - self.cls_ch_squeeze = 576 - self.cls_ch_expand = 1280 - else: - raise NotImplementedError( - "mode[{}_model] is not implemented!".format(model_name)) - - ################################################### - # modify stride and dilation based on output_stride - self.dilation_cfg = [1] * len(self.cfg) - self.modify_bottle_params(output_stride=output_stride) - ################################################### - - self.conv1 = ConvBNLayer( - in_c=3, - out_c=make_divisible(inplanes * scale), - filter_size=3, - stride=2, - padding=1, - num_groups=1, - if_act=True, - act="hard_swish", - name="conv1") - - self.block_list = [] - - inplanes = make_divisible(inplanes * scale) - for i, (k, exp, c, se, nl, s) in enumerate(self.cfg): - ###################################### - # add dilation rate - dilation_rate = self.dilation_cfg[i] - ###################################### - self.block_list.append( - ResidualUnit( - in_c=inplanes, - mid_c=make_divisible(scale * exp), - out_c=make_divisible(scale * c), - filter_size=k, - stride=s, - dilation=dilation_rate, - use_se=se, - act=nl, - name="conv" + str(i + 2))) - self.add_sublayer( - sublayer=self.block_list[-1], name="conv" + str(i + 2)) - inplanes = make_divisible(scale * c) - - self.last_second_conv = ConvBNLayer( - in_c=inplanes, - out_c=make_divisible(scale * self.cls_ch_squeeze), - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - act="hard_swish", - name="conv_last") - - self.pool = Pool2D( - pool_type="avg", global_pooling=True, use_cudnn=False) - - self.last_conv = Conv2D( - num_channels=make_divisible(scale * self.cls_ch_squeeze), - num_filters=self.cls_ch_expand, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name="last_1x1_conv_weights"), - bias_attr=False) - - self.out = Linear( - input_dim=self.cls_ch_expand, - output_dim=class_dim, - 
param_attr=ParamAttr("fc_weights"), - bias_attr=ParamAttr(name="fc_offset")) - - self.init_weight(backbone_pretrained) - - def modify_bottle_params(self, output_stride=None): - - if output_stride is not None and output_stride % 2 != 0: - raise Exception("output stride must to be even number") - if output_stride is not None: - stride = 2 - rate = 1 - for i, _cfg in enumerate(self.cfg): - stride = stride * _cfg[-1] - if stride > output_stride: - rate = rate * _cfg[-1] - self.cfg[i][-1] = 1 - - self.dilation_cfg[i] = rate - - def forward(self, inputs, label=None, dropout_prob=0.2): - x = self.conv1(inputs) - # A feature list saves each downsampling feature. - feat_list = [] - for i, block in enumerate(self.block_list): - x = block(x) - if i in self.out_indices: - feat_list.append(x) - #print("block {}:".format(i),x.shape, self.dilation_cfg[i]) - x = self.last_second_conv(x) - x = self.pool(x) - x = self.last_conv(x) - x = fluid.layers.hard_swish(x) - x = fluid.layers.dropout(x=x, dropout_prob=dropout_prob) - x = fluid.layers.reshape(x, shape=[x.shape[0], x.shape[1]]) - x = self.out(x) - - return x, feat_list - - def init_weight(self, pretrained_model=None): - """ - Initialize the parameters of model parts. - Args: - pretrained_model ([str], optional): the path of pretrained model. Defaults to None. 
- """ - if pretrained_model is not None: - if os.path.exists(pretrained_model): - utils.load_pretrained_model(self, pretrained_model) - else: - raise Exception('Pretrained model is not found: {}'.format( - pretrained_model)) - - -class ConvBNLayer(fluid.dygraph.Layer): - def __init__(self, - in_c, - out_c, - filter_size, - stride, - padding, - dilation=1, - num_groups=1, - if_act=True, - act=None, - use_cudnn=True, - name=""): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - - self.conv = fluid.dygraph.Conv2D( - num_channels=in_c, - num_filters=out_c, - filter_size=filter_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=num_groups, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - use_cudnn=use_cudnn, - act=None) - self.bn = BatchNorm( - num_features=out_c, - weight_attr=ParamAttr( - name=name + "_bn_scale", - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0)), - bias_attr=ParamAttr( - name=name + "_bn_offset", - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0))) - - self._act_op = layer_utils.Activation(act=None) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - if self.if_act: - if self.act == "relu": - x = fluid.layers.relu(x) - elif self.act == "hard_swish": - x = fluid.layers.hard_swish(x) - else: - print("The activation function is selected incorrectly.") - exit() - return x - - -class ResidualUnit(fluid.dygraph.Layer): - def __init__(self, - in_c, - mid_c, - out_c, - filter_size, - stride, - use_se, - dilation=1, - act=None, - name=''): - super(ResidualUnit, self).__init__() - self.if_shortcut = stride == 1 and in_c == out_c - self.if_se = use_se - - self.expand_conv = ConvBNLayer( - in_c=in_c, - out_c=mid_c, - filter_size=1, - stride=1, - padding=0, - if_act=True, - act=act, - name=name + "_expand") - - self.bottleneck_conv = ConvBNLayer( - in_c=mid_c, - out_c=mid_c, - filter_size=filter_size, - stride=stride, - 
padding=get_padding_same( - filter_size, - dilation), #int((filter_size - 1) // 2) + (dilation - 1), - dilation=dilation, - num_groups=mid_c, - if_act=True, - act=act, - name=name + "_depthwise") - if self.if_se: - self.mid_se = SEModule(mid_c, name=name + "_se") - self.linear_conv = ConvBNLayer( - in_c=mid_c, - out_c=out_c, - filter_size=1, - stride=1, - padding=0, - if_act=False, - act=None, - name=name + "_linear") - self.dilation = dilation - - def forward(self, inputs): - x = self.expand_conv(inputs) - x = self.bottleneck_conv(x) - if self.if_se: - x = self.mid_se(x) - x = self.linear_conv(x) - if self.if_shortcut: - x = fluid.layers.elementwise_add(inputs, x) - return x - - -class SEModule(fluid.dygraph.Layer): - def __init__(self, channel, reduction=4, name=""): - super(SEModule, self).__init__() - self.avg_pool = fluid.dygraph.Pool2D( - pool_type="avg", global_pooling=True, use_cudnn=False) - self.conv1 = fluid.dygraph.Conv2D( - num_channels=channel, - num_filters=channel // reduction, - filter_size=1, - stride=1, - padding=0, - act="relu", - param_attr=ParamAttr(name=name + "_1_weights"), - bias_attr=ParamAttr(name=name + "_1_offset")) - self.conv2 = fluid.dygraph.Conv2D( - num_channels=channel // reduction, - num_filters=channel, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name + "_2_weights"), - bias_attr=ParamAttr(name=name + "_2_offset")) - - def forward(self, inputs): - outputs = self.avg_pool(inputs) - outputs = self.conv1(outputs) - outputs = self.conv2(outputs) - outputs = fluid.layers.hard_sigmoid(outputs) - return fluid.layers.elementwise_mul(x=inputs, y=outputs, axis=0) - - -def MobileNetV3_small_x0_35(**kwargs): - model = MobileNetV3(model_name="small", scale=0.35, **kwargs) - return model - - -def MobileNetV3_small_x0_5(**kwargs): - model = MobileNetV3(model_name="small", scale=0.5, **kwargs) - return model - - -def MobileNetV3_small_x0_75(**kwargs): - model = MobileNetV3(model_name="small", scale=0.75, 
**kwargs) - return model - - -@manager.BACKBONES.add_component -def MobileNetV3_small_x1_0(**kwargs): - model = MobileNetV3(model_name="small", scale=1.0, **kwargs) - return model - - -def MobileNetV3_small_x1_25(**kwargs): - model = MobileNetV3(model_name="small", scale=1.25, **kwargs) - return model - - -def MobileNetV3_large_x0_35(**kwargs): - model = MobileNetV3(model_name="large", scale=0.35, **kwargs) - return model - - -def MobileNetV3_large_x0_5(**kwargs): - model = MobileNetV3(model_name="large", scale=0.5, **kwargs) - return model - - -def MobileNetV3_large_x0_75(**kwargs): - model = MobileNetV3(model_name="large", scale=0.75, **kwargs) - return model - - -@manager.BACKBONES.add_component -def MobileNetV3_large_x1_0(**kwargs): - model = MobileNetV3(model_name="large", scale=1.0, **kwargs) - return model - - -def MobileNetV3_large_x1_25(**kwargs): - model = MobileNetV3(model_name="large", scale=1.25, **kwargs) - return model diff --git a/dygraph/models/architectures/resnet_vd.py b/dygraph/models/architectures/resnet_vd.py deleted file mode 100644 index 582934505385872c60ff92204fd862836e6ae7fb..0000000000000000000000000000000000000000 --- a/dygraph/models/architectures/resnet_vd.py +++ /dev/null @@ -1,419 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import math - -import numpy as np -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout -from paddle.nn import SyncBatchNorm as BatchNorm - -from dygraph.utils import utils -from dygraph.models.architectures import layer_utils -from dygraph.cvlibs import manager -from dygraph.utils import utils - -__all__ = [ - "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd" -] - - -class ConvBNLayer(fluid.dygraph.Layer): - def __init__( - self, - num_channels, - num_filters, - filter_size, - stride=1, - dilation=1, - groups=1, - is_vd_mode=False, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = Pool2D( - pool_size=2, - pool_stride=2, - pool_padding=0, - pool_type='avg', - ceil_mode=True) - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2 if dilation == 1 else 0, - dilation=dilation, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm( - num_filters, - weight_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset')) - self._act_op = layer_utils.Activation(act=act) - - def forward(self, inputs): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - y = self._act_op(y) - - return y - - -class BottleneckBlock(fluid.dygraph.Layer): - def __init__(self, - num_channels, - num_filters, - stride, - shortcut=True, - if_first=False, - dilation=1, - name=None): - 
super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - - self.dilation = dilation - - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - dilation=dilation, - name=name + "_branch2b") - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=1, - is_vd_mode=False if if_first or stride == 1 else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - - #################################################################### - # If given dilation rate > 1, using corresponding padding - if self.dilation > 1: - padding = self.dilation - y = fluid.layers.pad( - y, [0, 0, 0, 0, padding, padding, padding, padding]) - ##################################################################### - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = fluid.layers.elementwise_add(x=short, y=conv2) - layer_helper = LayerHelper(self.full_name(), act='relu') - return layer_helper.append_activation(y) - - -class BasicBlock(fluid.dygraph.Layer): - def __init__(self, - num_channels, - num_filters, - stride, - shortcut=True, - if_first=False, - name=None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - 
self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = fluid.layers.elementwise_add(x=short, y=conv1) - - layer_helper = LayerHelper(self.full_name(), act='relu') - return layer_helper.append_activation(y) - - -class ResNet_vd(fluid.dygraph.Layer): - def __init__(self, - backbone_pretrained=None, - layers=50, - class_dim=1000, - output_stride=None, - multi_grid=(1, 2, 4), - **kwargs): - super(ResNet_vd, self).__init__() - - self.layers = layers - supported_layers = [18, 34, 50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format( - supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_channels = [64, 256, 512, 1024 - ] if layers >= 50 else [64, 64, 128, 256] - num_filters = [64, 128, 256, 512] - - dilation_dict = None - if output_stride == 8: - dilation_dict = {2: 2, 3: 4} - elif output_stride == 16: - dilation_dict = {3: 2} - - self.conv1_1 = ConvBNLayer( - num_channels=3, - num_filters=32, - filter_size=3, - stride=2, - act='relu', - name="conv1_1") - self.conv1_2 = ConvBNLayer( - num_channels=32, - num_filters=32, - filter_size=3, - stride=1, - act='relu', - name="conv1_2") - self.conv1_3 = ConvBNLayer( - num_channels=32, - num_filters=64, - filter_size=3, - stride=1, - act='relu', - name="conv1_3") - self.pool2d_max = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') - - # self.block_list = [] - self.stage_list = [] - if layers >= 50: - for block in 
range(len(depth)): - shortcut = False - block_list = [] - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - - ############################################################################### - # Add dilation rate for some segmentation tasks, if dilation_dict is not None. - dilation_rate = dilation_dict[ - block] if dilation_dict and block in dilation_dict else 1 - - # Actually block here is 'stage', and i is 'block' in 'stage' - # At the stage 4, expand the the dilation_rate using multi_grid, default (1, 2, 4) - if block == 3: - dilation_rate = dilation_rate * multi_grid[i] - #print("stage {}, block {}: dilation rate".format(block, i), dilation_rate) - ############################################################################### - - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock( - num_channels=num_channels[block] - if i == 0 else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 - and dilation_rate == 1 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name, - dilation=dilation_rate)) - - block_list.append(bottleneck_block) - shortcut = True - self.stage_list.append(block_list) - else: - for block in range(len(depth)): - shortcut = False - block_list = [] - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - basic_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BasicBlock( - num_channels=num_channels[block] - if i == 0 else num_filters[block], - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - block_list.append(basic_block) - shortcut = True - self.stage_list.append(block_list) - - self.pool2d_avg = Pool2D( - pool_size=7, 
pool_type='avg', global_pooling=True) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear( - self.pool2d_avg_channels, - class_dim, - param_attr=ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - self.init_weight(backbone_pretrained) - - def forward(self, inputs): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - - # A feature list saves the output feature map of each stage. - feat_list = [] - for i, stage in enumerate(self.stage_list): - for j, block in enumerate(stage): - y = block(y) - #print("stage {} block {}".format(i+1, j+1), y.shape) - feat_list.append(y) - - y = self.pool2d_avg(y) - y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y, feat_list - - # def init_weight(self, pretrained_model=None): - - # if pretrained_model is not None: - # if os.path.exists(pretrained_model): - # utils.load_pretrained_model(self, pretrained_model) - - def init_weight(self, pretrained_model=None): - """ - Initialize the parameters of model parts. - Args: - pretrained_model ([str], optional): the path of pretrained model. Defaults to None. 
- """ - if pretrained_model is not None: - if os.path.exists(pretrained_model): - utils.load_pretrained_model(self, pretrained_model) - else: - raise Exception('Pretrained model is not found: {}'.format( - pretrained_model)) - - -def ResNet18_vd(**args): - model = ResNet_vd(layers=18, **args) - return model - - -def ResNet34_vd(**args): - model = ResNet_vd(layers=34, **args) - return model - - -@manager.BACKBONES.add_component -def ResNet50_vd(**args): - model = ResNet_vd(layers=50, **args) - return model - - -@manager.BACKBONES.add_component -def ResNet101_vd(**args): - model = ResNet_vd(layers=101, **args) - return model - - -def ResNet152_vd(**args): - model = ResNet_vd(layers=152, **args) - return model - - -def ResNet200_vd(**args): - model = ResNet_vd(layers=200, **args) - return model diff --git a/dygraph/models/architectures/xception_deeplab.py b/dygraph/models/architectures/xception_deeplab.py deleted file mode 100644 index 4f7d97f837fcc2b7394be3ceef15b06387a5844a..0000000000000000000000000000000000000000 --- a/dygraph/models/architectures/xception_deeplab.py +++ /dev/null @@ -1,453 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os - -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout -from paddle.nn import SyncBatchNorm as BatchNorm - -from dygraph.models.architectures import layer_utils -from dygraph.cvlibs import manager -from dygraph.utils import utils - -__all__ = ["Xception41_deeplab", "Xception65_deeplab", "Xception71_deeplab"] - - -def check_data(data, number): - if type(data) == int: - return [data] * number - assert len(data) == number - return data - - -def check_stride(s, os): - if s <= os: - return True - else: - return False - - -def check_points(count, points): - if points is None: - return False - else: - if isinstance(points, list): - return (True if count in points else False) - else: - return (True if count == points else False) - - -def gen_bottleneck_params(backbone='xception_65'): - if backbone == 'xception_65': - bottleneck_params = { - "entry_flow": (3, [2, 2, 2], [128, 256, 728]), - "middle_flow": (16, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]) - } - elif backbone == 'xception_41': - bottleneck_params = { - "entry_flow": (3, [2, 2, 2], [128, 256, 728]), - "middle_flow": (8, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]) - } - elif backbone == 'xception_71': - bottleneck_params = { - "entry_flow": (5, [2, 1, 2, 1, 2], [128, 256, 256, 728, 728]), - "middle_flow": (16, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]) - } - else: - raise Exception( - "xception backbont only support xception_41/xception_65/xception_71" - ) - return bottleneck_params - - -class ConvBNLayer(fluid.dygraph.Layer): - def __init__(self, - input_channels, - output_channels, - filter_size, - stride=1, - padding=0, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - num_channels=input_channels, - 
num_filters=output_channels, - filter_size=filter_size, - stride=stride, - padding=padding, - param_attr=ParamAttr(name=name + "/weights"), - bias_attr=False) - self._bn = BatchNorm( - num_features=output_channels, - epsilon=1e-3, - momentum=0.99, - weight_attr=ParamAttr(name=name + "/BatchNorm/gamma"), - bias_attr=ParamAttr(name=name + "/BatchNorm/beta")) - - self._act_op = layer_utils.Activation(act=act) - - def forward(self, inputs): - - return self._act_op(self._bn(self._conv(inputs))) - - -class Seperate_Conv(fluid.dygraph.Layer): - def __init__(self, - input_channels, - output_channels, - stride, - filter, - dilation=1, - act=None, - name=None): - super(Seperate_Conv, self).__init__() - - self._conv1 = Conv2D( - num_channels=input_channels, - num_filters=input_channels, - filter_size=filter, - stride=stride, - groups=input_channels, - padding=(filter) // 2 * dilation, - dilation=dilation, - param_attr=ParamAttr(name=name + "/depthwise/weights"), - bias_attr=False) - self._bn1 = BatchNorm( - input_channels, - epsilon=1e-3, - momentum=0.99, - weight_attr=ParamAttr(name=name + "/depthwise/BatchNorm/gamma"), - bias_attr=ParamAttr(name=name + "/depthwise/BatchNorm/beta")) - - self._act_op1 = layer_utils.Activation(act=act) - - self._conv2 = Conv2D( - input_channels, - output_channels, - 1, - stride=1, - groups=1, - padding=0, - param_attr=ParamAttr(name=name + "/pointwise/weights"), - bias_attr=False) - self._bn2 = BatchNorm( - output_channels, - epsilon=1e-3, - momentum=0.99, - weight_attr=ParamAttr(name=name + "/pointwise/BatchNorm/gamma"), - bias_attr=ParamAttr(name=name + "/pointwise/BatchNorm/beta")) - - self._act_op2 = layer_utils.Activation(act=act) - - def forward(self, inputs): - x = self._conv1(inputs) - x = self._bn1(x) - x = self._act_op1(x) - x = self._conv2(x) - x = self._bn2(x) - x = self._act_op2(x) - return x - - -class Xception_Block(fluid.dygraph.Layer): - def __init__(self, - input_channels, - output_channels, - strides=1, - filter_size=3, - 
dilation=1, - skip_conv=True, - has_skip=True, - activation_fn_in_separable_conv=False, - name=None): - super(Xception_Block, self).__init__() - - repeat_number = 3 - output_channels = check_data(output_channels, repeat_number) - filter_size = check_data(filter_size, repeat_number) - strides = check_data(strides, repeat_number) - - self.has_skip = has_skip - self.skip_conv = skip_conv - self.activation_fn_in_separable_conv = activation_fn_in_separable_conv - if not activation_fn_in_separable_conv: - self._conv1 = Seperate_Conv( - input_channels, - output_channels[0], - stride=strides[0], - filter=filter_size[0], - dilation=dilation, - name=name + "/separable_conv1") - self._conv2 = Seperate_Conv( - output_channels[0], - output_channels[1], - stride=strides[1], - filter=filter_size[1], - dilation=dilation, - name=name + "/separable_conv2") - self._conv3 = Seperate_Conv( - output_channels[1], - output_channels[2], - stride=strides[2], - filter=filter_size[2], - dilation=dilation, - name=name + "/separable_conv3") - else: - self._conv1 = Seperate_Conv( - input_channels, - output_channels[0], - stride=strides[0], - filter=filter_size[0], - act="relu", - dilation=dilation, - name=name + "/separable_conv1") - self._conv2 = Seperate_Conv( - output_channels[0], - output_channels[1], - stride=strides[1], - filter=filter_size[1], - act="relu", - dilation=dilation, - name=name + "/separable_conv2") - self._conv3 = Seperate_Conv( - output_channels[1], - output_channels[2], - stride=strides[2], - filter=filter_size[2], - act="relu", - dilation=dilation, - name=name + "/separable_conv3") - - if has_skip and skip_conv: - self._short = ConvBNLayer( - input_channels, - output_channels[-1], - 1, - stride=strides[-1], - padding=0, - name=name + "/shortcut") - - def forward(self, inputs): - layer_helper = LayerHelper(self.full_name(), act='relu') - if not self.activation_fn_in_separable_conv: - x = layer_helper.append_activation(inputs) - x = self._conv1(x) - x = 
layer_helper.append_activation(x) - x = self._conv2(x) - x = layer_helper.append_activation(x) - x = self._conv3(x) - else: - x = self._conv1(inputs) - x = self._conv2(x) - x = self._conv3(x) - if self.has_skip is False: - return x - if self.skip_conv: - skip = self._short(inputs) - else: - skip = inputs - return fluid.layers.elementwise_add(x, skip) - - -class XceptionDeeplab(fluid.dygraph.Layer): - - #def __init__(self, backbone, class_dim=1000): - # add output_stride - def __init__(self, - backbone, - backbone_pretrained=None, - output_stride=16, - class_dim=1000, - **kwargs): - - super(XceptionDeeplab, self).__init__() - - bottleneck_params = gen_bottleneck_params(backbone) - self.backbone = backbone - - self._conv1 = ConvBNLayer( - 3, - 32, - 3, - stride=2, - padding=1, - act="relu", - name=self.backbone + "/entry_flow/conv1") - self._conv2 = ConvBNLayer( - 32, - 64, - 3, - stride=1, - padding=1, - act="relu", - name=self.backbone + "/entry_flow/conv2") - """ - bottleneck_params = { - "entry_flow": (3, [2, 2, 2], [128, 256, 728]), - "middle_flow": (16, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]) - } - - if output_stride == 16: - entry_block3_stride = 2 - middle_block_dilation = 1 - exit_block_dilations = (1, 2) - elif output_stride == 8: - entry_block3_stride = 1 - middle_block_dilation = 2 - exit_block_dilations = (2, 4) - - """ - self.block_num = bottleneck_params["entry_flow"][0] - self.strides = bottleneck_params["entry_flow"][1] - self.chns = bottleneck_params["entry_flow"][2] - self.strides = check_data(self.strides, self.block_num) - self.chns = check_data(self.chns, self.block_num) - - self.entry_flow = [] - self.middle_flow = [] - - self.stride = 2 - self.output_stride = output_stride - s = self.stride - - for i in range(self.block_num): - stride = self.strides[i] if check_stride(s * self.strides[i], - self.output_stride) else 1 - xception_block = self.add_sublayer( - self.backbone + "/entry_flow/block" + str(i + 1), - 
Xception_Block( - input_channels=64 if i == 0 else self.chns[i - 1], - output_channels=self.chns[i], - strides=[1, 1, self.stride], - name=self.backbone + "/entry_flow/block" + str(i + 1))) - self.entry_flow.append(xception_block) - s = s * stride - self.stride = s - - self.block_num = bottleneck_params["middle_flow"][0] - self.strides = bottleneck_params["middle_flow"][1] - self.chns = bottleneck_params["middle_flow"][2] - self.strides = check_data(self.strides, self.block_num) - self.chns = check_data(self.chns, self.block_num) - s = self.stride - - for i in range(self.block_num): - stride = self.strides[i] if check_stride(s * self.strides[i], - self.output_stride) else 1 - xception_block = self.add_sublayer( - self.backbone + "/middle_flow/block" + str(i + 1), - Xception_Block( - input_channels=728, - output_channels=728, - strides=[1, 1, self.strides[i]], - skip_conv=False, - name=self.backbone + "/middle_flow/block" + str(i + 1))) - self.middle_flow.append(xception_block) - s = s * stride - self.stride = s - - self.block_num = bottleneck_params["exit_flow"][0] - self.strides = bottleneck_params["exit_flow"][1] - self.chns = bottleneck_params["exit_flow"][2] - self.strides = check_data(self.strides, self.block_num) - self.chns = check_data(self.chns, self.block_num) - s = self.stride - stride = self.strides[0] if check_stride(s * self.strides[0], - self.output_stride) else 1 - self._exit_flow_1 = Xception_Block( - 728, - self.chns[0], [1, 1, stride], - name=self.backbone + "/exit_flow/block1") - s = s * stride - stride = self.strides[1] if check_stride(s * self.strides[1], - self.output_stride) else 1 - self._exit_flow_2 = Xception_Block( - self.chns[0][-1], - self.chns[1], [1, 1, stride], - dilation=2, - has_skip=False, - activation_fn_in_separable_conv=True, - name=self.backbone + "/exit_flow/block2") - s = s * stride - - self.stride = s - - self._drop = Dropout(p=0.5) - self._pool = Pool2D(pool_type="avg", global_pooling=True) - self._fc = Linear( - 
self.chns[1][-1], - class_dim, - param_attr=ParamAttr(name="fc_weights"), - bias_attr=ParamAttr(name="fc_bias")) - - self.init_weight(backbone_pretrained) - - def forward(self, inputs): - x = self._conv1(inputs) - x = self._conv2(x) - feat_list = [] - for i, ef in enumerate(self.entry_flow): - x = ef(x) - if i == 0: - feat_list.append(x) - for mf in self.middle_flow: - x = mf(x) - x = self._exit_flow_1(x) - x = self._exit_flow_2(x) - feat_list.append(x) - - x = self._drop(x) - x = self._pool(x) - x = fluid.layers.squeeze(x, axes=[2, 3]) - x = self._fc(x) - return x, feat_list - - def init_weight(self, pretrained_model=None): - """ - Initialize the parameters of model parts. - Args: - pretrained_model ([str], optional): the path of pretrained model. Defaults to None. - """ - if pretrained_model is not None: - if os.path.exists(pretrained_model): - utils.load_pretrained_model(self, pretrained_model) - else: - raise Exception('Pretrained model is not found: {}'.format( - pretrained_model)) - - -def Xception41_deeplab(**args): - model = XceptionDeeplab('xception_41', **args) - return model - - -@manager.BACKBONES.add_component -def Xception65_deeplab(**args): - model = XceptionDeeplab("xception_65", **args) - return model - - -def Xception71_deeplab(**args): - model = XceptionDeeplab("xception_71", **args) - return model diff --git a/dygraph/models/deeplab.py b/dygraph/models/deeplab.py deleted file mode 100644 index 6911b63900d62b427e94a2b22e4919f6b664f250..0000000000000000000000000000000000000000 --- a/dygraph/models/deeplab.py +++ /dev/null @@ -1,363 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -from dygraph.cvlibs import manager -from dygraph.models.architectures import layer_utils -from paddle import fluid -from paddle.fluid import dygraph -from paddle.fluid.dygraph import Conv2D - -from dygraph.utils import utils - -__all__ = [ - 'DeepLabV3P', "deeplabv3p_resnet101_vd", "deeplabv3p_resnet101_vd_os8", - "deeplabv3p_resnet50_vd", "deeplabv3p_resnet50_vd_os8", - "deeplabv3p_xception65_deeplab", "deeplabv3p_mobilenetv3_large", - "deeplabv3p_mobilenetv3_small" -] - - -class ImageAverage(dygraph.Layer): - """ - Global average pooling - - Args: - num_channels (int): the number of input channels. - - """ - - def __init__(self, num_channels): - super(ImageAverage, self).__init__() - self.conv_bn_relu = layer_utils.ConvBnRelu( - num_channels, num_filters=256, filter_size=1) - - def forward(self, input): - x = fluid.layers.reduce_mean(input, dim=[2, 3], keep_dim=True) - x = self.conv_bn_relu(x) - x = fluid.layers.resize_bilinear(x, out_shape=input.shape[2:]) - return x - - -class ASPP(dygraph.Layer): - """ - Decoder module of DeepLabV3P model - - Args: - output_stride (int): the ratio of input size and final feature size. Support 16 or 8. - in_channels (int): the number of input channels in decoder module. - using_sep_conv (bool): whether use separable conv or not. Default to True. 
- """ - - def __init__(self, output_stride, in_channels, using_sep_conv=True): - super(ASPP, self).__init__() - - if output_stride == 16: - aspp_ratios = (6, 12, 18) - elif output_stride == 8: - aspp_ratios = (12, 24, 36) - else: - raise NotImplementedError( - "Only support output_stride is 8 or 16, but received{}".format( - output_stride)) - - self.image_average = ImageAverage(num_channels=in_channels) - - # The first aspp using 1*1 conv - self.aspp1 = layer_utils.ConvBnRelu( - num_channels=in_channels, - num_filters=256, - filter_size=1, - using_sep_conv=False) - - # The second aspp using 3*3 (separable) conv at dilated rate aspp_ratios[0] - self.aspp2 = layer_utils.ConvBnRelu( - num_channels=in_channels, - num_filters=256, - filter_size=3, - using_sep_conv=using_sep_conv, - dilation=aspp_ratios[0], - padding=aspp_ratios[0]) - - # The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[1] - self.aspp3 = layer_utils.ConvBnRelu( - num_channels=in_channels, - num_filters=256, - filter_size=3, - using_sep_conv=using_sep_conv, - dilation=aspp_ratios[1], - padding=aspp_ratios[1]) - - # The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[2] - self.aspp4 = layer_utils.ConvBnRelu( - num_channels=in_channels, - num_filters=256, - filter_size=3, - using_sep_conv=using_sep_conv, - dilation=aspp_ratios[2], - padding=aspp_ratios[2]) - - # After concat op, using 1*1 conv - self.conv_bn_relu = layer_utils.ConvBnRelu( - num_channels=1280, num_filters=256, filter_size=1) - - def forward(self, x): - - x1 = self.image_average(x) - x2 = self.aspp1(x) - x3 = self.aspp2(x) - x4 = self.aspp3(x) - x5 = self.aspp4(x) - x = fluid.layers.concat([x1, x2, x3, x4, x5], axis=1) - - x = self.conv_bn_relu(x) - x = fluid.layers.dropout(x, dropout_prob=0.1) - return x - - -class Decoder(dygraph.Layer): - """ - Decoder module of DeepLabV3P model - - Args: - num_classes (int): the number of classes. - in_channels (int): the number of input channels in decoder module. 
- using_sep_conv (bool): whether use separable conv or not. Default to True. - - """ - - def __init__(self, num_classes, in_channels, using_sep_conv=True): - super(Decoder, self).__init__() - - self.conv_bn_relu1 = layer_utils.ConvBnRelu( - num_channels=in_channels, num_filters=48, filter_size=1) - - self.conv_bn_relu2 = layer_utils.ConvBnRelu( - num_channels=304, - num_filters=256, - filter_size=3, - using_sep_conv=using_sep_conv, - padding=1) - self.conv_bn_relu3 = layer_utils.ConvBnRelu( - num_channels=256, - num_filters=256, - filter_size=3, - using_sep_conv=using_sep_conv, - padding=1) - self.conv = Conv2D( - num_channels=256, num_filters=num_classes, filter_size=1) - - def forward(self, x, low_level_feat): - low_level_feat = self.conv_bn_relu1(low_level_feat) - x = fluid.layers.resize_bilinear(x, low_level_feat.shape[2:]) - x = fluid.layers.concat([x, low_level_feat], axis=1) - x = self.conv_bn_relu2(x) - x = self.conv_bn_relu3(x) - x = self.conv(x) - return x - - -@manager.MODELS.add_component -class DeepLabV3P(dygraph.Layer): - """ - The DeepLabV3P consists of three main components, Backbone, ASPP and Decoder - The orginal artile refers to - "Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation" - Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, Hartwig Adam. - (https://arxiv.org/abs/1802.02611) - - Args: - num_classes (int): the unique number of target classes. - - backbone (paddle.nn.Layer): backbone networks, currently support Xception65, Resnet101_vd. Default Resnet101_vd. - - model_pretrained (str): the path of pretrained model. - - output_stride (int): the ratio of input size and final feature size. Default 16. - - backbone_indices (tuple): two values in the tuple indicte the indices of output of backbone. - the first index will be taken as a low-level feature in Deconder component; - the second one will be taken as input of ASPP component. 
- Usually backbone consists of four downsampling stage, and return an output of - each stage, so we set default (0, 3), which means taking feature map of the first - stage in backbone as low-level feature used in Decoder, and feature map of the fourth - stage as input of ASPP. - - backbone_channels (tuple): the same length with "backbone_indices". It indicates the channels of corresponding index. - - ignore_index (int): the value of ground-truth mask would be ignored while doing evaluation. Default 255. - - using_sep_conv (bool): a bool value indicates whether using separable convolutions - in ASPP and Decoder components. Default True. - """ - - def __init__(self, - num_classes, - backbone, - model_pretrained=None, - output_stride=16, - backbone_indices=(0, 3), - backbone_channels=(256, 2048), - ignore_index=255, - using_sep_conv=True): - - super(DeepLabV3P, self).__init__() - - # self.backbone = manager.BACKBONES[backbone](output_stride=output_stride) - self.backbone = backbone - self.aspp = ASPP(output_stride, backbone_channels[1], using_sep_conv) - self.decoder = Decoder(num_classes, backbone_channels[0], - using_sep_conv) - self.ignore_index = ignore_index - self.EPS = 1e-5 - self.backbone_indices = backbone_indices - self.init_weight(model_pretrained) - - def forward(self, input, label=None): - - _, feat_list = self.backbone(input) - low_level_feat = feat_list[self.backbone_indices[0]] - x = feat_list[self.backbone_indices[1]] - x = self.aspp(x) - logit = self.decoder(x, low_level_feat) - logit = fluid.layers.resize_bilinear(logit, input.shape[2:]) - - if self.training: - return self._get_loss(logit, label) - else: - score_map = fluid.layers.softmax(logit, axis=1) - score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1]) - pred = fluid.layers.argmax(score_map, axis=3) - pred = fluid.layers.unsqueeze(pred, axes=[3]) - return pred, score_map - - def init_weight(self, pretrained_model=None): - """ - Initialize the parameters of model parts. 
- Args: - pretrained_model ([str], optional): the path of pretrained model. Defaults to None. - """ - if pretrained_model is not None: - if os.path.exists(pretrained_model): - utils.load_pretrained_model(self, pretrained_model) - else: - raise Exception('Pretrained model is not found: {}'.format( - pretrained_model)) - - def _get_loss(self, logit, label): - """ - compute forward loss of the model - - Args: - logit (tensor): the logit of model output - label (tensor): ground truth - - Returns: - avg_loss (tensor): forward loss - """ - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - label = fluid.layers.transpose(label, [0, 2, 3, 1]) - mask = label != self.ignore_index - mask = fluid.layers.cast(mask, 'float32') - loss, probs = fluid.layers.softmax_with_cross_entropy( - logit, - label, - ignore_index=self.ignore_index, - return_softmax=True, - axis=-1) - - loss = loss * mask - avg_loss = fluid.layers.mean(loss) / ( - fluid.layers.mean(mask) + self.EPS) - - label.stop_gradient = True - mask.stop_gradient = True - - return avg_loss - - -def build_aspp(output_stride, using_sep_conv): - return ASPP(output_stride=output_stride, using_sep_conv=using_sep_conv) - - -def build_decoder(num_classes, using_sep_conv): - return Decoder(num_classes, using_sep_conv=using_sep_conv) - - -@manager.MODELS.add_component -def deeplabv3p_resnet101_vd(*args, **kwargs): - pretrained_model = None - return DeepLabV3P( - backbone='ResNet101_vd', pretrained_model=pretrained_model, **kwargs) - - -@manager.MODELS.add_component -def deeplabv3p_resnet101_vd_os8(*args, **kwargs): - pretrained_model = None - return DeepLabV3P( - backbone='ResNet101_vd', - output_stride=8, - pretrained_model=pretrained_model, - **kwargs) - - -@manager.MODELS.add_component -def deeplabv3p_resnet50_vd(*args, **kwargs): - pretrained_model = None - return DeepLabV3P( - backbone='ResNet50_vd', pretrained_model=pretrained_model, **kwargs) - - -@manager.MODELS.add_component -def deeplabv3p_resnet50_vd_os8(*args, 
**kwargs): - pretrained_model = None - return DeepLabV3P( - backbone='ResNet50_vd', - output_stride=8, - pretrained_model=pretrained_model, - **kwargs) - - -@manager.MODELS.add_component -def deeplabv3p_xception65_deeplab(*args, **kwargs): - pretrained_model = None - return DeepLabV3P( - backbone='Xception65_deeplab', - pretrained_model=pretrained_model, - backbone_indices=(0, 1), - backbone_channels=(128, 2048), - **kwargs) - - -@manager.MODELS.add_component -def deeplabv3p_mobilenetv3_large(*args, **kwargs): - pretrained_model = None - return DeepLabV3P( - backbone='MobileNetV3_large_x1_0', - pretrained_model=pretrained_model, - backbone_indices=(0, 3), - backbone_channels=(24, 160), - **kwargs) - - -@manager.MODELS.add_component -def deeplabv3p_mobilenetv3_small(*args, **kwargs): - pretrained_model = None - return DeepLabV3P( - backbone='MobileNetV3_small_x1_0', - pretrained_model=pretrained_model, - backbone_indices=(0, 3), - backbone_channels=(16, 96), - **kwargs) diff --git a/dygraph/models/fast_scnn.py b/dygraph/models/fast_scnn.py deleted file mode 100644 index 6bd9b4d6e656bcb3530c50be120293b4f3fb05c6..0000000000000000000000000000000000000000 --- a/dygraph/models/fast_scnn.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle import fluid, nn - -from dygraph.cvlibs import manager -from dygraph.models import model_utils, pspnet -from dygraph.models.architectures import layer_utils - - -@manager.MODELS.add_component -class FastSCNN(fluid.dygraph.Layer): - """ - The FastSCNN implementation. - - As mentioned in original paper, FastSCNN is a real-time segmentation algorithm (123.5fps) - even for high resolution images (1024x2048). - - The orginal artile refers to - Poudel, Rudra PK, et al. "Fast-scnn: Fast semantic segmentation network." - (https://arxiv.org/pdf/1902.04502.pdf) - - Args: - - num_classes (int): the unique number of target classes. Default to 2. - - enable_auxiliary_loss (bool): a bool values indictes whether adding auxiliary loss. - if true, auxiliary loss will be added after LearningToDownsample module, where the weight is 0.4. Default to False. - - ignore_index (int): the value of ground-truth mask would be ignored while doing evaluation. Default to 255. - """ - - def __init__(self, - num_classes=2, - enable_auxiliary_loss=False, - ignore_index=255): - - super(FastSCNN, self).__init__() - - self.learning_to_downsample = LearningToDownsample(32, 48, 64) - self.global_feature_extractor = GlobalFeatureExtractor(64, [64, 96, 128], 128, 6, [3, 3, 3]) - self.feature_fusion = FeatureFusionModule(64, 128, 128) - self.classifier = Classifier(128, num_classes) - - if enable_auxiliary_loss: - self.auxlayer = model_utils.AuxLayer(64, 32, num_classes) - - self.enable_auxiliary_loss = enable_auxiliary_loss - self.ignore_index = ignore_index - - def forward(self, input, label=None): - - higher_res_features = self.learning_to_downsample(input) - x = self.global_feature_extractor(higher_res_features) - x = self.feature_fusion(higher_res_features, x) - logit = self.classifier(x) - logit = fluid.layers.resize_bilinear(logit, input.shape[2:]) - - if self.enable_auxiliary_loss: - auxiliary_logit = self.auxlayer(higher_res_features) - auxiliary_logit = 
fluid.layers.resize_bilinear(auxiliary_logit, input.shape[2:]) - - if self.training: - loss = model_utils.get_loss(logit, label) - if self.enable_auxiliary_loss: - auxiliary_loss = model_utils.get_loss(auxiliary_logit, label) - loss += (0.4 * auxiliary_loss) - return loss - else: - pred, score_map = model_utils.get_pred_score_map(logit) - return pred, score_map - - -class LearningToDownsample(fluid.dygraph.Layer): - """ - Learning to downsample module. - - This module consists of three downsampling blocks (one Conv and two separable Conv) - - Args: - dw_channels1 (int): the input channels of the first sep conv. Default to 32. - - dw_channels2 (int): the input channels of the second sep conv. Default to 48. - - out_channels (int): the output channels of LearningToDownsample module. Default to 64. - """ - - def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64): - super(LearningToDownsample, self).__init__() - - self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=3, - num_filters=dw_channels1, - filter_size=3, - stride=2) - self.dsconv_bn_relu1 = layer_utils.ConvBnRelu(num_channels=dw_channels1, - num_filters=dw_channels2, - filter_size=3, - using_sep_conv=True, # using sep conv - stride=2, - padding=1) - self.dsconv_bn_relu2 = layer_utils.ConvBnRelu(num_channels=dw_channels2, - num_filters=out_channels, - filter_size=3, - using_sep_conv=True, # using sep conv - stride=2, - padding=1) - - def forward(self, x): - x = self.conv_bn_relu(x) - x = self.dsconv_bn_relu1(x) - x = self.dsconv_bn_relu2(x) - return x - - -class GlobalFeatureExtractor(fluid.dygraph.Layer): - """ - Global feature extractor module - - This module consists of three LinearBottleneck blocks (like inverted residual introduced by MobileNetV2) and - a PPModule (introduced by PSPNet). - - Args: - in_channels (int): the number of input channels to the module. Default to 64. - block_channels (tuple): a tuple represents output channels of each bottleneck block. Default to (64, 96, 128). 
- out_channels (int): the number of output channels of the module. Default to 128. - expansion (int): the expansion factor in bottleneck. Default to 6. - num_blocks (tuple): it indicates the repeat time of each bottleneck. Default to (3, 3, 3). - """ - - def __init__(self, in_channels=64, block_channels=(64, 96, 128), - out_channels=128, expansion=6, num_blocks=(3, 3, 3)): - super(GlobalFeatureExtractor, self).__init__() - - self.bottleneck1 = self._make_layer(LinearBottleneck, in_channels, block_channels[0], num_blocks[0], expansion, - 2) - self.bottleneck2 = self._make_layer(LinearBottleneck, block_channels[0], block_channels[1], num_blocks[1], - expansion, 2) - self.bottleneck3 = self._make_layer(LinearBottleneck, block_channels[1], block_channels[2], num_blocks[2], - expansion, 1) - - self.ppm = pspnet.PPModule(block_channels[2], out_channels, dim_reduction=True) - - def _make_layer(self, block, in_channels, out_channels, blocks, expansion=6, stride=1): - layers = [] - layers.append(block(in_channels, out_channels, expansion, stride)) - for i in range(1, blocks): - layers.append(block(out_channels, out_channels, expansion, 1)) - return nn.Sequential(*layers) - - def forward(self, x): - x = self.bottleneck1(x) - x = self.bottleneck2(x) - x = self.bottleneck3(x) - x = self.ppm(x) - return x - - -class LinearBottleneck(fluid.dygraph.Layer): - """ - Single bottleneck implementation. - - Args: - in_channels (int): the number of input channels to bottleneck block. - - out_channels (int): the number of output channels of bottleneck block. - - expansion (int). the expansion factor in bottleneck. Default to 6. - - stride (int). the stride used in depth-wise conv. 
- """ - - def __init__(self, in_channels, out_channels, expansion=6, stride=2, **kwargs): - super(LinearBottleneck, self).__init__() - - self.use_shortcut = stride == 1 and in_channels == out_channels - - expand_channels = in_channels * expansion - self.block = nn.Sequential( - # pw - layer_utils.ConvBnRelu(num_channels=in_channels, - num_filters=expand_channels, - filter_size=1, - bias_attr=False), - # dw - layer_utils.ConvBnRelu(num_channels=expand_channels, - num_filters=expand_channels, - filter_size=3, - stride=stride, - padding=1, - groups=expand_channels, - bias_attr=False), - # pw-linear - nn.Conv2D(num_channels=expand_channels, - num_filters=out_channels, - filter_size=1, - bias_attr=False), - - nn.BatchNorm(out_channels) - ) - - def forward(self, x): - out = self.block(x) - if self.use_shortcut: - out = x + out - return out - - -class FeatureFusionModule(fluid.dygraph.Layer): - """ - Feature Fusion Module Implememtation. - - This module fuses high-resolution feature and low-resolution feature. - - Args: - high_in_channels (int): the channels of high-resolution feature (output of LearningToDownsample). - - low_in_channels (int). the channels of low-resolution feature (output of GlobalFeatureExtractor). - - out_channels (int). the output channels of this module. 
- """ - - def __init__(self, high_in_channels, low_in_channels, out_channels): - super(FeatureFusionModule, self).__init__() - - # There only depth-wise conv is used WITHOUT point-sied conv - self.dwconv = layer_utils.ConvBnRelu(num_channels=low_in_channels, - num_filters=out_channels, - filter_size=3, - padding=1, - groups=128) - - self.conv_low_res = nn.Sequential( - nn.Conv2D(num_channels=out_channels, num_filters=out_channels, filter_size=1), - nn.BatchNorm(out_channels)) - - self.conv_high_res = nn.Sequential( - nn.Conv2D(num_channels=high_in_channels, num_filters=out_channels, filter_size=1), - nn.BatchNorm(out_channels)) - - self.relu = nn.ReLU(True) - - def forward(self, high_res_input, low_res_input): - low_res_input = fluid.layers.resize_bilinear(input=low_res_input, scale=4) - low_res_input = self.dwconv(low_res_input) - low_res_input = self.conv_low_res(low_res_input) - - high_res_input = self.conv_high_res(high_res_input) - - x = high_res_input + low_res_input - - return self.relu(x) - - -class Classifier(fluid.dygraph.Layer): - """ - The Classifier module implemetation. - - This module consists of two depth-wsie conv and one conv. - - Args: - input_channels (int): the input channels to this module. - - num_classes (int). the unique number of target classes. 
- - """ - - def __init__(self, input_channels, num_classes): - super(Classifier, self).__init__() - - self.dsconv1 = layer_utils.ConvBnRelu(num_channels=input_channels, - num_filters=input_channels, - filter_size=3, - using_sep_conv=True # using sep conv - ) - - self.dsconv2 = layer_utils.ConvBnRelu(num_channels=input_channels, - num_filters=input_channels, - filter_size=3, - using_sep_conv=True # using sep conv - ) - - self.conv = nn.Conv2D(num_channels=input_channels, - num_filters=num_classes, - filter_size=1) - - def forward(self, x): - x = self.dsconv1(x) - x = self.dsconv2(x) - x = fluid.layers.dropout(x, dropout_prob=0.1) - x = self.conv(x) - return x diff --git a/dygraph/models/fcn.py b/dygraph/models/fcn.py deleted file mode 100644 index 5201cc98b090cc6e0fd48e093a8b43c277aca631..0000000000000000000000000000000000000000 --- a/dygraph/models/fcn.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import math -import os - -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear -from paddle.fluid.initializer import Normal -from paddle.nn import SyncBatchNorm as BatchNorm - -from dygraph.cvlibs import manager -from dygraph import utils -from dygraph.cvlibs import param_init -from dygraph.utils import logger - -__all__ = [ - "fcn_hrnet_w18_small_v1", "fcn_hrnet_w18_small_v2", "fcn_hrnet_w18", - "fcn_hrnet_w30", "fcn_hrnet_w32", "fcn_hrnet_w40", "fcn_hrnet_w44", - "fcn_hrnet_w48", "fcn_hrnet_w60", "fcn_hrnet_w64" -] - - -@manager.MODELS.add_component -class FCN(fluid.dygraph.Layer): - """ - Fully Convolutional Networks for Semantic Segmentation. - https://arxiv.org/abs/1411.4038 - - Args: - num_classes (int): the unique number of target classes. - - backbone (paddle.nn.Layer): backbone networks. - - model_pretrained (str): the path of pretrained model. - - backbone_indices (tuple): one values in the tuple indicte the indices of output of backbone.Default -1. - - backbone_channels (tuple): the same length with "backbone_indices". It indicates the channels of corresponding index. - - channels (int): channels after conv layer before the last one. 
- """ - - def __init__(self, - num_classes, - backbone, - backbone_pretrained=None, - model_pretrained=None, - backbone_indices=(-1, ), - backbone_channels=(270, ), - channels=None): - super(FCN, self).__init__() - - self.num_classes = num_classes - self.backbone_pretrained = backbone_pretrained - self.model_pretrained = model_pretrained - self.backbone_indices = backbone_indices - if channels is None: - channels = backbone_channels[backbone_indices[0]] - - self.backbone = backbone - self.conv_last_2 = ConvBNLayer( - num_channels=backbone_channels[backbone_indices[0]], - num_filters=channels, - filter_size=1, - stride=1) - self.conv_last_1 = Conv2D( - num_channels=channels, - num_filters=self.num_classes, - filter_size=1, - stride=1, - padding=0) - if self.training: - self.init_weight() - - def forward(self, x): - input_shape = x.shape[2:] - fea_list = self.backbone(x) - x = fea_list[self.backbone_indices[0]] - x = self.conv_last_2(x) - logit = self.conv_last_1(x) - logit = fluid.layers.resize_bilinear(logit, input_shape) - return [logit] - - def init_weight(self): - params = self.parameters() - for param in params: - param_name = param.name - if 'batch_norm' in param_name: - if 'w_0' in param_name: - param_init.constant_init(param, value=1.0) - elif 'b_0' in param_name: - param_init.constant_init(param, value=0.0) - if 'conv' in param_name and 'w_0' in param_name: - param_init.normal_init(param, scale=0.001) - - if self.model_pretrained is not None: - if os.path.exists(self.model_pretrained): - utils.load_pretrained_model(self, self.model_pretrained) - else: - raise Exception('Pretrained model is not found: {}'.format( - self.model_pretrained)) - elif self.backbone_pretrained is not None: - if os.path.exists(self.backbone_pretrained): - utils.load_pretrained_model(self.backbone, - self.backbone_pretrained) - else: - raise Exception('Pretrained model is not found: {}'.format( - self.backbone_pretrained)) - else: - logger.warning('No pretrained model to load, train 
from scratch') - - -class ConvBNLayer(fluid.dygraph.Layer): - def __init__(self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - act="relu"): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - bias_attr=False) - self._batch_norm = BatchNorm(num_filters) - self.act = act - - def forward(self, input): - y = self._conv(input) - y = self._batch_norm(y) - if self.act == 'relu': - y = fluid.layers.relu(y) - return y - - -@manager.MODELS.add_component -def fcn_hrnet_w18_small_v1(*args, **kwargs): - return FCN(backbone='HRNet_W18_Small_V1', backbone_channels=(240), **kwargs) - - -@manager.MODELS.add_component -def fcn_hrnet_w18_small_v2(*args, **kwargs): - return FCN(backbone='HRNet_W18_Small_V2', backbone_channels=(270), **kwargs) - - -@manager.MODELS.add_component -def fcn_hrnet_w18(*args, **kwargs): - return FCN(backbone='HRNet_W18', backbone_channels=(270), **kwargs) - - -@manager.MODELS.add_component -def fcn_hrnet_w30(*args, **kwargs): - return FCN(backbone='HRNet_W30', backbone_channels=(450), **kwargs) - - -@manager.MODELS.add_component -def fcn_hrnet_w32(*args, **kwargs): - return FCN(backbone='HRNet_W32', backbone_channels=(480), **kwargs) - - -@manager.MODELS.add_component -def fcn_hrnet_w40(*args, **kwargs): - return FCN(backbone='HRNet_W40', backbone_channels=(600), **kwargs) - - -@manager.MODELS.add_component -def fcn_hrnet_w44(*args, **kwargs): - return FCN(backbone='HRNet_W44', backbone_channels=(660), **kwargs) - - -@manager.MODELS.add_component -def fcn_hrnet_w48(*args, **kwargs): - return FCN(backbone='HRNet_W48', backbone_channels=(720), **kwargs) - - -@manager.MODELS.add_component -def fcn_hrnet_w60(*args, **kwargs): - return FCN(backbone='HRNet_W60', backbone_channels=(900), **kwargs) - - -@manager.MODELS.add_component -def fcn_hrnet_w64(*args, **kwargs): - return 
FCN(backbone='HRNet_W64', backbone_channels=(960), **kwargs) diff --git a/dygraph/models/losses/__init__.py b/dygraph/models/losses/__init__.py deleted file mode 100644 index f58a9fe1dccce025fa5ee9dec8887fbfc3b9deb8..0000000000000000000000000000000000000000 --- a/dygraph/models/losses/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .cross_entroy_loss import CrossEntropyLoss diff --git a/dygraph/models/losses/cross_entroy_loss.py b/dygraph/models/losses/cross_entroy_loss.py deleted file mode 100644 index a3de4eb68e4b13cdc66d182218433308c0902398..0000000000000000000000000000000000000000 --- a/dygraph/models/losses/cross_entroy_loss.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import paddle -from paddle import nn -import paddle.nn.functional as F -import paddle.fluid as fluid - -from dygraph.cvlibs import manager -''' -@manager.LOSSES.add_component -class CrossEntropyLoss(nn.CrossEntropyLoss): - """ - Implements the cross entropy loss function. - - Args: - weight (Tensor): Weight tensor, a manual rescaling weight given - to each class and the shape is (C). It has the same dimensions as class - number and the data type is float32, float64. Default ``'None'``. - ignore_index (int64): Specifies a target value that is ignored - and does not contribute to the input gradient. Default ``255``. - reduction (str): Indicate how to average the loss by batch_size, - the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. - If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned; - If :attr:`size_average` is ``'sum'``, the reduced sum loss is returned. - If :attr:`reduction` is ``'none'``, the unreduced loss is returned. - Default ``'mean'``. - - """ - - def __init__(self, weight=None, ignore_index=255, reduction='mean'): - self.weight = weight - self.ignore_index = ignore_index - self.reduction = reduction - self.EPS = 1e-5 - if self.reduction not in ['sum', 'mean', 'none']: - raise ValueError( - "The value of 'reduction' in cross_entropy_loss should be 'sum', 'mean' or" - " 'none', but received %s, which is not allowed." % - self.reduction) - - def forward(self, logit, label): - """ - Forward computation. - Args: - logit (Tensor): logit tensor, the data type is float32, float64. Shape is - (N, C), where C is number of classes, and if shape is more than 2D, this - is (N, C, D1, D2,..., Dk), k >= 1. - label (Variable): label tensor, the data type is int64. Shape is (N), where each - value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is - (N, D1, D2,..., Dk), k >= 1. 
- """ - loss = paddle.nn.functional.cross_entropy( - logit, - label, - weight=self.weight, - ignore_index=self.ignore_index, - reduction=self.reduction) - - mask = label != self.ignore_index - mask = paddle.cast(mask, 'float32') - avg_loss = loss / (paddle.mean(mask) + self.EPS) - - label.stop_gradient = True - mask.stop_gradient = True - return avg_loss -''' - - -@manager.LOSSES.add_component -class CrossEntropyLoss(nn.Layer): - """ - Implements the cross entropy loss function. - - Args: - ignore_index (int64): Specifies a target value that is ignored - and does not contribute to the input gradient. Default ``255``. - """ - - def __init__(self, ignore_index=255): - super(CrossEntropyLoss, self).__init__() - self.ignore_index = ignore_index - self.EPS = 1e-5 - - def forward(self, logit, label): - """ - Forward computation. - Args: - logit (Tensor): logit tensor, the data type is float32, float64. Shape is - (N, C), where C is number of classes, and if shape is more than 2D, this - is (N, C, D1, D2,..., Dk), k >= 1. - label (Variable): label tensor, the data type is int64. Shape is (N), where each - value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is - (N, D1, D2,..., Dk), k >= 1. 
- """ - if len(label.shape) != len(logit.shape): - label = paddle.unsqueeze(label, 1) - - # logit = paddle.transpose(logit, [0, 2, 3, 1]) - # label = paddle.transpose(label, [0, 2, 3, 1]) - # loss = F.softmax_with_cross_entropy( - # logit, label, ignore_index=self.ignore_index, axis=-1) - # loss = paddle.reduce_mean(loss) - - # mask = label != self.ignore_index - # mask = paddle.cast(mask, 'float32') - # avg_loss = loss / (paddle.mean(mask) + self.EPS) - - # label.stop_gradient = True - # mask.stop_gradient = True - # return avg_loss - - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - label = fluid.layers.transpose(label, [0, 2, 3, 1]) - mask = label != self.ignore_index - mask = fluid.layers.cast(mask, 'float32') - loss, probs = fluid.layers.softmax_with_cross_entropy( - logit, - label, - ignore_index=self.ignore_index, - return_softmax=True, - axis=-1) - - loss = loss * mask - avg_loss = fluid.layers.mean(loss) / ( - fluid.layers.mean(mask) + self.EPS) - - label.stop_gradient = True - mask.stop_gradient = True - return avg_loss diff --git a/dygraph/models/model_utils.py b/dygraph/models/model_utils.py deleted file mode 100644 index 7f52919915faf3fa2cca6b567e0c6b8a105e7e0b..0000000000000000000000000000000000000000 --- a/dygraph/models/model_utils.py +++ /dev/null @@ -1,130 +0,0 @@ -# -*- encoding: utf-8 -*- -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import paddle -import paddle.nn.functional as F -from paddle import fluid -from paddle.fluid import dygraph -from paddle.fluid.dygraph import Conv2D -#from paddle.nn import SyncBatchNorm as BatchNorm -from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm - -from dygraph.models.architectures import layer_utils - - -class FCNHead(fluid.dygraph.Layer): - """ - The FCNHead implementation used in auxilary layer - - Args: - in_channels (int): the number of input channels - out_channels (int): the number of output channels - """ - - def __init__(self, in_channels, out_channels): - super(FCNHead, self).__init__() - - inter_channels = in_channels // 4 - self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=in_channels, - num_filters=inter_channels, - filter_size=3, - padding=1) - - self.conv = Conv2D(num_channels=inter_channels, - num_filters=out_channels, - filter_size=1) - - def forward(self, x): - x = self.conv_bn_relu(x) - x = F.dropout(x, dropout_prob=0.1) - x = self.conv(x) - return x - -class AuxLayer(fluid.dygraph.Layer): - """ - The auxilary layer implementation for auxilary loss - - Args: - in_channels (int): the number of input channels. - inter_channels (int): intermediate channels. - out_channels (int): the number of output channels, which is usually num_classes. 
- """ - - def __init__(self, in_channels, inter_channels, out_channels): - super(AuxLayer, self).__init__() - - self.conv_bn_relu = layer_utils.ConvBnRelu(num_channels=in_channels, - num_filters=inter_channels, - filter_size=3, - padding=1) - - self.conv = Conv2D(num_channels=inter_channels, - num_filters=out_channels, - filter_size=1) - - def forward(self, x): - x = self.conv_bn_relu(x) - x = F.dropout(x, dropout_prob=0.1) - x = self.conv(x) - return x - -def get_loss(logit, label, ignore_index=255, EPS=1e-5): - """ - compute forward loss of the model - - Args: - logit (tensor): the logit of model output - label (tensor): ground truth - - Returns: - avg_loss (tensor): forward loss - """ - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - label = fluid.layers.transpose(label, [0, 2, 3, 1]) - mask = label != ignore_index - mask = fluid.layers.cast(mask, 'float32') - loss, probs = fluid.layers.softmax_with_cross_entropy( - logit, - label, - ignore_index=ignore_index, - return_softmax=True, - axis=-1) - - loss = loss * mask - avg_loss = paddle.mean(loss) / (paddle.mean(mask) + EPS) - - label.stop_gradient = True - mask.stop_gradient = True - - return avg_loss - - -def get_pred_score_map(logit): - """ - Get prediction and score map output in inference phase. - - Args: - logit (tensor): output logit of network - - Returns: - pred (tensor): predition map - score_map (tensor): score map - """ - score_map = F.softmax(logit, axis=1) - score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1]) - pred = fluid.layers.argmax(score_map, axis=3) - pred = fluid.layers.unsqueeze(pred, axes=[3]) - - return pred, score_map \ No newline at end of file diff --git a/dygraph/models/ocrnet.py b/dygraph/models/ocrnet.py deleted file mode 100644 index bdadd6d5b2a1e1946a9207eaa166705fb51da06e..0000000000000000000000000000000000000000 --- a/dygraph/models/ocrnet.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import paddle.fluid as fluid -from paddle.fluid.dygraph import Sequential, Conv2D - -from dygraph.cvlibs import manager -from dygraph.models.architectures.layer_utils import ConvBnRelu -from dygraph import utils - - -class SpatialGatherBlock(fluid.dygraph.Layer): - def forward(self, pixels, regions): - n, c, h, w = pixels.shape - _, k, _, _ = regions.shape - - # pixels: from (n, c, h, w) to (n, h*w, c) - pixels = fluid.layers.reshape(pixels, (n, c, h * w)) - pixels = fluid.layers.transpose(pixels, (0, 2, 1)) - - # regions: from (n, k, h, w) to (n, k, h*w) - regions = fluid.layers.reshape(regions, (n, k, h * w)) - regions = fluid.layers.softmax(regions, axis=2) - - # feats: from (n, k, c) to (n, c, k, 1) - feats = fluid.layers.matmul(regions, pixels) - feats = fluid.layers.transpose(feats, (0, 2, 1)) - feats = fluid.layers.unsqueeze(feats, axes=[-1]) - - return feats - - -class SpatialOCRModule(fluid.dygraph.Layer): - def __init__(self, - in_channels, - key_channels, - out_channels, - dropout_rate=0.1): - super(SpatialOCRModule, self).__init__() - - self.attention_block = ObjectAttentionBlock(in_channels, key_channels) - self.dropout_rate = dropout_rate - self.conv1x1 = Conv2D(2 * in_channels, out_channels, 1) - - def forward(self, pixels, regions): - context = self.attention_block(pixels, regions) - feats = fluid.layers.concat([context, pixels], axis=1) - - feats = self.conv1x1(feats) - feats = 
fluid.layers.dropout(feats, self.dropout_rate) - - return feats - - -class ObjectAttentionBlock(fluid.dygraph.Layer): - def __init__(self, in_channels, key_channels): - super(ObjectAttentionBlock, self).__init__() - - self.in_channels = in_channels - self.key_channels = key_channels - - self.f_pixel = Sequential( - ConvBnRelu(in_channels, key_channels, 1), - ConvBnRelu(key_channels, key_channels, 1)) - - self.f_object = Sequential( - ConvBnRelu(in_channels, key_channels, 1), - ConvBnRelu(key_channels, key_channels, 1)) - - self.f_down = ConvBnRelu(in_channels, key_channels, 1) - - self.f_up = ConvBnRelu(key_channels, in_channels, 1) - - def forward(self, x, proxy): - n, _, h, w = x.shape - - # query : from (n, c1, h1, w1) to (n, h1*w1, key_channels) - query = self.f_pixel(x) - query = fluid.layers.reshape(query, (n, self.key_channels, -1)) - query = fluid.layers.transpose(query, (0, 2, 1)) - - # key : from (n, c2, h2, w2) to (n, key_channels, h2*w2) - key = self.f_object(proxy) - key = fluid.layers.reshape(key, (n, self.key_channels, -1)) - - # value : from (n, c2, h2, w2) to (n, h2*w2, key_channels) - value = self.f_down(proxy) - value = fluid.layers.reshape(value, (n, self.key_channels, -1)) - value = fluid.layers.transpose(value, (0, 2, 1)) - - # sim_map (n, h1*w1, h2*w2) - sim_map = fluid.layers.matmul(query, key) - sim_map = (self.key_channels**-.5) * sim_map - sim_map = fluid.layers.softmax(sim_map, axis=-1) - - # context from (n, h1*w1, key_channels) to (n , out_channels, h1, w1) - context = fluid.layers.matmul(sim_map, value) - context = fluid.layers.transpose(context, (0, 2, 1)) - context = fluid.layers.reshape(context, (n, self.key_channels, h, w)) - context = self.f_up(context) - - return context - - -@manager.MODELS.add_component -class OCRNet(fluid.dygraph.Layer): - def __init__(self, - num_classes, - backbone, - model_pretrained=None, - in_channels=None, - ocr_mid_channels=512, - ocr_key_channels=256, - ignore_index=255): - super(OCRNet, 
self).__init__() - - self.ignore_index = ignore_index - self.num_classes = num_classes - self.EPS = 1e-5 - - self.backbone = backbone - self.spatial_gather = SpatialGatherBlock() - self.spatial_ocr = SpatialOCRModule(ocr_mid_channels, ocr_key_channels, - ocr_mid_channels) - self.conv3x3_ocr = ConvBnRelu( - in_channels, ocr_mid_channels, 3, padding=1) - self.cls_head = Conv2D(ocr_mid_channels, self.num_classes, 1) - - self.aux_head = Sequential( - ConvBnRelu(in_channels, in_channels, 3, padding=1), - Conv2D(in_channels, self.num_classes, 1)) - - self.init_weight(model_pretrained) - - def forward(self, x, label=None): - feats = self.backbone(x) - - soft_regions = self.aux_head(feats) - pixels = self.conv3x3_ocr(feats) - - object_regions = self.spatial_gather(pixels, soft_regions) - ocr = self.spatial_ocr(pixels, object_regions) - - logit = self.cls_head(ocr) - logit = fluid.layers.resize_bilinear(logit, x.shape[2:]) - - if self.training: - soft_regions = fluid.layers.resize_bilinear(soft_regions, - x.shape[2:]) - cls_loss = self._get_loss(logit, label) - aux_loss = self._get_loss(soft_regions, label) - return cls_loss + 0.4 * aux_loss - - score_map = fluid.layers.softmax(logit, axis=1) - score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1]) - pred = fluid.layers.argmax(score_map, axis=3) - pred = fluid.layers.unsqueeze(pred, axes=[3]) - return pred, score_map - - def init_weight(self, pretrained_model=None): - """ - Initialize the parameters of model parts. - Args: - pretrained_model ([str], optional): the path of pretrained model.. Defaults to None. 
- """ - if pretrained_model is not None: - if os.path.exists(pretrained_model): - utils.load_pretrained_model(self, pretrained_model) - else: - raise Exception('Pretrained model is not found: {}'.format( - pretrained_model)) - - def _get_loss(self, logit, label): - """ - compute forward loss of the model - - Args: - logit (tensor): the logit of model output - label (tensor): ground truth - - Returns: - avg_loss (tensor): forward loss - """ - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - label = fluid.layers.transpose(label, [0, 2, 3, 1]) - mask = label != self.ignore_index - mask = fluid.layers.cast(mask, 'float32') - loss, probs = fluid.layers.softmax_with_cross_entropy( - logit, - label, - ignore_index=self.ignore_index, - return_softmax=True, - axis=-1) - - loss = loss * mask - avg_loss = fluid.layers.mean(loss) / ( - fluid.layers.mean(mask) + self.EPS) - - label.stop_gradient = True - mask.stop_gradient = True - - return avg_loss diff --git a/dygraph/models/pspnet.py b/dygraph/models/pspnet.py deleted file mode 100644 index 0e376e21ca7d6c57b2d0b121e82a3ca0f5a57c10..0000000000000000000000000000000000000000 --- a/dygraph/models/pspnet.py +++ /dev/null @@ -1,246 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os - -import paddle.nn.functional as F -from paddle import fluid -from paddle.fluid.dygraph import Conv2D - -from dygraph.cvlibs import manager -from dygraph.models import model_utils -from dygraph.models.architectures import layer_utils -from dygraph.utils import utils - - -class PSPNet(fluid.dygraph.Layer): - """ - The PSPNet implementation - - The orginal artile refers to - Zhao, Hengshuang, et al. "Pyramid scene parsing network." - Proceedings of the IEEE conference on computer vision and pattern recognition. 2017. - (https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf) - - Args: - num_classes (int): the unique number of target classes. - - backbone (Paddle.nn.Layer): backbone name, currently support Resnet50/101. - - model_pretrained (str): the path of pretrained model. - - output_stride (int): the ratio of input size and final feature size. Default 16. - - backbone_indices (tuple): two values in the tuple indicte the indices of output of backbone. - the first index will be taken as a deep-supervision feature in auxiliary layer; - the second one will be taken as input of Pyramid Pooling Module (PPModule). - Usually backbone consists of four downsampling stage, and return an output of - each stage, so we set default (2, 3), which means taking feature map of the third - stage (res4b22) in backbone, and feature map of the fourth stage (res5c) as input of PPModule. - - backbone_channels (tuple): the same length with "backbone_indices". It indicates the channels of corresponding index. - - pp_out_channels (int): output channels after Pyramid Pooling Module. Default to 1024. - - bin_sizes (tuple): the out size of pooled feature maps. Default to (1,2,3,6). - - enable_auxiliary_loss (bool): a bool values indictes whether adding auxiliary loss. Default to True. - - ignore_index (int): the value of ground-truth mask would be ignored while doing evaluation. Default to 255. 
- """ - - def __init__(self, - num_classes, - backbone, - model_pretrained=None, - output_stride=16, - backbone_indices=(2, 3), - backbone_channels=(1024, 2048), - pp_out_channels=1024, - bin_sizes=(1, 2, 3, 6), - enable_auxiliary_loss=True, - ignore_index=255): - - super(PSPNet, self).__init__() - # self.backbone = manager.BACKBONES[backbone](output_stride=output_stride, - # multi_grid=(1, 1, 1)) - self.backbone = backbone - self.backbone_indices = backbone_indices - - self.psp_module = PPModule( - in_channels=backbone_channels[1], - out_channels=pp_out_channels, - bin_sizes=bin_sizes) - - self.conv = Conv2D( - num_channels=pp_out_channels, - num_filters=num_classes, - filter_size=1) - - if enable_auxiliary_loss: - self.fcn_head = model_utils.FCNHead( - in_channels=backbone_channels[0], out_channels=num_classes) - - self.enable_auxiliary_loss = enable_auxiliary_loss - self.ignore_index = ignore_index - - self.init_weight(model_pretrained) - - def forward(self, input, label=None): - - _, feat_list = self.backbone(input) - - x = feat_list[self.backbone_indices[1]] - x = self.psp_module(x) - x = F.dropout(x, dropout_prob=0.1) - logit = self.conv(x) - logit = fluid.layers.resize_bilinear(logit, input.shape[2:]) - - if self.enable_auxiliary_loss: - auxiliary_feat = feat_list[self.backbone_indices[0]] - auxiliary_logit = self.fcn_head(auxiliary_feat) - auxiliary_logit = fluid.layers.resize_bilinear( - auxiliary_logit, input.shape[2:]) - - if self.training: - loss = model_utils.get_loss(logit, label) - if self.enable_auxiliary_loss: - auxiliary_loss = model_utils.get_loss(auxiliary_logit, label) - loss += (0.4 * auxiliary_loss) - return loss - - else: - pred, score_map = model_utils.get_pred_score_map(logit) - return pred, score_map - - def init_weight(self, pretrained_model=None): - """ - Initialize the parameters of model parts. - Args: - pretrained_model ([str], optional): the path of pretrained model. Defaults to None. 
- """ - if pretrained_model is not None: - if os.path.exists(pretrained_model): - utils.load_pretrained_model(self, pretrained_model) - else: - raise Exception('Pretrained model is not found: {}'.format( - pretrained_model)) - - -class PPModule(fluid.dygraph.Layer): - """ - Pyramid pooling module - - Args: - in_channels (int): the number of intput channels to pyramid pooling module. - - out_channels (int): the number of output channels after pyramid pooling module. - - bin_sizes (tuple): the out size of pooled feature maps. Default to (1,2,3,6). - - dim_reduction (bool): a bool value represent if reduing dimention after pooling. Default to True. - """ - - def __init__(self, in_channels, out_channels, bin_sizes=(1, 2, 3, 6), dim_reduction=True): - super(PPModule, self).__init__() - self.bin_sizes = bin_sizes - - inter_channels = in_channels - if dim_reduction: - inter_channels = in_channels // len(bin_sizes) - - # we use dimension reduction after pooling mentioned in original implementation. - self.stages = fluid.dygraph.LayerList([self._make_stage(in_channels, inter_channels, size) for size in bin_sizes]) - - self.conv_bn_relu2 = layer_utils.ConvBnRelu(num_channels=in_channels + inter_channels * len(bin_sizes), - num_filters=out_channels, - filter_size=3, - padding=1) - - def _make_stage(self, in_channels, out_channels, size): - """ - Create one pooling layer. - - In our implementation, we adopt the same dimention reduction as the original paper that might be - slightly different with other implementations. - - After pooling, the channels are reduced to 1/len(bin_sizes) immediately, while some other implementations - keep the channels to be same. - - - Args: - in_channels (int): the number of intput channels to pyramid pooling module. - - size (int): the out size of the pooled layer. - - Returns: - conv (tensor): a tensor after Pyramid Pooling Module - """ - - # this paddle version does not support AdaptiveAvgPool2d, so skip it here. 
- # prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) - conv = layer_utils.ConvBnRelu(num_channels=in_channels, - num_filters=out_channels, - filter_size=1) - - return conv - - def forward(self, input): - cat_layers = [] - for i, stage in enumerate(self.stages): - size = self.bin_sizes[i] - x = fluid.layers.adaptive_pool2d( - input, pool_size=(size, size), pool_type="max") - x = stage(x) - x = fluid.layers.resize_bilinear(x, out_shape=input.shape[2:]) - cat_layers.append(x) - cat_layers = [input] + cat_layers[::-1] - cat = fluid.layers.concat(cat_layers, axis=1) - out = self.conv_bn_relu2(cat) - - return out - - -@manager.MODELS.add_component -def pspnet_resnet101_vd(*args, **kwargs): - pretrained_model = None - return PSPNet( - backbone='ResNet101_vd', pretrained_model=pretrained_model, **kwargs) - - -@manager.MODELS.add_component -def pspnet_resnet101_vd_os8(*args, **kwargs): - pretrained_model = None - return PSPNet( - backbone='ResNet101_vd', - output_stride=8, - pretrained_model=pretrained_model, - **kwargs) - - -@manager.MODELS.add_component -def pspnet_resnet50_vd(*args, **kwargs): - pretrained_model = None - return PSPNet( - backbone='ResNet50_vd', pretrained_model=pretrained_model, **kwargs) - - -@manager.MODELS.add_component -def pspnet_resnet50_vd_os8(*args, **kwargs): - pretrained_model = None - return PSPNet( - backbone='ResNet50_vd', - output_stride=8, - pretrained_model=pretrained_model, - **kwargs) diff --git a/dygraph/models/unet.py b/dygraph/models/unet.py deleted file mode 100644 index e2a7c007caa68a74deb322cc4d4d8b66a1b75035..0000000000000000000000000000000000000000 --- a/dygraph/models/unet.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import paddle.fluid as fluid -from paddle.fluid.dygraph import Conv2D, Pool2D -from paddle.nn import SyncBatchNorm as BatchNorm - -from dygraph.cvlibs import manager -from dygraph import utils - - -class UNet(fluid.dygraph.Layer): - """ - U-Net: Convolutional Networks for Biomedical Image Segmentation. - https://arxiv.org/abs/1505.04597 - - Args: - num_classes (int): the unique number of target classes. - pretrained_model (str): the path of pretrained model. - ignore_index (int): the value of ground-truth mask would be ignored while computing loss or doing evaluation. Default 255. - """ - - def __init__(self, num_classes, model_pretrained=None, ignore_index=255): - super(UNet, self).__init__() - self.encode = UnetEncoder() - self.decode = UnetDecode() - self.get_logit = GetLogit(64, num_classes) - self.ignore_index = ignore_index - self.EPS = 1e-5 - - self.init_weight(model_pretrained) - - def forward(self, x, label=None): - encode_data, short_cuts = self.encode(x) - decode_data = self.decode(encode_data, short_cuts) - logit = self.get_logit(decode_data) - if self.training: - return self._get_loss(logit, label) - else: - score_map = fluid.layers.softmax(logit, axis=1) - score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1]) - pred = fluid.layers.argmax(score_map, axis=3) - pred = fluid.layers.unsqueeze(pred, axes=[3]) - return pred, score_map - - def init_weight(self, pretrained_model=None): - """ - Initialize the parameters of model parts. - Args: - pretrained_model ([str], optional): the path of pretrained model. Defaults to None. 
- """ - if pretrained_model is not None: - if os.path.exists(pretrained_model): - utils.load_pretrained_model(self, pretrained_model) - else: - raise Exception('Pretrained model is not found: {}'.format( - pretrained_model)) - - def _get_loss(self, logit, label): - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - label = fluid.layers.transpose(label, [0, 2, 3, 1]) - mask = label != self.ignore_index - mask = fluid.layers.cast(mask, 'float32') - loss, probs = fluid.layers.softmax_with_cross_entropy( - logit, - label, - ignore_index=self.ignore_index, - return_softmax=True, - axis=-1) - - loss = loss * mask - avg_loss = fluid.layers.mean(loss) / ( - fluid.layers.mean(mask) + self.EPS) - - label.stop_gradient = True - mask.stop_gradient = True - return avg_loss - - -class UnetEncoder(fluid.dygraph.Layer): - def __init__(self): - super(UnetEncoder, self).__init__() - self.double_conv = DoubleConv(3, 64) - self.down1 = Down(64, 128) - self.down2 = Down(128, 256) - self.down3 = Down(256, 512) - self.down4 = Down(512, 512) - - def forward(self, x): - short_cuts = [] - x = self.double_conv(x) - short_cuts.append(x) - x = self.down1(x) - short_cuts.append(x) - x = self.down2(x) - short_cuts.append(x) - x = self.down3(x) - short_cuts.append(x) - x = self.down4(x) - return x, short_cuts - - -class UnetDecode(fluid.dygraph.Layer): - def __init__(self): - super(UnetDecode, self).__init__() - self.up1 = Up(512, 256) - self.up2 = Up(256, 128) - self.up3 = Up(128, 64) - self.up4 = Up(64, 64) - - def forward(self, x, short_cuts): - x = self.up1(x, short_cuts[3]) - x = self.up2(x, short_cuts[2]) - x = self.up3(x, short_cuts[1]) - x = self.up4(x, short_cuts[0]) - return x - - -class DoubleConv(fluid.dygraph.Layer): - def __init__(self, num_channels, num_filters): - super(DoubleConv, self).__init__() - self.conv0 = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=1, - padding=1) - self.bn0 = BatchNorm(num_filters) - self.conv1 = Conv2D( - 
num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=1, - padding=1) - self.bn1 = BatchNorm(num_filters) - - def forward(self, x): - x = self.conv0(x) - x = self.bn0(x) - x = fluid.layers.relu(x) - x = self.conv1(x) - x = self.bn1(x) - x = fluid.layers.relu(x) - return x - - -class Down(fluid.dygraph.Layer): - def __init__(self, num_channels, num_filters): - super(Down, self).__init__() - self.max_pool = Pool2D( - pool_size=2, pool_type='max', pool_stride=2, pool_padding=0) - self.double_conv = DoubleConv(num_channels, num_filters) - - def forward(self, x): - x = self.max_pool(x) - x = self.double_conv(x) - return x - - -class Up(fluid.dygraph.Layer): - def __init__(self, num_channels, num_filters): - super(Up, self).__init__() - self.double_conv = DoubleConv(2 * num_channels, num_filters) - - def forward(self, x, short_cut): - short_cut_shape = fluid.layers.shape(short_cut) - x = fluid.layers.resize_bilinear(x, short_cut_shape[2:]) - x = fluid.layers.concat([x, short_cut], axis=1) - x = self.double_conv(x) - return x - - -class GetLogit(fluid.dygraph.Layer): - def __init__(self, num_channels, num_classes): - super(GetLogit, self).__init__() - self.conv = Conv2D( - num_channels=num_channels, - num_filters=num_classes, - filter_size=3, - stride=1, - padding=1) - - def forward(self, x): - x = self.conv(x) - return x - - -@manager.MODELS.add_component -def unet(*args, **kwargs): - return UNet(*args, **kwargs) diff --git a/dygraph/paddleseg/datasets/rice.py b/dygraph/paddleseg/datasets/rice.py deleted file mode 100644 index f8041526fa2e265e0eac70709e9c295e860df9ad..0000000000000000000000000000000000000000 --- a/dygraph/paddleseg/datasets/rice.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -from .dataset import Dataset - - -class Rice(Dataset): - def __init__(self, transforms=None, mode='train', download=True): - self.data_dir = "/mnt/liuyi22/PaddlePaddle/POC/rice_dataset" - self.transforms = transforms - self.file_list = list() - self.mode = mode - self.num_classes = 2 - - if mode.lower() not in ['train', 'eval', 'test']: - raise Exception( - "mode should be 'train', 'eval' or 'test', but got {}.".format( - mode)) - - if self.transforms is None: - raise Exception("transform is necessary, but it is None.") - - if mode == 'train': - file_list = os.path.join(self.data_dir, 'train_list.txt') - elif mode == 'eval': - file_list = os.path.join(self.data_dir, 'val_list.txt') - else: - file_list = os.path.join(self.data_dir, 'test_list.txt') - - with open(file_list, 'r') as f: - for line in f: - items = line.strip().split() - if len(items) != 2: - if mode == 'train' or mode == 'eval': - raise Exception( - "File list format incorrect! 
It should be" - " image_name label_name\\n") - image_path = os.path.join(self.data_dir, items[0]) - grt_path = None - else: - image_path = os.path.join(self.data_dir, items[0]) - grt_path = os.path.join(self.data_dir, items[1]) - self.file_list.append([image_path, grt_path]) diff --git a/dygraph/paddleseg/models/ann.py b/dygraph/paddleseg/models/ann.py index 48c381d26308ac6c6632abcd202b84409e22e7f7..3cde299280d7a39edb14787502ad440acac12ea5 100644 --- a/dygraph/paddleseg/models/ann.py +++ b/dygraph/paddleseg/models/ann.py @@ -17,8 +17,9 @@ import os import paddle import paddle.nn.functional as F from paddle import nn + from paddleseg.cvlibs import manager -from paddleseg.models.common import layer_utils, model_utils +from paddleseg.models.common import layer_libs from paddleseg.utils import utils @@ -88,7 +89,7 @@ class ANN(nn.Layer): psp_size=psp_size) self.context = nn.Sequential( - layer_utils.ConvBnRelu( + layer_libs.ConvBnRelu( in_channels=high_in_channels, out_channels=inter_channels, kernel_size=3, @@ -106,7 +107,7 @@ class ANN(nn.Layer): in_channels=inter_channels, out_channels=num_classes, kernel_size=1) - self.auxlayer = model_utils.AuxLayer( + self.auxlayer = layer_libs.AuxLayer( in_channels=low_in_channels, inter_channels=low_in_channels // 2, out_channels=num_classes, @@ -189,7 +190,7 @@ class AFNB(nn.Layer): key_channels, value_channels, out_channels, size) for size in sizes ]) - self.conv_bn = layer_utils.ConvBn( + self.conv_bn = layer_libs.ConvBn( in_channels=out_channels + high_in_channels, out_channels=out_channels, kernel_size=1) @@ -243,7 +244,7 @@ class APNB(nn.Layer): SelfAttentionBlock_APNB(in_channels, out_channels, key_channels, value_channels, size) for size in sizes ]) - self.conv_bn = layer_utils.ConvBnRelu( + self.conv_bn = layer_libs.ConvBnRelu( in_channels=in_channels * 2, out_channels=out_channels, kernel_size=1) @@ -310,11 +311,11 @@ class SelfAttentionBlock_AFNB(nn.Layer): if out_channels == None: self.out_channels = 
high_in_channels self.pool = nn.Pool2D(pool_size=(scale, scale), pool_type="max") - self.f_key = layer_utils.ConvBnRelu( + self.f_key = layer_libs.ConvBnRelu( in_channels=low_in_channels, out_channels=key_channels, kernel_size=1) - self.f_query = layer_utils.ConvBnRelu( + self.f_query = layer_libs.ConvBnRelu( in_channels=high_in_channels, out_channels=key_channels, kernel_size=1) @@ -393,7 +394,7 @@ class SelfAttentionBlock_APNB(nn.Layer): self.value_channels = value_channels self.pool = nn.Pool2D(pool_size=(scale, scale), pool_type="max") - self.f_key = layer_utils.ConvBnRelu( + self.f_key = layer_libs.ConvBnRelu( in_channels=self.in_channels, out_channels=self.key_channels, kernel_size=1) diff --git a/dygraph/paddleseg/models/backbones/mobilenetv3.py b/dygraph/paddleseg/models/backbones/mobilenetv3.py index 6204d7733a45326a70b7cbc423820b987b046708..ac1778ad207945b96d7aacbd88691e5910d4d3b4 100644 --- a/dygraph/paddleseg/models/backbones/mobilenetv3.py +++ b/dygraph/paddleseg/models/backbones/mobilenetv3.py @@ -27,7 +27,7 @@ from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout from paddle.nn import SyncBatchNorm as BatchNorm -from paddleseg.models.common import layer_utils +from paddleseg.models.common import layer_libs from paddleseg.cvlibs import manager from paddleseg.utils import utils diff --git a/dygraph/paddleseg/models/backbones/resnet_vd.py b/dygraph/paddleseg/models/backbones/resnet_vd.py index d7dfc66fd5dc44a6a27c04eea73dc692f857c61c..787f6a3b48bd7ee2bf2e0d31ac62f15b704b3e15 100644 --- a/dygraph/paddleseg/models/backbones/resnet_vd.py +++ b/dygraph/paddleseg/models/backbones/resnet_vd.py @@ -28,7 +28,7 @@ from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout from paddle.nn import SyncBatchNorm as BatchNorm from paddleseg.utils import utils -from paddleseg.models.common import layer_utils +from paddleseg.models.common import layer_libs, activation from paddleseg.cvlibs import 
manager __all__ = [ @@ -77,7 +77,7 @@ class ConvBNLayer(fluid.dygraph.Layer): num_filters, weight_attr=ParamAttr(name=bn_name + '_scale'), bias_attr=ParamAttr(bn_name + '_offset')) - self._act_op = layer_utils.Activation(act=act) + self._act_op = activation.Activation(act=act) def forward(self, inputs): if self.is_vd_mode: @@ -213,7 +213,7 @@ class ResNet_vd(fluid.dygraph.Layer): layers=50, class_dim=1000, output_stride=None, - multi_grid=(1, 2, 4)): + multi_grid=(1, 1, 1)): super(ResNet_vd, self).__init__() self.layers = layers diff --git a/dygraph/paddleseg/models/backbones/xception_deeplab.py b/dygraph/paddleseg/models/backbones/xception_deeplab.py index f512e31ab372b8bc453d8d0506bbc45839a08d27..b07d3ac1271baadeb199d1fb39feb70f45f91e23 100644 --- a/dygraph/paddleseg/models/backbones/xception_deeplab.py +++ b/dygraph/paddleseg/models/backbones/xception_deeplab.py @@ -21,7 +21,7 @@ from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout from paddle.nn import SyncBatchNorm as BatchNorm -from paddleseg.models.common import layer_utils +from paddleseg.models.common import layer_libs from paddleseg.cvlibs import manager from paddleseg.utils import utils diff --git a/dygraph/paddleseg/models/common/__init__.py b/dygraph/paddleseg/models/common/__init__.py index 9f30b50f2fc80c9effd59dbf3c134de66de04c44..33b2611df67fbfe22604512c1f0d03c9012cb3b4 100644 --- a/dygraph/paddleseg/models/common/__init__.py +++ b/dygraph/paddleseg/models/common/__init__.py @@ -13,5 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from . import layer_utils -from . import model_utils \ No newline at end of file +from . import layer_libs +from . import activation +from . 
import pyramid_pool \ No newline at end of file diff --git a/dygraph/paddleseg/models/common/activation.py b/dygraph/paddleseg/models/common/activation.py new file mode 100644 index 0000000000000000000000000000000000000000..69af72e0ea96cd389e48511ff7f7d4bee8680a8a --- /dev/null +++ b/dygraph/paddleseg/models/common/activation.py @@ -0,0 +1,60 @@ +# -*- encoding: utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle import nn +from paddle.nn.layer import activation + + +class Activation(nn.Layer): + """ + The wrapper of activations + For example: + >>> relu = Activation("relu") + >>> print(relu) + + >>> sigmoid = Activation("sigmoid") + >>> print(sigmoid) + + >>> not_exit_one = Activation("not_exit_one") + KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + + Args: + act (str): the activation name in lowercase + """ + + def __init__(self, act=None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__all__ + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = 
eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x): + + if self._act is not None: + return self.act_func(x) + else: + return x \ No newline at end of file diff --git a/dygraph/paddleseg/models/common/layer_utils.py b/dygraph/paddleseg/models/common/layer_libs.py similarity index 59% rename from dygraph/paddleseg/models/common/layer_utils.py rename to dygraph/paddleseg/models/common/layer_libs.py index 8d41ebb130cbbca11feebfd87e030628ea44cd27..8da38bcae5efb9960a012a58dac747136e81941a 100644 --- a/dygraph/paddleseg/models/common/layer_utils.py +++ b/dygraph/paddleseg/models/common/layer_libs.py @@ -70,18 +70,6 @@ class ConvReluPool(nn.Layer): return x -# class ConvBnReluUpsample(nn.Layer): -# def __init__(self, in_channels, out_channels): -# super(ConvBnReluUpsample, self).__init__() -# self.conv_bn_relu = ConvBnRelu(in_channels, out_channels) - -# def forward(self, x, upsample_scale=2): -# x = self.conv_bn_relu(x) -# new_shape = [x.shape[2] * upsample_scale, x.shape[3] * upsample_scale] -# x = F.resize_bilinear(x, new_shape) -# return x - - class DepthwiseConvBnRelu(nn.Layer): def __init__(self, in_channels, out_channels, kernel_size, **kwargs): super(DepthwiseConvBnRelu, self).__init__() @@ -100,44 +88,43 @@ class DepthwiseConvBnRelu(nn.Layer): return x -class Activation(nn.Layer): +class AuxLayer(nn.Layer): """ - The wrapper of activations - For example: - >>> relu = Activation("relu") - >>> print(relu) - - >>> sigmoid = Activation("sigmoid") - >>> print(sigmoid) - - >>> not_exit_one = Activation("not_exit_one") - KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', - 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', - 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + The auxilary layer implementation for auxilary loss Args: - act 
(str): the activation name in lowercase + in_channels (int): the number of input channels. + + inter_channels (int): intermediate channels. + + out_channels (int): the number of output channels, which is usually num_classes. + + dropout_prob (float): the droput rate. Default to 0.1. """ - def __init__(self, act=None): - super(Activation, self).__init__() + def __init__(self, + in_channels, + inter_channels, + out_channels, + dropout_prob=0.1): + super(AuxLayer, self).__init__() + + self.conv_bn_relu = ConvBnRelu( + in_channels=in_channels, + out_channels=inter_channels, + kernel_size=3, + padding=1) - self._act = act - upper_act_names = activation.__all__ - lower_act_names = [act.lower() for act in upper_act_names] - act_dict = dict(zip(lower_act_names, upper_act_names)) + self.conv = nn.Conv2d( + in_channels=inter_channels, + out_channels=out_channels, + kernel_size=1) - if act is not None: - if act in act_dict.keys(): - act_name = act_dict[act] - self.act_func = eval("activation.{}()".format(act_name)) - else: - raise KeyError("{} does not exist in the current {}".format( - act, act_dict.keys())) + self.dropout_prob = dropout_prob def forward(self, x): + x = self.conv_bn_relu(x) + x = F.dropout(x, p=self.dropout_prob) + x = self.conv(x) + return x - if self._act is not None: - return self.act_func(x) - else: - return x diff --git a/dygraph/paddleseg/models/common/model_utils.py b/dygraph/paddleseg/models/common/pyramid_pool.py similarity index 58% rename from dygraph/paddleseg/models/common/model_utils.py rename to dygraph/paddleseg/models/common/pyramid_pool.py index 7de39c8e77fad0021d3e910a9c02f3f6d774c32d..a69eb0f60ff11e5413f813b62183f81a84e2c232 100644 --- a/dygraph/paddleseg/models/common/model_utils.py +++ b/dygraph/paddleseg/models/common/pyramid_pool.py @@ -13,85 +13,96 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+ import paddle from paddle import nn import paddle.nn.functional as F from paddle.nn import SyncBatchNorm as BatchNorm -from paddleseg.models.common import layer_utils +from paddleseg.models.common import layer_libs -class FCNHead(nn.Layer): +class ASPPModule(nn.Layer): """ - The FCNHead implementation used in auxilary layer + Atrous Spatial Pyramid Pooling Args: - in_channels (int): the number of input channels - out_channels (int): the number of output channels - """ + aspp_ratios (tuple): the dilation rate using in ASSP module. - def __init__(self, in_channels, out_channels): - super(FCNHead, self).__init__() - - inter_channels = in_channels // 4 - self.conv_bn_relu = layer_utils.ConvBnRelu( - in_channels=in_channels, - out_channels=inter_channels, - kernel_size=3, - padding=1) - - self.conv = nn.Conv2d( - in_channels=inter_channels, - out_channels=out_channels, - kernel_size=1) + in_channels (int): the number of input channels. - def forward(self, x): - x = self.conv_bn_relu(x) - x = F.dropout(x, p=0.1) - x = self.conv(x) - return x + out_channels (int): the number of output channels. + sep_conv (bool): if using separable conv in ASPP module. -class AuxLayer(nn.Layer): - """ - The auxilary layer implementation for auxilary loss + image_pooling: if augmented with image-level features. - Args: - in_channels (int): the number of input channels. - inter_channels (int): intermediate channels. - out_channels (int): the number of output channels, which is usually num_classes. 
""" - def __init__(self, - in_channels, - inter_channels, - out_channels, - dropout_prob=0.1): - super(AuxLayer, self).__init__() - - self.conv_bn_relu = layer_utils.ConvBnRelu( - in_channels=in_channels, - out_channels=inter_channels, - kernel_size=3, - padding=1) - - self.conv = nn.Conv2d( - in_channels=inter_channels, - out_channels=out_channels, + def __init__(self, + aspp_ratios, + in_channels, + out_channels, + sep_conv=False, + image_pooling=False): + super(ASPPModule, self).__init__() + + self.aspp_blocks = [] + + for ratio in aspp_ratios: + + if sep_conv and ratio > 1: + conv_func = layer_libs.DepthwiseConvBnRelu + else: + conv_func = layer_libs.ConvBnRelu + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio + ) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2d(output_size=(1, 1)), + layer_libs.ConvBnRelu(in_channels, out_channels, kernel_size=1, bias_attr=False) + ) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = layer_libs.ConvBnRelu( + in_channels=out_channels * out_size, + out_channels=out_channels, kernel_size=1) - self.dropout_prob = dropout_prob + self.dropout = nn.Dropout(p=0.1) # drop rate def forward(self, x): + + outputs = [] + for block in self.aspp_blocks: + outputs.append(block(x)) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.resize_bilinear(img_avg, out_shape=x.shape[2:]) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) x = self.conv_bn_relu(x) - x = F.dropout(x, p=self.dropout_prob) - x = self.conv(x) - return x + x = self.dropout(x) + return x + class PPModule(nn.Layer): """ - Pyramid pooling module + Pyramid pooling module orginally in PSPNet Args: in_channels (int): the number of intput channels to pyramid pooling module. 
@@ -109,6 +120,7 @@ class PPModule(nn.Layer): bin_sizes=(1, 2, 3, 6), dim_reduction=True): super(PPModule, self).__init__() + self.bin_sizes = bin_sizes inter_channels = in_channels @@ -121,7 +133,7 @@ class PPModule(nn.Layer): for size in bin_sizes ]) - self.conv_bn_relu2 = layer_utils.ConvBnRelu( + self.conv_bn_relu2 = layer_libs.ConvBnRelu( in_channels=in_channels + inter_channels * len(bin_sizes), out_channels=out_channels, kernel_size=3, @@ -147,24 +159,21 @@ class PPModule(nn.Layer): conv (tensor): a tensor after Pyramid Pooling Module """ - # this paddle version does not support AdaptiveAvgPool2d, so skip it here. - # prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) - conv = layer_utils.ConvBnRelu( + prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) + conv = layer_libs.ConvBnRelu( in_channels=in_channels, out_channels=out_channels, kernel_size=1) - return conv + return nn.Sequential(prior, conv) def forward(self, input): cat_layers = [] for i, stage in enumerate(self.stages): size = self.bin_sizes[i] - x = F.adaptive_pool2d( - input, pool_size=(size, size), pool_type="max") - x = stage(x) + x = stage(input) x = F.resize_bilinear(x, out_shape=input.shape[2:]) cat_layers.append(x) cat_layers = [input] + cat_layers[::-1] cat = paddle.concat(cat_layers, axis=1) out = self.conv_bn_relu2(cat) - return out + return out \ No newline at end of file diff --git a/dygraph/paddleseg/models/deeplab.py b/dygraph/paddleseg/models/deeplab.py index 7c7e0cb187808baa8c7543d8eda7773a53c5b0fc..ff530b2fa5ad2f08700a1d9ba7f2b7c81a09015a 100644 --- a/dygraph/paddleseg/models/deeplab.py +++ b/dygraph/paddleseg/models/deeplab.py @@ -18,7 +18,7 @@ import paddle import paddle.nn.functional as F from paddle import nn from paddleseg.cvlibs import manager -from paddleseg.models.common import layer_utils +from paddleseg.models.common import pyramid_pool, layer_libs from paddleseg.utils import utils __all__ = ['DeepLabV3P', 'DeepLabV3'] @@ -43,8 +43,9 @@ class DeepLabV3P(nn.Layer): 
model_pretrained (str): the path of pretrained model. - output_stride (int): the ratio of input size and final feature size. - Support 16 or 8. Default to 16. + aspp_ratios (tuple): the dilation rate using in ASSP module. + if output_stride=16, aspp_ratios should be set as (1, 6, 12, 18). + if output_stride=8, aspp_ratios is (1, 12, 24, 36). backbone_indices (tuple): two values in the tuple indicte the indices of output of backbone. the first index will be taken as a low-level feature in Deconder component; @@ -61,18 +62,24 @@ class DeepLabV3P(nn.Layer): def __init__(self, num_classes, backbone, + backbone_pretrained=None, model_pretrained=None, backbone_indices=(0, 3), backbone_channels=(256, 2048), - output_stride=16): + aspp_ratios=(1, 6, 12, 18), + aspp_out_channels=256): super(DeepLabV3P, self).__init__() self.backbone = backbone - self.aspp = ASPP(output_stride, backbone_channels[1]) + self.backbone_pretrained = backbone_pretrained + self.model_pretrained = model_pretrained + + self.aspp = pyramid_pool.ASPPModule( + aspp_ratios, backbone_channels[1], aspp_out_channels, sep_conv=True, image_pooling=True) self.decoder = Decoder(num_classes, backbone_channels[0]) self.backbone_indices = backbone_indices - self.init_weight(model_pretrained) + self.init_weight() def forward(self, input, label=None): @@ -87,19 +94,17 @@ class DeepLabV3P(nn.Layer): return logit_list - def init_weight(self, pretrained_model=None): + def init_weight(self): """ Initialize the parameters of model parts. Args: pretrained_model ([str], optional): the path of pretrained model. Defaults to None. 
""" - if pretrained_model is not None: - if os.path.exists(pretrained_model): - utils.load_pretrained_model(self, pretrained_model) - else: - raise Exception('Pretrained model is not found: {}'.format( - pretrained_model)) - + if self.model_pretrained is not None: + utils.load_pretrained_model(self, self.model_pretrained) + elif self.backbone_pretrained is not None: + utils.load_pretrained_model(self.backbone, self.backbone_pretrained) + @manager.MODELS.add_component class DeepLabV3(nn.Layer): @@ -119,15 +124,21 @@ class DeepLabV3(nn.Layer): def __init__(self, num_classes, backbone, + backbone_pretrained=None, model_pretrained=None, backbone_indices=(3,), backbone_channels=(2048,), - output_stride=16): + aspp_ratios=(1, 6, 12, 18), + aspp_out_channels=256): super(DeepLabV3, self).__init__() self.backbone = backbone - self.aspp = ASPP(output_stride, backbone_channels[0]) + + self.aspp = pyramid_pool.ASPPModule( + aspp_ratios, backbone_channels[0], aspp_out_channels, + sep_conv=False, image_pooling=True) + self.cls = nn.Conv2d( in_channels=backbone_channels[0], out_channels=num_classes, @@ -161,98 +172,6 @@ class DeepLabV3(nn.Layer): pretrained_model)) -class ImageAverage(nn.Layer): - """ - Global average pooling - - Args: - in_channels (int): the number of input channels. - - """ - - def __init__(self, in_channels): - super(ImageAverage, self).__init__() - self.conv_bn_relu = layer_utils.ConvBnRelu( - in_channels, out_channels=256, kernel_size=1) - - def forward(self, input): - x = paddle.reduce_mean(input, dim=[2, 3], keep_dim=True) - x = self.conv_bn_relu(x) - x = F.resize_bilinear(x, out_shape=input.shape[2:]) - return x - - -class ASPP(nn.Layer): - """ - Decoder module of DeepLabV3P model - - Args: - output_stride (int): the ratio of input size and final feature size. Support 16 or 8. - - in_channels (int): the number of input channels in decoder module. 
- - """ - - def __init__(self, output_stride, in_channels): - super(ASPP, self).__init__() - - if output_stride == 16: - aspp_ratios = (6, 12, 18) - elif output_stride == 8: - aspp_ratios = (12, 24, 36) - else: - raise NotImplementedError( - "Only support output_stride is 8 or 16, but received{}".format( - output_stride)) - - self.image_average = ImageAverage(in_channels=in_channels) - - # The first aspp using 1*1 conv - self.aspp1 = layer_utils.DepthwiseConvBnRelu( - in_channels=in_channels, out_channels=256, kernel_size=1) - - # The second aspp using 3*3 (separable) conv at dilated rate aspp_ratios[0] - self.aspp2 = layer_utils.DepthwiseConvBnRelu( - in_channels=in_channels, - out_channels=256, - kernel_size=3, - dilation=aspp_ratios[0], - padding=aspp_ratios[0]) - - # The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[1] - self.aspp3 = layer_utils.DepthwiseConvBnRelu( - in_channels=in_channels, - out_channels=256, - kernel_size=3, - dilation=aspp_ratios[1], - padding=aspp_ratios[1]) - - # The Third aspp using 3*3 (separable) conv at dilated rate aspp_ratios[2] - self.aspp4 = layer_utils.DepthwiseConvBnRelu( - in_channels=in_channels, - out_channels=256, - kernel_size=3, - dilation=aspp_ratios[2], - padding=aspp_ratios[2]) - - # After concat op, using 1*1 conv - self.conv_bn_relu = layer_utils.ConvBnRelu( - in_channels=1280, out_channels=256, kernel_size=1) - - def forward(self, x): - - x1 = self.image_average(x) - x2 = self.aspp1(x) - x3 = self.aspp2(x) - x4 = self.aspp3(x) - x5 = self.aspp4(x) - x = paddle.concat([x1, x2, x3, x4, x5], axis=1) - - x = self.conv_bn_relu(x) - x = F.dropout(x, p=0.1) # dropout_prob - return x - - class Decoder(nn.Layer): """ Decoder module of DeepLabV3P model @@ -267,12 +186,12 @@ class Decoder(nn.Layer): def __init__(self, num_classes, in_channels): super(Decoder, self).__init__() - self.conv_bn_relu1 = layer_utils.ConvBnRelu( + self.conv_bn_relu1 = layer_libs.ConvBnRelu( in_channels=in_channels, 
out_channels=48, kernel_size=1) - self.conv_bn_relu2 = layer_utils.DepthwiseConvBnRelu( + self.conv_bn_relu2 = layer_libs.DepthwiseConvBnRelu( in_channels=304, out_channels=256, kernel_size=3, padding=1) - self.conv_bn_relu3 = layer_utils.DepthwiseConvBnRelu( + self.conv_bn_relu3 = layer_libs.DepthwiseConvBnRelu( in_channels=256, out_channels=256, kernel_size=3, padding=1) self.conv = nn.Conv2d( in_channels=256, out_channels=num_classes, kernel_size=1) diff --git a/dygraph/paddleseg/models/fast_scnn.py b/dygraph/paddleseg/models/fast_scnn.py index 434f083e99d5337a51b3581f906b0a1fc518676e..3abbcffc85c52563f32406e27a645a29860a2ac3 100644 --- a/dygraph/paddleseg/models/fast_scnn.py +++ b/dygraph/paddleseg/models/fast_scnn.py @@ -15,7 +15,7 @@ import paddle.nn.functional as F from paddle import nn from paddleseg.cvlibs import manager -from paddleseg.models.common import layer_utils, model_utils +from paddleseg.models.common import layer_libs @manager.MODELS.add_component @@ -110,15 +110,15 @@ class LearningToDownsample(nn.Layer): def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64): super(LearningToDownsample, self).__init__() - self.conv_bn_relu = layer_utils.ConvBnRelu( + self.conv_bn_relu = layer_libs.ConvBnRelu( in_channels=3, out_channels=dw_channels1, kernel_size=3, stride=2) - self.dsconv_bn_relu1 = layer_utils.DepthwiseConvBnRelu( + self.dsconv_bn_relu1 = layer_libs.DepthwiseConvBnRelu( in_channels=dw_channels1, out_channels=dw_channels2, kernel_size=3, stride=2, padding=1) - self.dsconv_bn_relu2 = layer_utils.DepthwiseConvBnRelu( + self.dsconv_bn_relu2 = layer_libs.DepthwiseConvBnRelu( in_channels=dw_channels2, out_channels=out_channels, kernel_size=3, @@ -220,13 +220,13 @@ class LinearBottleneck(nn.Layer): expand_channels = in_channels * expansion self.block = nn.Sequential( # pw - layer_utils.ConvBnRelu( + layer_libs.ConvBnRelu( in_channels=in_channels, out_channels=expand_channels, kernel_size=1, bias_attr=False), # dw - 
layer_utils.ConvBnRelu( + layer_libs.ConvBnRelu( in_channels=expand_channels, out_channels=expand_channels, kernel_size=3, @@ -267,7 +267,7 @@ class FeatureFusionModule(nn.Layer): super(FeatureFusionModule, self).__init__() # There only depth-wise conv is used WITHOUT point-wise conv - self.dwconv = layer_utils.ConvBnRelu( + self.dwconv = layer_libs.ConvBnRelu( in_channels=low_in_channels, out_channels=out_channels, kernel_size=3, @@ -317,13 +317,13 @@ class Classifier(nn.Layer): def __init__(self, input_channels, num_classes): super(Classifier, self).__init__() - self.dsconv1 = layer_utils.DepthwiseConvBnRelu( + self.dsconv1 = layer_libs.DepthwiseConvBnRelu( in_channels=input_channels, out_channels=input_channels, kernel_size=3, padding=1) - self.dsconv2 = layer_utils.DepthwiseConvBnRelu( + self.dsconv2 = layer_libs.DepthwiseConvBnRelu( in_channels=input_channels, out_channels=input_channels, kernel_size=3, diff --git a/dygraph/paddleseg/models/gcnet.py b/dygraph/paddleseg/models/gcnet.py index 97a70d13f6c1f53a6123425f42db1315385d61d1..09a900655b1808b19d0e2dcd751d4ee22769d220 100644 --- a/dygraph/paddleseg/models/gcnet.py +++ b/dygraph/paddleseg/models/gcnet.py @@ -18,7 +18,7 @@ import paddle import paddle.nn.functional as F from paddle import nn from paddleseg.cvlibs import manager -from paddleseg.models.common import layer_utils, model_utils +from paddleseg.models.common import layer_libs from paddleseg.utils import utils @@ -72,7 +72,7 @@ class GCNet(nn.Layer): self.backbone = backbone in_channels = backbone_channels[1] - self.conv_bn_relu1 = layer_utils.ConvBnRelu( + self.conv_bn_relu1 = layer_libs.ConvBnRelu( in_channels=in_channels, out_channels=gc_channels, kernel_size=3, @@ -80,13 +80,13 @@ class GCNet(nn.Layer): self.gc_block = GlobalContextBlock(in_channels=gc_channels, ratio=ratio) - self.conv_bn_relu2 = layer_utils.ConvBnRelu( + self.conv_bn_relu2 = layer_libs.ConvBnRelu( in_channels=gc_channels, out_channels=gc_channels, kernel_size=3, padding=1) - 
self.conv_bn_relu3 = layer_utils.ConvBnRelu( + self.conv_bn_relu3 = layer_libs.ConvBnRelu( in_channels=in_channels + gc_channels, out_channels=gc_channels, kernel_size=3, @@ -96,7 +96,7 @@ class GCNet(nn.Layer): in_channels=gc_channels, out_channels=num_classes, kernel_size=1) if enable_auxiliary_loss: - self.auxlayer = model_utils.AuxLayer( + self.auxlayer = layer_libs.AuxLayer( in_channels=backbone_channels[0], inter_channels=backbone_channels[0] // 4, out_channels=num_classes) @@ -161,9 +161,9 @@ class GlobalContextBlock(nn.Layer): self.conv_mask = nn.Conv2d( in_channels=in_channels, out_channels=1, kernel_size=1) - # current paddle version does not support Softmax class - # self.softmax = layer_utils.Activation("softmax", dim=2) + self.softmax = nn.Softmax(axis=2) + inter_channels = int(in_channels * ratio) self.channel_add_conv = nn.Sequential( nn.Conv2d( @@ -188,7 +188,7 @@ class GlobalContextBlock(nn.Layer): # [N, 1, H * W] context_mask = paddle.reshape( context_mask, shape=[batch, 1, height * width]) - context_mask = F.softmax(context_mask) + context_mask = self.softmax(context_mask) # [N, 1, H * W, 1] context_mask = paddle.unsqueeze(context_mask, axis=-1) # [N, 1, C, 1] diff --git a/dygraph/paddleseg/models/ocrnet.py b/dygraph/paddleseg/models/ocrnet.py index 78dfd136d7aaf15aed50f598c66ddbf72ac1e242..00cf079c0c185b5d5904610ef0ca2d3929836e25 100644 --- a/dygraph/paddleseg/models/ocrnet.py +++ b/dygraph/paddleseg/models/ocrnet.py @@ -18,7 +18,7 @@ import paddle.fluid as fluid from paddle.fluid.dygraph import Sequential, Conv2D from paddleseg.cvlibs import manager -from paddleseg.models.common.layer_utils import ConvBnRelu +from paddleseg.models.common.layer_libs import ConvBnRelu from paddleseg import utils diff --git a/dygraph/paddleseg/models/pspnet.py b/dygraph/paddleseg/models/pspnet.py index 764749ce09f4618420d142d1955cf52d9aa5c258..69b831ebb3e29f979128e96a0bb1c7b5a45a37a3 100644 --- a/dygraph/paddleseg/models/pspnet.py +++ 
b/dygraph/paddleseg/models/pspnet.py @@ -17,7 +17,7 @@ import os import paddle.nn.functional as F from paddle import nn from paddleseg.cvlibs import manager -from paddleseg.models.common import model_utils +from paddleseg.models.common import layer_libs, pyramid_pool from paddleseg.utils import utils @@ -70,7 +70,7 @@ class PSPNet(nn.Layer): self.backbone = backbone self.backbone_indices = backbone_indices - self.psp_module = model_utils.PPModule( + self.psp_module = pyramid_pool.PPModule( in_channels=backbone_channels[1], out_channels=pp_out_channels, bin_sizes=bin_sizes) @@ -81,8 +81,11 @@ class PSPNet(nn.Layer): kernel_size=1) if enable_auxiliary_loss: - self.fcn_head = model_utils.FCNHead( - in_channels=backbone_channels[0], out_channels=num_classes) + + self.auxlayer = layer_libs.AuxLayer( + in_channels=backbone_channels[0], + inter_channels=backbone_channels[0] // 4, + out_channels=num_classes) self.enable_auxiliary_loss = enable_auxiliary_loss @@ -102,7 +105,7 @@ class PSPNet(nn.Layer): if self.enable_auxiliary_loss: auxiliary_feat = feat_list[self.backbone_indices[0]] - auxiliary_logit = self.fcn_head(auxiliary_feat) + auxiliary_logit = self.auxlayer(auxiliary_feat) auxiliary_logit = F.resize_bilinear(auxiliary_logit, input.shape[2:]) logit_list.append(auxiliary_logit) diff --git a/dygraph/transforms/__init__.py b/dygraph/transforms/__init__.py deleted file mode 100644 index 8f1d5ae80aeb1eb77ac672b1cbcfedcbfbd643c4..0000000000000000000000000000000000000000 --- a/dygraph/transforms/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .transforms import * -from . import functional diff --git a/dygraph/transforms/functional.py b/dygraph/transforms/functional.py deleted file mode 100644 index 6d5a9b10db15edb05692c8aa4249912652e0a745..0000000000000000000000000000000000000000 --- a/dygraph/transforms/functional.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import cv2 -import numpy as np -from PIL import Image, ImageEnhance - - -def normalize(im, mean, std): - im = im.astype(np.float32, copy=False) / 255.0 - im -= mean - im /= std - return im - - -def permute(im): - im = np.transpose(im, (2, 0, 1)) - return im - - -def resize(im, target_size=608, interp=cv2.INTER_LINEAR): - if isinstance(target_size, list) or isinstance(target_size, tuple): - w = target_size[0] - h = target_size[1] - else: - w = target_size - h = target_size - im = cv2.resize(im, (w, h), interpolation=interp) - return im - - -def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR): - value = max(im.shape[0], im.shape[1]) - scale = float(long_size) / float(value) - resized_width = int(round(im.shape[1] * scale)) - resized_height = int(round(im.shape[0] * scale)) - - im = cv2.resize( - im, (resized_width, resized_height), interpolation=interpolation) - return im - - -def horizontal_flip(im): - if len(im.shape) == 3: - im = im[:, ::-1, :] - elif len(im.shape) == 2: - im = im[:, ::-1] - return im - - -def vertical_flip(im): - if len(im.shape) == 3: - im = im[::-1, :, :] - elif len(im.shape) == 2: - im = im[::-1, :] - return im - - -def brightness(im, brightness_lower, brightness_upper): - brightness_delta = np.random.uniform(brightness_lower, brightness_upper) - im = ImageEnhance.Brightness(im).enhance(brightness_delta) - return im - - -def contrast(im, contrast_lower, contrast_upper): - contrast_delta = np.random.uniform(contrast_lower, contrast_upper) - im = ImageEnhance.Contrast(im).enhance(contrast_delta) - return im - - -def saturation(im, saturation_lower, saturation_upper): - saturation_delta = np.random.uniform(saturation_lower, saturation_upper) - im = ImageEnhance.Color(im).enhance(saturation_delta) - return im - - -def hue(im, hue_lower, hue_upper): - hue_delta = np.random.uniform(hue_lower, hue_upper) - im = np.array(im.convert('HSV')) - im[:, :, 0] = im[:, :, 0] + hue_delta - im = Image.fromarray(im, mode='HSV').convert('RGB') - 
return im - - -def rotate(im, rotate_lower, rotate_upper): - rotate_delta = np.random.uniform(rotate_lower, rotate_upper) - im = im.rotate(int(rotate_delta)) - return im diff --git a/dygraph/transforms/transforms.py b/dygraph/transforms/transforms.py deleted file mode 100644 index 91404ade7d263c6df551ee8b15f74f9d1df96ae0..0000000000000000000000000000000000000000 --- a/dygraph/transforms/transforms.py +++ /dev/null @@ -1,576 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import random -from collections import OrderedDict - -import numpy as np -from PIL import Image -import cv2 - -from .functional import * -from dygraph.cvlibs import manager - - -@manager.TRANSFORMS.add_component -class Compose: - def __init__(self, transforms, to_rgb=True): - if not isinstance(transforms, list): - raise TypeError('The transforms must be a list!') - if len(transforms) < 1: - raise ValueError('The length of transforms ' + \ - 'must be equal or larger than 1!') - self.transforms = transforms - self.to_rgb = to_rgb - - def __call__(self, im, im_info=None, label=None): - if im_info is None: - im_info = list() - if isinstance(im, str): - im = cv2.imread(im).astype('float32') - if isinstance(label, str): - label = np.asarray(Image.open(label)) - if im is None: - raise ValueError('Can\'t read The image file {}!'.format(im)) - if self.to_rgb: - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - - for op in self.transforms: - outputs = op(im, im_info, label) - im = outputs[0] - if len(outputs) >= 2: - im_info = outputs[1] - if len(outputs) == 3: - label = outputs[2] - im = permute(im) - # if len(outputs) == 3: - # label = label[np.newaxis, :, :] - return (im, im_info, label) - - -@manager.TRANSFORMS.add_component -class RandomHorizontalFlip: - def __init__(self, prob=0.5): - self.prob = prob - - def __call__(self, im, im_info=None, label=None): - if random.random() < self.prob: - im = horizontal_flip(im) - if label is not None: - label = horizontal_flip(label) - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -@manager.TRANSFORMS.add_component -class RandomVerticalFlip: - def __init__(self, prob=0.1): - self.prob = prob - - def __call__(self, im, im_info=None, label=None): - if random.random() < self.prob: - im = vertical_flip(im) - if label is not None: - label = vertical_flip(label) - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -@manager.TRANSFORMS.add_component -class Resize: - # The 
interpolation mode - interp_dict = { - 'NEAREST': cv2.INTER_NEAREST, - 'LINEAR': cv2.INTER_LINEAR, - 'CUBIC': cv2.INTER_CUBIC, - 'AREA': cv2.INTER_AREA, - 'LANCZOS4': cv2.INTER_LANCZOS4 - } - - def __init__(self, target_size=512, interp='LINEAR'): - self.interp = interp - if not (interp == "RANDOM" or interp in self.interp_dict): - raise ValueError("interp should be one of {}".format( - self.interp_dict.keys())) - if isinstance(target_size, list) or isinstance(target_size, tuple): - if len(target_size) != 2: - raise TypeError( - 'when target is list or tuple, it should include 2 elements, but it is {}' - .format(target_size)) - elif not isinstance(target_size, int): - raise TypeError( - "Type of target_size is invalid. Must be Integer or List or tuple, now is {}" - .format(type(target_size))) - - self.target_size = target_size - - def __call__(self, im, im_info=None, label=None): - if im_info is None: - im_info = list() - im_info.append(('resize', im.shape[:2])) - if not isinstance(im, np.ndarray): - raise TypeError("Resize: image type is not numpy.") - if len(im.shape) != 3: - raise ValueError('Resize: image is not 3-dimensional.') - if self.interp == "RANDOM": - interp = random.choice(list(self.interp_dict.keys())) - else: - interp = self.interp - im = resize(im, self.target_size, self.interp_dict[interp]) - if label is not None: - label = resize(label, self.target_size, cv2.INTER_NEAREST) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -@manager.TRANSFORMS.add_component -class ResizeByLong: - def __init__(self, long_size): - self.long_size = long_size - - def __call__(self, im, im_info=None, label=None): - if im_info is None: - im_info = list() - - im_info.append(('resize', im.shape[:2])) - im = resize_long(im, self.long_size) - if label is not None: - label = resize_long(label, self.long_size, cv2.INTER_NEAREST) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - 
-@manager.TRANSFORMS.add_component -class ResizeRangeScaling: - def __init__(self, min_value=400, max_value=600): - if min_value > max_value: - raise ValueError('min_value must be less than max_value, ' - 'but they are {} and {}.'.format( - min_value, max_value)) - self.min_value = min_value - self.max_value = max_value - - def __call__(self, im, im_info=None, label=None): - if self.min_value == self.max_value: - random_size = self.max_value - else: - random_size = int( - np.random.uniform(self.min_value, self.max_value) + 0.5) - im = resize_long(im, random_size, cv2.INTER_LINEAR) - if label is not None: - label = resize_long(label, random_size, cv2.INTER_NEAREST) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -@manager.TRANSFORMS.add_component -class ResizeStepScaling: - def __init__(self, - min_scale_factor=0.75, - max_scale_factor=1.25, - scale_step_size=0.25): - if min_scale_factor > max_scale_factor: - raise ValueError( - 'min_scale_factor must be less than max_scale_factor, ' - 'but they are {} and {}.'.format(min_scale_factor, - max_scale_factor)) - self.min_scale_factor = min_scale_factor - self.max_scale_factor = max_scale_factor - self.scale_step_size = scale_step_size - - def __call__(self, im, im_info=None, label=None): - if self.min_scale_factor == self.max_scale_factor: - scale_factor = self.min_scale_factor - - elif self.scale_step_size == 0: - scale_factor = np.random.uniform(self.min_scale_factor, - self.max_scale_factor) - - else: - num_steps = int((self.max_scale_factor - self.min_scale_factor) / - self.scale_step_size + 1) - scale_factors = np.linspace(self.min_scale_factor, - self.max_scale_factor, - num_steps).tolist() - np.random.shuffle(scale_factors) - scale_factor = scale_factors[0] - w = int(round(scale_factor * im.shape[1])) - h = int(round(scale_factor * im.shape[0])) - - im = resize(im, (w, h), cv2.INTER_LINEAR) - if label is not None: - label = resize(label, (w, h), cv2.INTER_NEAREST) - - if 
label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -@manager.TRANSFORMS.add_component -class Normalize: - def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]): - self.mean = mean - self.std = std - if not (isinstance(self.mean, list) and isinstance(self.std, list)): - raise ValueError("{}: input type is invalid.".format(self)) - from functools import reduce - if reduce(lambda x, y: x * y, self.std) == 0: - raise ValueError('{}: std is invalid!'.format(self)) - - def __call__(self, im, im_info=None, label=None): - mean = np.array(self.mean)[np.newaxis, np.newaxis, :] - std = np.array(self.std)[np.newaxis, np.newaxis, :] - im = normalize(im, mean, std) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -@manager.TRANSFORMS.add_component -class Padding: - def __init__(self, - target_size, - im_padding_value=[127.5, 127.5, 127.5], - label_padding_value=255): - if isinstance(target_size, list) or isinstance(target_size, tuple): - if len(target_size) != 2: - raise ValueError( - 'when target is list or tuple, it should include 2 elements, but it is {}' - .format(target_size)) - elif not isinstance(target_size, int): - raise TypeError( - "Type of target_size is invalid. 
Must be Integer or List or tuple, now is {}" - .format(type(target_size))) - self.target_size = target_size - self.im_padding_value = im_padding_value - self.label_padding_value = label_padding_value - - def __call__(self, im, im_info=None, label=None): - if im_info is None: - im_info = list() - im_info.append(('padding', im.shape[:2])) - - im_height, im_width = im.shape[0], im.shape[1] - if isinstance(self.target_size, int): - target_height = self.target_size - target_width = self.target_size - else: - target_height = self.target_size[1] - target_width = self.target_size[0] - pad_height = target_height - im_height - pad_width = target_width - im_width - if pad_height < 0 or pad_width < 0: - raise ValueError( - 'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})' - .format(im_width, im_height, target_width, target_height)) - else: - im = cv2.copyMakeBorder( - im, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.im_padding_value) - if label is not None: - label = cv2.copyMakeBorder( - label, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.label_padding_value) - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -@manager.TRANSFORMS.add_component -class RandomPaddingCrop: - def __init__(self, - crop_size=512, - im_padding_value=[127.5, 127.5, 127.5], - label_padding_value=255): - if isinstance(crop_size, list) or isinstance(crop_size, tuple): - if len(crop_size) != 2: - raise ValueError( - 'when crop_size is list or tuple, it should include 2 elements, but it is {}' - .format(crop_size)) - elif not isinstance(crop_size, int): - raise TypeError( - "Type of crop_size is invalid. 
Must be Integer or List or tuple, now is {}" - .format(type(crop_size))) - self.crop_size = crop_size - self.im_padding_value = im_padding_value - self.label_padding_value = label_padding_value - - def __call__(self, im, im_info=None, label=None): - if isinstance(self.crop_size, int): - crop_width = self.crop_size - crop_height = self.crop_size - else: - crop_width = self.crop_size[0] - crop_height = self.crop_size[1] - - img_height = im.shape[0] - img_width = im.shape[1] - - if img_height == crop_height and img_width == crop_width: - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - else: - pad_height = max(crop_height - img_height, 0) - pad_width = max(crop_width - img_width, 0) - if (pad_height > 0 or pad_width > 0): - im = cv2.copyMakeBorder( - im, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.im_padding_value) - if label is not None: - label = cv2.copyMakeBorder( - label, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.label_padding_value) - img_height = im.shape[0] - img_width = im.shape[1] - - if crop_height > 0 and crop_width > 0: - h_off = np.random.randint(img_height - crop_height + 1) - w_off = np.random.randint(img_width - crop_width + 1) - - im = im[h_off:(crop_height + h_off), w_off:( - w_off + crop_width), :] - if label is not None: - label = label[h_off:(crop_height + h_off), w_off:( - w_off + crop_width)] - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -@manager.TRANSFORMS.add_component -class RandomBlur: - def __init__(self, prob=0.1): - self.prob = prob - - def __call__(self, im, im_info=None, label=None): - if self.prob <= 0: - n = 0 - elif self.prob >= 1: - n = 1 - else: - n = int(1.0 / self.prob) - if n > 0: - if np.random.randint(0, n) == 0: - radius = np.random.randint(3, 10) - if radius % 2 != 1: - radius = radius + 1 - if radius > 9: - radius = 9 - im = cv2.GaussianBlur(im, (radius, radius), 0, 0) - - if label is None: - 
return (im, im_info) - else: - return (im, im_info, label) - - -@manager.TRANSFORMS.add_component -class RandomRotation: - def __init__(self, - max_rotation=15, - im_padding_value=[127.5, 127.5, 127.5], - label_padding_value=255): - self.max_rotation = max_rotation - self.im_padding_value = im_padding_value - self.label_padding_value = label_padding_value - - def __call__(self, im, im_info=None, label=None): - if self.max_rotation > 0: - (h, w) = im.shape[:2] - do_rotation = np.random.uniform(-self.max_rotation, - self.max_rotation) - pc = (w // 2, h // 2) - r = cv2.getRotationMatrix2D(pc, do_rotation, 1.0) - cos = np.abs(r[0, 0]) - sin = np.abs(r[0, 1]) - - nw = int((h * sin) + (w * cos)) - nh = int((h * cos) + (w * sin)) - - (cx, cy) = pc - r[0, 2] += (nw / 2) - cx - r[1, 2] += (nh / 2) - cy - dsize = (nw, nh) - im = cv2.warpAffine( - im, - r, - dsize=dsize, - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=self.im_padding_value) - label = cv2.warpAffine( - label, - r, - dsize=dsize, - flags=cv2.INTER_NEAREST, - borderMode=cv2.BORDER_CONSTANT, - borderValue=self.label_padding_value) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -@manager.TRANSFORMS.add_component -class RandomScaleAspect: - def __init__(self, min_scale=0.5, aspect_ratio=0.33): - self.min_scale = min_scale - self.aspect_ratio = aspect_ratio - - def __call__(self, im, im_info=None, label=None): - if self.min_scale != 0 and self.aspect_ratio != 0: - img_height = im.shape[0] - img_width = im.shape[1] - for i in range(0, 10): - area = img_height * img_width - target_area = area * np.random.uniform(self.min_scale, 1.0) - aspectRatio = np.random.uniform(self.aspect_ratio, - 1.0 / self.aspect_ratio) - - dw = int(np.sqrt(target_area * 1.0 * aspectRatio)) - dh = int(np.sqrt(target_area * 1.0 / aspectRatio)) - if (np.random.randint(10) < 5): - tmp = dw - dw = dh - dh = tmp - - if (dh < img_height and dw < img_width): - h1 = np.random.randint(0, 
img_height - dh) - w1 = np.random.randint(0, img_width - dw) - - im = im[h1:(h1 + dh), w1:(w1 + dw), :] - label = label[h1:(h1 + dh), w1:(w1 + dw)] - im = cv2.resize( - im, (img_width, img_height), - interpolation=cv2.INTER_LINEAR) - label = cv2.resize( - label, (img_width, img_height), - interpolation=cv2.INTER_NEAREST) - break - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -@manager.TRANSFORMS.add_component -class RandomDistort: - def __init__(self, - brightness_range=0.5, - brightness_prob=0.5, - contrast_range=0.5, - contrast_prob=0.5, - saturation_range=0.5, - saturation_prob=0.5, - hue_range=18, - hue_prob=0.5): - self.brightness_range = brightness_range - self.brightness_prob = brightness_prob - self.contrast_range = contrast_range - self.contrast_prob = contrast_prob - self.saturation_range = saturation_range - self.saturation_prob = saturation_prob - self.hue_range = hue_range - self.hue_prob = hue_prob - - def __call__(self, im, im_info=None, label=None): - brightness_lower = 1 - self.brightness_range - brightness_upper = 1 + self.brightness_range - contrast_lower = 1 - self.contrast_range - contrast_upper = 1 + self.contrast_range - saturation_lower = 1 - self.saturation_range - saturation_upper = 1 + self.saturation_range - hue_lower = -self.hue_range - hue_upper = self.hue_range - ops = [brightness, contrast, saturation, hue] - random.shuffle(ops) - params_dict = { - 'brightness': { - 'brightness_lower': brightness_lower, - 'brightness_upper': brightness_upper - }, - 'contrast': { - 'contrast_lower': contrast_lower, - 'contrast_upper': contrast_upper - }, - 'saturation': { - 'saturation_lower': saturation_lower, - 'saturation_upper': saturation_upper - }, - 'hue': { - 'hue_lower': hue_lower, - 'hue_upper': hue_upper - } - } - prob_dict = { - 'brightness': self.brightness_prob, - 'contrast': self.contrast_prob, - 'saturation': self.saturation_prob, - 'hue': self.hue_prob - } - im = im.astype('uint8') - im = 
Image.fromarray(im) - for id in range(4): - params = params_dict[ops[id].__name__] - prob = prob_dict[ops[id].__name__] - params['im'] = im - if np.random.uniform(0, 1) < prob: - im = ops[id](**params) - im = np.asarray(im).astype('float32') - if label is None: - return (im, im_info) - else: - return (im, im_info, label) diff --git a/dygraph/utils/__init__.py b/dygraph/utils/__init__.py deleted file mode 100644 index a22f9e5ec0ff32a5e42b6c2d7d6bed14a56994a1..0000000000000000000000000000000000000000 --- a/dygraph/utils/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from . import logger -from . import download -from .metrics import ConfusionMatrix -from .utils import * -from .timer import Timer, calculate_eta -from .get_environ_info import get_environ_info -from .config import Config diff --git a/dygraph/utils/config.py b/dygraph/utils/config.py deleted file mode 100644 index e0577a6e0eaaa5353c680bfded30094cacd969ab..0000000000000000000000000000000000000000 --- a/dygraph/utils/config.py +++ /dev/null @@ -1,241 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import codecs -import os -from typing import Any, Callable - -import yaml -import paddle.fluid as fluid - -import dygraph.cvlibs.manager as manager - - -class Config(object): - ''' - Training config. - - Args: - path(str) : the path of config file, supports yaml format only - ''' - - def __init__(self, path: str): - if not os.path.exists(path): - raise FileNotFoundError('File {} does not exist'.format(path)) - - if path.endswith('yml') or path.endswith('yaml'): - dic = self._parse_from_yaml(path) - print(dic) - self._build(dic) - else: - raise RuntimeError('Config file should in yaml format!') - - def _update_dic(self, dic, base_dic): - """ - update config from dic based base_dic - """ - base_dic = base_dic.copy() - for key, val in dic.items(): - if isinstance(val, dict) and key in base_dic: - base_dic[key] = self._update_dic(val, base_dic[key]) - else: - base_dic[key] = val - dic = base_dic - return dic - - def _parse_from_yaml(self, path: str): - '''Parse a yaml file and build config''' - with codecs.open(path, 'r', 'utf-8') as file: - dic = yaml.load(file, Loader=yaml.FullLoader) - if '_base_' in dic: - cfg_dir = os.path.dirname(path) - base_path = dic.pop('_base_') - base_path = os.path.join(cfg_dir, base_path) - base_dic = self._parse_from_yaml(base_path) - dic = self._update_dic(dic, base_dic) - return dic - - def _build(self, dic: dict): - '''Build config from dictionary''' - dic = dic.copy() - - self._batch_size = dic.get('batch_size', 1) - self._iters = dic.get('iters') - - if 'model' not in dic: - raise RuntimeError() - self._model_cfg = 
dic['model'] - self._model = None - - self._train_dataset = dic.get('train_dataset') - self._val_dataset = dic.get('val_dataset') - - self._learning_rate_cfg = dic.get('learning_rate', {}) - self._learning_rate = self._learning_rate_cfg.get('value') - self._decay = self._learning_rate_cfg.get('decay', { - 'type': 'poly', - 'power': 0.9 - }) - - self._loss_cfg = dic.get('loss', {}) - self._losses = None - - self._optimizer_cfg = dic.get('optimizer', {}) - - def update(self, - learning_rate: float = None, - batch_size: int = None, - iters: int = None): - '''Update config''' - if learning_rate: - self._learning_rate = learning_rate - - if batch_size: - self._batch_size = batch_size - - if iters: - self._iters = iters - - @property - def batch_size(self) -> int: - return self._batch_size - - @property - def iters(self) -> int: - if not self._iters: - raise RuntimeError('No iters specified in the configuration file.') - return self._iters - - @property - def learning_rate(self) -> float: - if not self._learning_rate: - raise RuntimeError( - 'No learning rate specified in the configuration file.') - - if self.decay_type == 'poly': - lr = self._learning_rate - args = self.decay_args - args.setdefault('decay_steps', self.iters) - return fluid.layers.polynomial_decay(lr, **args) - else: - raise RuntimeError('Only poly decay support.') - - @property - def optimizer(self) -> fluid.optimizer.Optimizer: - if self.optimizer_type == 'sgd': - lr = self.learning_rate - args = self.optimizer_args - args.setdefault('momentum', 0.9) - return fluid.optimizer.Momentum( - lr, parameter_list=self.model.parameters(), **args) - else: - raise RuntimeError('Only sgd optimizer support.') - - @property - def optimizer_type(self) -> str: - otype = self._optimizer_cfg.get('type') - if not otype: - raise RuntimeError( - 'No optimizer type specified in the configuration file.') - return otype - - @property - def optimizer_args(self) -> dict: - args = self._optimizer_cfg.copy() - args.pop('type') - 
return args - - @property - def decay_type(self) -> str: - return self._decay['type'] - - @property - def decay_args(self) -> dict: - args = self._decay.copy() - args.pop('type') - return args - - @property - def loss(self) -> list: - if not self._losses: - args = self._loss_cfg.copy() - self._losses = dict() - for key, val in args.items(): - if key == 'types': - self._losses['types'] = [] - for item in args['types']: - self._losses['types'].append(self._load_object(item)) - else: - self._losses[key] = val - if len(self._losses['coef']) != len(self._losses['types']): - raise RuntimeError( - 'The length of coef should equal to types in loss config: {} != {}.' - .format( - len(self._losses['coef']), len(self._losses['types']))) - return self._losses - - @property - def model(self) -> Callable: - if not self._model: - self._model = self._load_object(self._model_cfg) - return self._model - - @property - def train_dataset(self) -> Any: - if not self._train_dataset: - return None - return self._load_object(self._train_dataset) - - @property - def val_dataset(self) -> Any: - if not self._val_dataset: - return None - return self._load_object(self._val_dataset) - - def _load_component(self, com_name: str) -> Any: - com_list = [ - manager.MODELS, manager.BACKBONES, manager.DATASETS, - manager.TRANSFORMS, manager.LOSSES - ] - - for com in com_list: - if com_name in com.components_dict: - return com[com_name] - else: - raise RuntimeError( - 'The specified component was not found {}.'.format(com_name)) - - def _load_object(self, cfg: dict) -> Any: - cfg = cfg.copy() - if 'type' not in cfg: - raise RuntimeError('No object information in {}.'.format(cfg)) - - component = self._load_component(cfg.pop('type')) - - params = {} - for key, val in cfg.items(): - if self._is_meta_type(val): - params[key] = self._load_object(val) - elif isinstance(val, list): - params[key] = [ - self._load_object(item) - if self._is_meta_type(item) else item for item in val - ] - else: - params[key] = 
val - - return component(**params) - - def _is_meta_type(self, item: Any) -> bool: - return isinstance(item, dict) and 'type' in item diff --git a/dygraph/utils/download.py b/dygraph/utils/download.py deleted file mode 100644 index 7bf6dd096a4b33587b47bed127673d8fe09aefbb..0000000000000000000000000000000000000000 --- a/dygraph/utils/download.py +++ /dev/null @@ -1,135 +0,0 @@ -import os -import sys -import time -import requests -import tarfile -import zipfile -import shutil -import functools - -lasttime = time.time() -FLUSH_INTERVAL = 0.1 - - -def progress(str, end=False): - global lasttime - if end: - str += "\n" - lasttime = 0 - if time.time() - lasttime >= FLUSH_INTERVAL: - sys.stdout.write("\r%s" % str) - lasttime = time.time() - sys.stdout.flush() - - -def _download_file(url, savepath, print_progress): - r = requests.get(url, stream=True) - total_length = r.headers.get('content-length') - - if total_length is None: - with open(savepath, 'wb') as f: - shutil.copyfileobj(r.raw, f) - else: - with open(savepath, 'wb') as f: - dl = 0 - total_length = int(total_length) - starttime = time.time() - if print_progress: - print("Downloading %s" % os.path.basename(savepath)) - for data in r.iter_content(chunk_size=4096): - dl += len(data) - f.write(data) - if print_progress: - done = int(50 * dl / total_length) - progress("[%-50s] %.2f%%" % - ('=' * done, float(100 * dl) / total_length)) - if print_progress: - progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True) - - -def _uncompress_file_zip(filepath, extrapath): - files = zipfile.ZipFile(filepath, 'r') - filelist = files.namelist() - rootpath = filelist[0] - total_num = len(filelist) - for index, file in enumerate(filelist): - files.extract(file, extrapath) - yield total_num, index, rootpath - files.close() - yield total_num, index, rootpath - - -def _uncompress_file_tar(filepath, extrapath, mode="r:gz"): - files = tarfile.open(filepath, mode) - filelist = files.getnames() - total_num = len(filelist) - rootpath = 
filelist[0] - for index, file in enumerate(filelist): - files.extract(file, extrapath) - yield total_num, index, rootpath - files.close() - yield total_num, index, rootpath - - -def _uncompress_file(filepath, extrapath, delete_file, print_progress): - if print_progress: - print("Uncompress %s" % os.path.basename(filepath)) - - if filepath.endswith("zip"): - handler = _uncompress_file_zip - elif filepath.endswith("tgz"): - handler = _uncompress_file_tar - else: - handler = functools.partial(_uncompress_file_tar, mode="r") - - for total_num, index, rootpath in handler(filepath, extrapath): - if print_progress: - done = int(50 * float(index) / total_num) - progress( - "[%-50s] %.2f%%" % ('=' * done, float(100 * index) / total_num)) - if print_progress: - progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True) - - if delete_file: - os.remove(filepath) - - return rootpath - - -def download_file_and_uncompress(url, - savepath=None, - extrapath=None, - extraname=None, - print_progress=True, - cover=False, - delete_file=True): - if savepath is None: - savepath = "." - - if extrapath is None: - extrapath = "." 
- - savename = url.split("/")[-1] - savepath = os.path.join(savepath, savename) - savename = ".".join(savename.split(".")[:-1]) - savename = os.path.join(extrapath, savename) - extraname = savename if extraname is None else os.path.join( - extrapath, extraname) - - if cover: - if os.path.exists(savepath): - shutil.rmtree(savepath) - if os.path.exists(savename): - shutil.rmtree(savename) - if os.path.exists(extraname): - shutil.rmtree(extraname) - - if not os.path.exists(extraname): - if not os.path.exists(savename): - if not os.path.exists(savepath): - _download_file(url, savepath, print_progress) - savename = _uncompress_file(savepath, extrapath, delete_file, - print_progress) - savename = os.path.join(extrapath, savename) - shutil.move(savename, extraname) - return extraname diff --git a/dygraph/utils/get_environ_info.py b/dygraph/utils/get_environ_info.py deleted file mode 100644 index 7d789f4d60e875fd11514fa13b901885be7b0024..0000000000000000000000000000000000000000 --- a/dygraph/utils/get_environ_info.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -from collections import OrderedDict -import subprocess -import glob - -import paddle -import paddle.fluid as fluid -import cv2 - -IS_WINDOWS = sys.platform == 'win32' - - -def _find_cuda_home(): - '''Finds the CUDA install path. It refers to the implementation of - pytorch . 
- ''' - # Guess #1 - cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH') - if cuda_home is None: - # Guess #2 - try: - which = 'where' if IS_WINDOWS else 'which' - nvcc = subprocess.check_output([which, - 'nvcc']).decode().rstrip('\r\n') - cuda_home = os.path.dirname(os.path.dirname(nvcc)) - except Exception: - # Guess #3 - if IS_WINDOWS: - cuda_homes = glob.glob( - 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*') - if len(cuda_homes) == 0: - cuda_home = '' - else: - cuda_home = cuda_homes[0] - else: - cuda_home = '/usr/local/cuda' - if not os.path.exists(cuda_home): - cuda_home = None - return cuda_home - - -def _get_nvcc_info(cuda_home): - if cuda_home is not None and os.path.isdir(cuda_home): - try: - nvcc = os.path.join(cuda_home, 'bin/nvcc') - nvcc = subprocess.check_output( - "{} -V".format(nvcc), shell=True).decode() - nvcc = nvcc.strip().split('\n')[-1] - except subprocess.SubprocessError: - nvcc = "Not Available" - return nvcc - - -def _get_gpu_info(): - try: - gpu_info = subprocess.check_output(['nvidia-smi', - '-L']).decode().strip() - gpu_info = gpu_info.split('\n') - for i in range(len(gpu_info)): - gpu_info[i] = ' '.join(gpu_info[i].split(' ')[:4]) - except: - gpu_info = ' Can not get GPU information. Please make sure CUDA have been installed successfully.' 
- return gpu_info - - -def get_environ_info(): - """collect environment information""" - env_info = {} - env_info['System Platform'] = sys.platform - if env_info['System Platform'] == 'linux': - try: - lsb_v = subprocess.check_output(['lsb_release', - '-v']).decode().strip() - lsb_v = lsb_v.replace('\t', ' ') - lsb_d = subprocess.check_output(['lsb_release', - '-d']).decode().strip() - lsb_d = lsb_d.replace('\t', ' ') - env_info['LSB'] = [lsb_v, lsb_d] - except: - pass - - env_info['Python'] = sys.version.replace('\n', '') - - compiled_with_cuda = paddle.fluid.is_compiled_with_cuda() - env_info['Paddle compiled with cuda'] = compiled_with_cuda - - if compiled_with_cuda: - cuda_home = _find_cuda_home() - env_info['NVCC'] = _get_nvcc_info(cuda_home) - gpu_nums = fluid.core.get_cuda_device_count() - env_info['GPUs used'] = gpu_nums - env_info['CUDA_VISIBLE_DEVICES'] = os.environ.get( - 'CUDA_VISIBLE_DEVICES') - env_info['GPU'] = _get_gpu_info() - - gcc = subprocess.check_output(['gcc', '--version']).decode() - gcc = gcc.strip().split('\n')[0] - env_info['GCC'] = gcc - - env_info['PaddlePaddle'] = paddle.__version__ - env_info['OpenCV'] = cv2.__version__ - - return env_info diff --git a/dygraph/utils/logger.py b/dygraph/utils/logger.py deleted file mode 100644 index 015948f65090e40895f6d4a72a75a11f2b155447..0000000000000000000000000000000000000000 --- a/dygraph/utils/logger.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import time -import os -import sys - -from paddle.fluid.dygraph.parallel import ParallelEnv - -levels = {0: 'ERROR', 1: 'WARNING', 2: 'INFO', 3: 'DEBUG'} -log_level = 2 - - -def log(level=2, message=""): - if ParallelEnv().local_rank == 0: - current_time = time.time() - time_array = time.localtime(current_time) - current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array) - if log_level >= level: - print( - "{} [{}]\t{}".format(current_time, levels[level], - message).encode("utf-8").decode("latin1")) - sys.stdout.flush() - - -def debug(message=""): - log(level=3, message=message) - - -def info(message=""): - log(level=2, message=message) - - -def warning(message=""): - log(level=1, message=message) - - -def error(message=""): - log(level=0, message=message) diff --git a/dygraph/utils/metrics.py b/dygraph/utils/metrics.py deleted file mode 100644 index b107cbd57a936fb909086567fc8b703fb86963b7..0000000000000000000000000000000000000000 --- a/dygraph/utils/metrics.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import numpy as np -from scipy.sparse import csr_matrix - - -class ConfusionMatrix(object): - """ - Confusion Matrix for segmentation evaluation - """ - - def __init__(self, num_classes=2, streaming=False): - self.confusion_matrix = np.zeros([num_classes, num_classes], - dtype='int64') - self.num_classes = num_classes - self.streaming = streaming - - def calculate(self, pred, label, ignore=None): - # If not in streaming mode, clear matrix everytime when call `calculate` - if not self.streaming: - self.zero_matrix() - - label = np.transpose(label, (0, 2, 3, 1)) - ignore = np.transpose(ignore, (0, 2, 3, 1)) - mask = np.array(ignore) == 1 - - label = np.asarray(label)[mask] - pred = np.asarray(pred)[mask] - one = np.ones_like(pred) - # Accumuate ([row=label, col=pred], 1) into sparse matrix - spm = csr_matrix((one, (label, pred)), - shape=(self.num_classes, self.num_classes)) - spm = spm.todense() - self.confusion_matrix += spm - - def zero_matrix(self): - """ Clear confusion matrix """ - self.confusion_matrix = np.zeros([self.num_classes, self.num_classes], - dtype='int64') - - def mean_iou(self): - iou_list = [] - avg_iou = 0 - # TODO: use numpy sum axis api to simpliy - vji = np.zeros(self.num_classes, dtype=int) - vij = np.zeros(self.num_classes, dtype=int) - for j in range(self.num_classes): - v_j = 0 - for i in range(self.num_classes): - v_j += self.confusion_matrix[j][i] - vji[j] = v_j - - for i in range(self.num_classes): - v_i = 0 - for j in range(self.num_classes): - v_i += self.confusion_matrix[j][i] - vij[i] = v_i - - for c in range(self.num_classes): - total = vji[c] + vij[c] - self.confusion_matrix[c][c] - if total == 0: - iou = 0 - else: - iou = float(self.confusion_matrix[c][c]) / total - avg_iou += iou - iou_list.append(iou) - avg_iou = float(avg_iou) / float(self.num_classes) - return np.array(iou_list), avg_iou - - def accuracy(self): - total = self.confusion_matrix.sum() - total_right = 0 - for c in 
range(self.num_classes): - total_right += self.confusion_matrix[c][c] - if total == 0: - avg_acc = 0 - else: - avg_acc = float(total_right) / total - - vij = np.zeros(self.num_classes, dtype=int) - for i in range(self.num_classes): - v_i = 0 - for j in range(self.num_classes): - v_i += self.confusion_matrix[j][i] - vij[i] = v_i - - acc_list = [] - for c in range(self.num_classes): - if vij[c] == 0: - acc = 0 - else: - acc = self.confusion_matrix[c][c] / float(vij[c]) - acc_list.append(acc) - return np.array(acc_list), avg_acc - - def kappa(self): - vji = np.zeros(self.num_classes) - vij = np.zeros(self.num_classes) - for j in range(self.num_classes): - v_j = 0 - for i in range(self.num_classes): - v_j += self.confusion_matrix[j][i] - vji[j] = v_j - - for i in range(self.num_classes): - v_i = 0 - for j in range(self.num_classes): - v_i += self.confusion_matrix[j][i] - vij[i] = v_i - - total = self.confusion_matrix.sum() - - # avoid spillovers - # TODO: is it reasonable to hard code 10000.0? - total = float(total) / 10000.0 - vji = vji / 10000.0 - vij = vij / 10000.0 - - tp = 0 - tc = 0 - for c in range(self.num_classes): - tp += vji[c] * vij[c] - tc += self.confusion_matrix[c][c] - - tc = tc / 10000.0 - pe = tp / (total * total) - po = tc / total - - kappa = (po - pe) / (1 - pe) - return kappa diff --git a/dygraph/utils/timer.py b/dygraph/utils/timer.py deleted file mode 100644 index 4ebbddc9a154de4a36d6b6d9b437e14382031c49..0000000000000000000000000000000000000000 --- a/dygraph/utils/timer.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import time - - -class Timer(object): - """ Simple timer class for measuring time consuming """ - - def __init__(self): - self._start_time = 0.0 - self._end_time = 0.0 - self._elapsed_time = 0.0 - self._is_running = False - - def start(self): - self._is_running = True - self._start_time = time.time() - - def restart(self): - self.start() - - def stop(self): - self._is_running = False - self._end_time = time.time() - - def elapsed_time(self): - self._end_time = time.time() - self._elapsed_time = self._end_time - self._start_time - if not self.is_running: - return 0.0 - - return self._elapsed_time - - @property - def is_running(self): - return self._is_running - - -def calculate_eta(remaining_step, speed): - if remaining_step < 0: - remaining_step = 0 - remaining_time = int(remaining_step * speed) - result = "{:0>2}:{:0>2}:{:0>2}" - arr = [] - for i in range(2, -1, -1): - arr.append(int(remaining_time / 60**i)) - remaining_time %= 60**i - return result.format(*arr) diff --git a/dygraph/utils/utils.py b/dygraph/utils/utils.py deleted file mode 100644 index 0b7d87169a76a196926e7f9e2017ebd42a5605ad..0000000000000000000000000000000000000000 --- a/dygraph/utils/utils.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np -import math -import cv2 -import paddle.fluid as fluid - -from . import logger - - -def seconds_to_hms(seconds): - h = math.floor(seconds / 3600) - m = math.floor((seconds - h * 3600) / 60) - s = int(seconds - h * 3600 - m * 60) - hms_str = "{}:{}:{}".format(h, m, s) - return hms_str - - -def load_pretrained_model(model, pretrained_model): - if pretrained_model is not None: - logger.info('Load pretrained model from {}'.format(pretrained_model)) - if os.path.exists(pretrained_model): - ckpt_path = os.path.join(pretrained_model, 'model') - try: - para_state_dict, _ = fluid.load_dygraph(ckpt_path) - except: - para_state_dict = fluid.load_program_state(pretrained_model) - - model_state_dict = model.state_dict() - keys = model_state_dict.keys() - num_params_loaded = 0 - for k in keys: - if k not in para_state_dict: - logger.warning("{} is not in pretrained model".format(k)) - elif list(para_state_dict[k].shape) != list( - model_state_dict[k].shape): - logger.warning( - "[SKIP] Shape of pretrained params {} doesn't match.(Pretrained: {}, Actual: {})" - .format(k, para_state_dict[k].shape, - model_state_dict[k].shape)) - else: - model_state_dict[k] = para_state_dict[k] - num_params_loaded += 1 - model.set_dict(model_state_dict) - logger.info("There are {}/{} varaibles are loaded.".format( - num_params_loaded, len(model_state_dict))) - - else: - raise ValueError( - 'The pretrained model directory is not Found: {}'.format( - pretrained_model)) - else: - logger.warning('No pretrained model to load, train from scratch') - - -def 
resume(model, optimizer, resume_model): - if resume_model is not None: - logger.info('Resume model from {}'.format(resume_model)) - if os.path.exists(resume_model): - resume_model = os.path.normpath(resume_model) - ckpt_path = os.path.join(resume_model, 'model') - para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path) - model.set_dict(para_state_dict) - optimizer.set_dict(opti_state_dict) - epoch = resume_model.split('_')[-1] - if epoch.isdigit(): - epoch = int(epoch) - return epoch - else: - raise ValueError( - 'The resume model directory is not Found: {}'.format( - resume_model)) - else: - logger.info('No model need to resume') - - -def visualize(image, result, save_dir=None, weight=0.6): - """ - Convert segment result to color image, and save added image. - Args: - image: the path of origin image - result: the predict result of image - save_dir: the directory for saving visual image - weight: the image weight of visual image, and the result weight is (1 - weight) - """ - color_map = get_color_map_list(256) - color_map = np.array(color_map).astype("uint8") - # Use OpenCV LUT for color mapping - c1 = cv2.LUT(result, color_map[:, 0]) - c2 = cv2.LUT(result, color_map[:, 1]) - c3 = cv2.LUT(result, color_map[:, 2]) - pseudo_img = np.dstack((c1, c2, c3)) - - im = cv2.imread(image) - vis_result = cv2.addWeighted(im, weight, pseudo_img, 1 - weight, 0) - - if save_dir is not None: - if not os.path.exists(save_dir): - os.makedirs(save_dir) - image_name = os.path.split(image)[-1] - out_path = os.path.join(save_dir, image_name) - cv2.imwrite(out_path, vis_result) - else: - return vis_result - - -def get_color_map_list(num_classes): - """ Returns the color map for visualizing the segmentation mask, - which can support arbitrary number of classes. 
- Args: - num_classes: Number of classes - Returns: - The color map - """ - num_classes += 1 - color_map = num_classes * [0, 0, 0] - for i in range(0, num_classes): - j = 0 - lab = i - while lab: - color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j)) - color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)) - color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)) - j += 1 - lab >>= 3 - color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)] - color_map = color_map[1:] - return color_map