diff --git a/contrib/HumanSeg/models/humanseg.py b/contrib/HumanSeg/models/humanseg.py index 5873c992ab8405d397806593fd690d3b668c38f2..f5e586ec9e24410313df07379eec7f58637a0fcc 100644 --- a/contrib/HumanSeg/models/humanseg.py +++ b/contrib/HumanSeg/models/humanseg.py @@ -102,7 +102,7 @@ class SegModel(object): # 当前模型状态 self.status = 'Normal' - def _get_single_car_bs(self, batch_size): + def _get_single_card_bs(self, batch_size): if batch_size % len(self.places) == 0: return int(batch_size // len(self.places)) else: @@ -144,7 +144,7 @@ class SegModel(object): capacity=64, use_double_buffer=True, iterable=True) - batch_size_each_gpu = self._get_single_car_bs(batch_size) + batch_size_each_gpu = self._get_single_card_bs(batch_size) self.train_data_loader.set_sample_list_generator( dataset.generator(batch_size=batch_size_each_gpu), places=self.places) diff --git a/contrib/HumanSeg/models/load_model.py b/contrib/HumanSeg/models/load_model.py index fc6e3db7a7f1b51a7522cbe6b65c7cde0b01940b..87d0a6f7176c175460305176c1eaf8639175acd3 100644 --- a/contrib/HumanSeg/models/load_model.py +++ b/contrib/HumanSeg/models/load_model.py @@ -24,7 +24,7 @@ import models def load_model(model_dir): if not osp.exists(osp.join(model_dir, "model.yml")): - raise Exception("There's not model.yml in {}".format(model_dir)) + raise Exception("There's no model.yml in {}".format(model_dir)) with open(osp.join(model_dir, "model.yml")) as f: info = yaml.load(f.read(), Loader=yaml.Loader) status = info['status'] diff --git a/contrib/README.md b/contrib/README.md index 1e4a5494829539030432fa184f657803bba72613..ea1919384e462ddad5ee09368a27b9a004fe2703 100644 --- a/contrib/README.md +++ b/contrib/README.md @@ -3,6 +3,7 @@ 提供基于PaddlSeg最新的分割特色模型: - [人像分割](./HumanSeg) +- [遥感分割](./RemoteSensing) - [人体解析](./ACE2P) - [车道线分割](./LaneNet) - [工业表盘分割](#工业表盘分割) @@ -12,6 +13,14 @@ HumanSeg系列全新升级,提供三个适用于不同场景,包含适用于移动端实时分割场景的模型`HumanSeg-lite`,提供了包含光流的后处理的优化,使人像分割在视频场景中更加顺畅,更多详情请参考[HumanSeg](./HumanSeg) +## 遥感分割 Remote Sensing Segmentation +PaddleSeg遥感影像分割涵盖图像预处理、数据增强、模型训练、预测流程。 +针对遥感数据多通道、分布范围大、分布不均的特点,我们支持多通道训练预测,内置10+多通道预处理和数据增强的策略,可结合实际业务场景进行定制组合,提升模型泛化能力和鲁棒性。 +内置U-Net, HRNet两种主流分割网络,可选择不同的损失函数如Dice Loss, BCE Loss等方式强化小目标和不均衡样本场景下的分割精度。更多详情请参考[RemoteSensing](./RemoteSensing) + +以下是遥感云检测的示例效果: + +![](./RemoteSensing/docs/imgs/rs.png) ## 人体解析 Human Parsing diff --git a/contrib/RemoteSensing/README.md b/contrib/RemoteSensing/README.md index 4f96cccf8e542e7185f9cd66d29e4f2899fbcb94..e82db25f451c92f4a6b6f0266832955b46e4bcf2 100644 --- a/contrib/RemoteSensing/README.md +++ b/contrib/RemoteSensing/README.md @@ -1,14 +1,20 @@ -# 遥感分割(RemoteSensing) +# PaddleSeg遥感影像分割 遥感影像分割是图像分割领域中的重要应用场景,广泛应用于土地测绘、环境监测、城市建设等领域。遥感影像分割的目标多种多样,有诸如积雪、农作物、道路、建筑、水源等地物目标,也有例如云层的空中目标。 -PaddleSeg提供了针对遥感专题的语义分割库RemoteSensing,涵盖图像预处理、数据增强、模型训练、预测流程,帮助用户利用深度学习技术解决遥感影像分割问题。 +PaddleSeg遥感影像分割涵盖图像预处理、数据增强、模型训练、预测流程,帮助用户利用深度学习技术解决遥感影像分割问题。 ## 特点 -针对遥感数据多通道、分布范围大、分布不均的特点,我们支持多通道训练预测,内置一系列多通道预处理和数据增强的策略,可结合实际业务场景进行定制组合,提升模型泛化能力和鲁棒性。 +- 针对遥感数据多通道、分布范围大、分布不均的特点,我们支持多通道训练预测,内置10+多通道预处理和数据增强的策略,可结合实际业务场景进行定制组合,提升模型泛化能力和鲁棒性。 -**Note:** 所有命令需要在`PaddleSeg/contrib/RemoteSensing/`目录下执行。 +- 内置U-Net, HRNet两种主流分割网络,可选择不同的损失函数如Dice Loss, BCE Loss等方式强化小目标和不均衡样本场景下的分割精度。 + +以下是遥感云检测的示例效果: + +![](./docs/imgs/rs.png) ## 前置依赖 +**Note:** 若没有特殊说明,以下所有命令需要在`PaddleSeg/contrib/RemoteSensing/`目录下执行。 + - Paddle 1.7.1+ 由于图像分割模型计算开销大,推荐在GPU版本的PaddlePaddle下使用。 PaddlePaddle的安装, 请按照[官网指引](https://paddlepaddle.org.cn/install/quick)安装合适自己的版本。 @@ -18,7 +24,6 @@ PaddlePaddle的安装, 
请按照[官网指引](https://paddlepaddle.org.cn/insta - 其他依赖安装 通过以下命令安装python包依赖,请确保至少执行过一次以下命令: ``` -cd RemoteSensing pip install -r requirements.txt ``` @@ -63,9 +68,9 @@ RemoteSensing # 根目录 ``` 其中,相应的文件名可根据需要自行定义。 -遥感领域图像格式多种多样,不同传感器产生的数据格式可能不同。为方便数据加载,本分割库统一采用numpy存储格式`npy`作为原图格式,采用`png`无损压缩格式作为标注图片格式。 -原图的前两维是图像的尺寸,第3维是图像的通道数。 -标注图像为单通道图像,像素值即为对应的类别,像素标注类别需要从0开始递增, +遥感影像的格式多种多样,不同传感器产生的数据格式也可能不同。PaddleSeg以numpy.ndarray数据类型进行图像预处理。为统一接口并方便数据加载,我们采用numpy存储格式`npy`作为原图格式,采用`png`无损压缩格式作为标注图片格式。 +原图的尺寸应为(h, w, channel),其中h, w为图像的高和宽,channel为图像的通道数。 +标注图像为单通道图像,像素值即为对应的类别,像素标注类别需要从0开始递增。 例如0,1,2,3表示有4种类别,标注类别最多为256类。其中可以指定特定的像素值用于表示该值的像素不参与训练和评估(默认为255)。 `train_list.txt`和`val_list.txt`文本以空格为分割符分为两列,第一列为图像文件相对于dataset的相对路径,第二列为标注图像文件相对于dataset的相对路径。如下所示: @@ -93,154 +98,38 @@ labelB ### 1. 准备数据集 为了快速体验,我们准备了一个小型demo数据集,已位于`RemoteSensing/dataset/demo/`目录下. -对于您自己的数据集,您需要按照上述的数据协议进行格式转换,可分别使用numpy和pil库保存遥感数据和标注图片。其中numpy api示例如下: +对于您自己的数据集,您需要按照上述的数据协议进行格式转换,可分别使用numpy和Pillow库保存遥感数据和标注图片。其中numpy API示例如下: ```python import numpy as np -# 保存遥感数据 +# 将遥感数据保存到以 .npy 为扩展名的文件中 # img类型:numpy.ndarray np.save(save_path, img) ``` -### 2. 训练代码开发 -通过如下`train_demo.py`代码进行训练。 - -> 导入RemoteSensing api -```python -import transforms.transforms as T -from readers.reader import Reader -from models import UNet -``` - -> 定义训练和验证时的数据处理和增强流程, 在`train_transforms`中加入了`RandomVerticalFlip`,`RandomHorizontalFlip`等数据增强方式。 -```python -train_transforms = T.Compose([ - T.RandomVerticalFlip(0.5), - T.RandomHorizontalFlip(0.5), - T.ResizeStepScaling(0.5, 2.0, 0.25), - T.RandomPaddingCrop(256), - T.Normalize(mean=[0.5] * channel, std=[0.5] * channel), -]) - -eval_transforms = T.Compose([ - T.Normalize(mean=[0.5] * channel, std=[0.5] * channel), -]) -``` - -> 定义数据读取器 -```python -import os -import os.path as osp - -train_list = osp.join(data_dir, 'train.txt') -val_list = osp.join(data_dir, 'val.txt') -label_list = osp.join(data_dir, 'labels.txt') - -train_reader = Reader( - data_dir=data_dir, - file_list=train_list, - label_list=label_list, - transforms=train_transforms, - num_workers=8, - buffer_size=16, - shuffle=True, - parallel_method='thread') - -eval_reader = Reader( - data_dir=data_dir, - file_list=val_list, - label_list=label_list, - transforms=eval_transforms, - num_workers=8, - buffer_size=16, - shuffle=False, - parallel_method='thread') -``` -> 模型构建 -```python -model = UNet( - num_classes=2, input_channel=channel, use_bce_loss=True, use_dice_loss=True) -``` -> 模型训练,并开启边训边评估 -```python -model.train( - num_epochs=num_epochs, - train_reader=train_reader, - train_batch_size=train_batch_size, - eval_reader=eval_reader, - save_interval_epochs=5, - log_interval_steps=10, - save_dir=save_dir, - pretrain_weights=None, - optimizer=None, - learning_rate=lr, - use_vdl=True -) -``` - - -### 3. 模型训练 -> 设置GPU卡号 +### 2. 模型训练 +#### (1) 设置GPU卡号 ```shell script export CUDA_VISIBLE_DEVICES=0 ``` -> 在RemoteSensing目录下运行`train_demo.py`即可开始训练。 +#### (2) 以U-Net为例,在RemoteSensing目录下运行`train_demo.py`即可开始训练。 ```shell script -python train_demo.py --data_dir dataset/demo/ --save_dir saved_model/unet/ --channel 3 --num_epochs 20 -``` -### 4. 
模型预测代码开发
-通过如下`predict_demo.py`代码进行预测。
-
-> 导入RemoteSensing api
-```python
-from models import load_model
-```
-> 加载训练过程中最好的模型,设置预测结果保存路径。
-```python
-import os
-import os.path as osp
-model = load_model(osp.join(save_dir, 'best_model'))
-pred_dir = osp.join(save_dir, 'pred')
-if not osp.exists(pred_dir):
-    os.mkdir(pred_dir)
-```
-
-> 使用模型对验证集进行测试,并保存预测结果。
-```python
-import numpy as np
-from PIL import Image as Image
-val_list = osp.join(data_dir, 'val.txt')
-color_map = [0, 0, 0, 255, 255, 255]
-with open(val_list) as f:
-    lines = f.readlines()
-    for line in lines:
-        img_path = line.split(' ')[0]
-        print('Predicting {}'.format(img_path))
-        img_path_ = osp.join(data_dir, img_path)
-
-        pred = model.predict(img_path_)
-
-        # 以伪彩色png图片保存预测结果
-        pred_name = osp.basename(img_path).rstrip('npy') + 'png'
-        pred_path = osp.join(pred_dir, pred_name)
-        pred_mask = Image.fromarray(pred.astype(np.uint8), mode='P')
-        pred_mask.putpalette(color_map)
-        pred_mask.save(pred_path)
+python train_demo.py --model_type unet --data_dir dataset/demo/ --save_dir saved_model/unet/ --channel 3 --num_epochs 20
 ```
 
-### 5. 模型预测
-> 设置GPU卡号
+### 3. 模型预测
+#### (1) 设置GPU卡号
 ```shell script
 export CUDA_VISIBLE_DEVICES=0
 ```
-> 在RemoteSensing目录下运行`predict_demo.py`即可开始训练。
+#### (2) 以刚训练好的U-Net最优模型为例,在RemoteSensing目录下运行`predict_demo.py`即可开始预测。
 ```shell script
-python predict_demo.py --data_dir dataset/demo/ --load_model_dir saved_model/unet/best_model/
+python predict_demo.py --data_dir dataset/demo/ --file_list val.txt --load_model_dir saved_model/unet/best_model
 ```
 
-## Api说明
+## API说明
 
-您可以使用`RemoteSensing`目录下提供的api构建自己的分割代码。
+您可以使用`RemoteSensing`目录下提供的API构建自己的分割代码。
 
 - [数据处理-transforms](docs/transforms.md)
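上面的README提到可分别使用numpy和Pillow库保存遥感数据和标注图片,但只给出了numpy的示例。下面补充一个用Pillow保存单通道标注图片的最小示意(假设`label`为已按数据协议整理好的类别标注数组,文件名仅为示例):

```python
import numpy as np
from PIL import Image

# 标注图像为单通道,像素值为从0开始递增的类别索引(默认以255表示不参与训练和评估的像素)
label = np.zeros((256, 256), dtype=np.uint8)
# 以png无损压缩格式保存标注图片
Image.fromarray(label, mode='P').save('annotation.png')
```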
diff --git a/contrib/RemoteSensing/docs/imgs/rs.png b/contrib/RemoteSensing/docs/imgs/rs.png
new file mode 100644
index 0000000000000000000000000000000000000000..620b56c7b3bcfc39a5c9009f47e6e10bbd269115
Binary files /dev/null and b/contrib/RemoteSensing/docs/imgs/rs.png differ
diff --git a/contrib/RemoteSensing/models/__init__.py b/contrib/RemoteSensing/models/__init__.py
index 49098e44c699162e728cedff915f60d66e37a229..b88424493bd31ef0c794d21febb784724e6ebff4 100644
--- a/contrib/RemoteSensing/models/__init__.py
+++ b/contrib/RemoteSensing/models/__init__.py
@@ -1,2 +1,3 @@
 from .load_model import *
 from .unet import *
+from .hrnet import *
diff --git a/contrib/RemoteSensing/models/base.py b/contrib/RemoteSensing/models/base.py
index 849947306392cdc2a04427168d2355ae019864bc..3d42fa46d9776dc7e43d73e7b27266c1704ebf1c 100644
--- a/contrib/RemoteSensing/models/base.py
+++ b/contrib/RemoteSensing/models/base.py
@@ -19,15 +19,16 @@ import numpy as np
 import time
 import math
 import yaml
+import tqdm
+import cv2
 import copy
-import json
 import utils.logging as logging
 from collections import OrderedDict
 from os import path as osp
-from utils.pretrain_weights import get_pretrain_weights
+from utils.utils import seconds_to_hms, get_environ_info
+from utils.metrics import ConfusionMatrix
 import transforms.transforms as T
 import utils
-import __init__
 
 
 def dict2str(dict_input):
@@ -41,12 +42,45 @@ def dict2str(dict_input):
     return out.strip(', ')
 
 
-class BaseAPI:
-    def __init__(self):
-        # 现有的CV模型都有这个属性,而这个属性也需要在eval时用到
-        self.num_classes = None
+class BaseModel(object):
+    def __init__(self,
+                 num_classes=2,
+                 use_bce_loss=False,
+                 use_dice_loss=False,
+                 class_weight=None,
+                 ignore_index=255,
+                 sync_bn=True):
+        self.init_params = locals()
+        if num_classes > 2 and (use_bce_loss or use_dice_loss):
+            raise ValueError(
+                "dice loss and bce loss are only applicable to binary classification"
+            )
+
+        if class_weight is not None:
+            if isinstance(class_weight, list):
+                if len(class_weight) != num_classes:
+                    raise ValueError(
+                        "Length of class_weight should be equal to number of classes"
+                    )
+            elif isinstance(class_weight, str):
+                if class_weight.lower() != 'dynamic':
+                    raise ValueError(
+                        "if class_weight is string, must be dynamic!")
+            else:
+                raise TypeError(
+                    'Expect class_weight is a list or string but receive {}'.
+                    format(type(class_weight)))
+
+        self.num_classes = num_classes
+        self.use_bce_loss = use_bce_loss
+        self.use_dice_loss = use_dice_loss
+        self.class_weight = class_weight
+        self.ignore_index = ignore_index
+        self.sync_bn = sync_bn
+
         self.labels = None
-        if __init__.env_info['place'] == 'cpu':
+        self.env_info = get_environ_info()
+        if self.env_info['place'] == 'cpu':
             self.places = fluid.cpu_places()
         else:
             self.places = fluid.cuda_places()
@@ -60,10 +94,6 @@
         self.test_outputs = None
         self.train_data_loader = None
         self.eval_metrics = None
-        # 若模型是从inference model加载进来的,无法调用训练接口进行训练
-        self.trainable = True
-        # 是否使用多卡间同步BatchNorm均值和方差
-        self.sync_bn = False
         # 当前模型状态
         self.status = 'Normal'
 
@@ -73,16 +103,20 @@
         else:
             raise Exception("Please support correct batch_size, \
                             which can be divided by available cards({}) in {}".
-                            format(__init__.env_info['num'],
-                                   __init__.env_info['place']))
+                            format(self.env_info['num'],
+                                   self.env_info['place']))
+
+    def build_net(self, mode='train'):
+        """应根据不同的情况进行构建"""
+        pass
 
     def build_program(self):
-        # 构建训练网络
+        # build training network
         self.train_inputs, self.train_outputs = self.build_net(mode='train')
         self.train_prog = fluid.default_main_program()
         startup_prog = fluid.default_startup_program()
-        # 构建预测网络
+        # build prediction network
         self.test_prog = fluid.Program()
         with fluid.program_guard(self.test_prog, startup_prog):
             with fluid.unique_name.guard():
@@ -90,15 +124,15 @@
                 mode='test')
             self.test_prog = self.test_prog.clone(for_test=True)
 
-    def arrange_transforms(self, transforms, mode='train'):
-        # 给transforms添加arrange操作
-        if transforms.transforms[-1].__class__.__name__.startswith('Arrange'):
-            transforms.transforms[-1] = T.ArrangeSegmenter(mode=mode)
+    def arrange_transform(self, transforms, mode='train'):
+        arrange_transform = T.ArrangeSegmenter
+        if type(transforms.transforms[-1]).__name__.startswith('Arrange'):
+            transforms.transforms[-1] = arrange_transform(mode=mode)
         else:
-            transforms.transforms.append(T.ArrangeSegmenter(mode=mode))
+            transforms.transforms.append(arrange_transform(mode=mode))
 
-    def build_train_data_loader(self, reader, batch_size):
-        # 初始化data_loader
+    def build_train_data_loader(self, dataset, batch_size):
+        # init data_loader
         if self.train_data_loader is None:
             self.train_data_loader = fluid.io.DataLoader.from_generator(
                 feed_list=list(self.train_inputs.values()),
@@ -106,72 +140,92 @@
                 use_double_buffer=True,
                 iterable=True)
         batch_size_each_gpu = self._get_single_card_bs(batch_size)
-        generator = reader.generator(
-            batch_size=batch_size_each_gpu, drop_last=True)
         self.train_data_loader.set_sample_list_generator(
-            reader.generator(batch_size=batch_size_each_gpu),
+            dataset.generator(batch_size=batch_size_each_gpu),
             places=self.places)
 
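+    # 参数初始化:优先从resume_weights恢复训练断点,否则加载pretrain_weights预训练权重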
     def net_initialize(self,
                        startup_prog=None,
                        pretrain_weights=None,
-                       fuse_bn=False,
-                       save_dir='.',
-                       sensitivities_file=None,
-                       eval_metric_loss=0.05):
-        if hasattr(self, 'backbone'):
-            backbone = self.backbone
-        else:
-            backbone = self.__class__.__name__
-        pretrain_weights = get_pretrain_weights(pretrain_weights, backbone,
-                                                save_dir)
+                       resume_weights=None):
         if startup_prog is None:
             startup_prog = fluid.default_startup_program()
         self.exe.run(startup_prog)
-        if pretrain_weights is not None:
+        if resume_weights is not None:
+            logging.info("Resume weights from {}".format(resume_weights))
+            if not osp.exists(resume_weights):
+                raise Exception("Path {} not exists.".format(resume_weights))
+            fluid.load(self.train_prog, osp.join(resume_weights, 'model'),
+                       self.exe)
+            # Check if the path ends with a path separator
+            if resume_weights[-1] == os.sep:
+                resume_weights = resume_weights[0:-1]
+            epoch_name = osp.basename(resume_weights)
+            # If the checkpoint directory name ends with digits, restore the epoch status
+            epoch = epoch_name.split('_')[-1]
+            if epoch.isdigit():
+                self.begin_epoch = int(epoch)
+            else:
+                raise ValueError("Resume model path is not valid!")
+            logging.info("Model checkpoint loaded successfully!")
+
+        elif pretrain_weights is not None:
            logging.info(
                 "Load pretrain weights from {}.".format(pretrain_weights))
-            utils.utils.load_pretrain_weights(self.exe, self.train_prog,
-                                              pretrain_weights, fuse_bn)
-        # 进行裁剪
-        if sensitivities_file is not None:
-            from .slim.prune_config import get_sensitivities
-            sensitivities_file = get_sensitivities(sensitivities_file, self,
-                                                   save_dir)
-            from .slim.prune import get_params_ratios, prune_program
-            prune_params_ratios = get_params_ratios(
-                sensitivities_file, eval_metric_loss=eval_metric_loss)
-            prune_program(self, prune_params_ratios)
-            self.status = 'Prune'
+            utils.load_pretrained_weights(self.exe, self.train_prog,
+                                          pretrain_weights)
 
     def get_model_info(self):
+        # 存储相应的信息到yml文件
         info = dict()
         info['Model'] = self.__class__.__name__
-        info['_Attributes'] = {}
         if 'self' in self.init_params:
             del self.init_params['self']
         if '__class__' in self.init_params:
             del self.init_params['__class__']
         info['_init_params'] = self.init_params
 
+        info['_Attributes'] = dict()
         info['_Attributes']['num_classes'] = self.num_classes
         info['_Attributes']['labels'] = self.labels
         try:
-            primary_metric_key = list(self.eval_metrics.keys())[0]
-            primary_metric_value = float(self.eval_metrics[primary_metric_key])
-            info['_Attributes']['eval_metrics'] = {
-                primary_metric_key: primary_metric_value
-            }
+            info['_Attributes']['eval_metric'] = dict()
+            for k, v in self.eval_metrics.items():
+                if isinstance(v, np.ndarray):
+                    if v.size > 1:
+                        v = [float(i) for i in v]
+                    else:
+                        v = float(v)
+                info['_Attributes']['eval_metric'][k] = v
         except:
             pass
 
         if hasattr(self, 'test_transforms'):
             if self.test_transforms is not None:
-                info['Transforms'] = list()
+                info['test_transforms'] = list()
                 for op in self.test_transforms.transforms:
                     name = op.__class__.__name__
                     attr = op.__dict__
-                    info['Transforms'].append({name: attr})
+                    info['test_transforms'].append({name: attr})
+
+        if hasattr(self, 'train_transforms'):
+            if self.train_transforms is not None:
+                info['train_transforms'] = list()
+                for op in self.train_transforms.transforms:
+                    name = op.__class__.__name__
+                    attr = op.__dict__
+                    info['train_transforms'].append({name: attr})
+
+        if hasattr(self, 'train_init'):
+            if 'self' in self.train_init:
+                del self.train_init['self']
+            if 'train_reader' in self.train_init:
+                del self.train_init['train_reader']
+            if 'eval_reader' in self.train_init:
+                del self.train_init['eval_reader']
+            if 'optimizer' in self.train_init:
+                del self.train_init['optimizer']
+            info['train_init'] = self.train_init
         return info
 
     def save_model(self, save_dir):
@@ -179,76 +233,139 @@
         if osp.exists(save_dir):
            os.remove(save_dir)
         os.makedirs(save_dir)
-        fluid.save(self.train_prog, osp.join(save_dir, 'model'))
         model_info = self.get_model_info()
+
+        if self.status == 'Normal':
+            fluid.save(self.train_prog, osp.join(save_dir, 'model'))
+
         model_info['status'] = self.status
         with open(
                 osp.join(save_dir, 'model.yml'), encoding='utf-8',
                 mode='w') as f:
             yaml.dump(model_info, f)
-        # 评估结果保存
-        if hasattr(self, 'eval_details'):
-            with open(osp.join(save_dir, 'eval_details.json'), 'w') as f:
-                json.dump(self.eval_details, f)
-
-        if self.status == 'Prune':
-            # 保存裁剪的shape
-            shapes = {}
-            for block in self.train_prog.blocks:
-                for param in block.all_parameters():
-                    pd_var = fluid.global_scope().find_var(param.name)
-                    pd_param = pd_var.get_tensor()
-                    shapes[param.name] = np.array(pd_param).shape
-            with open(
-                    osp.join(save_dir, 'prune.yml'), encoding='utf-8',
-                    mode='w') as f:
-                yaml.dump(shapes, f)
-
-        # 模型保存成功的标志
+
+        # Flag file indicating that the model was saved successfully
         open(osp.join(save_dir, '.success'), 'w').close()
         logging.info("Model saved in {}.".format(save_dir))
 
-    def train_loop(self,
-                   num_epochs,
-                   train_reader,
-                   train_batch_size,
-                   eval_reader=None,
-                   eval_best_metric=None,
-                   save_interval_epochs=1,
-                   log_interval_steps=10,
-                   save_dir='output',
-                   use_vdl=False):
+    def export_inference_model(self, save_dir):
+        test_input_names = [var.name for var in list(self.test_inputs.values())]
+        test_outputs = list(self.test_outputs.values())
+        fluid.io.save_inference_model(
+            dirname=save_dir,
+            executor=self.exe,
+            params_filename='__params__',
+            feeded_var_names=test_input_names,
+            target_vars=test_outputs,
+            main_program=self.test_prog)
+        model_info = self.get_model_info()
+        model_info['status'] = 'Infer'
+
+        # Save input and output description of the model
+        model_info['_ModelInputsOutputs'] = dict()
+        model_info['_ModelInputsOutputs']['test_inputs'] = [
+            [k, v.name] for k, v in self.test_inputs.items()
+        ]
+        model_info['_ModelInputsOutputs']['test_outputs'] = [
+            [k, v.name] for k, v in self.test_outputs.items()
+        ]
+
+        with open(
+                osp.join(save_dir, 'model.yml'), encoding='utf-8',
+                mode='w') as f:
+            yaml.dump(model_info, f)
+
+        # Flag file indicating that the model was saved successfully
+        open(osp.join(save_dir, '.success'), 'w').close()
+        logging.info("Model for inference deploy saved in {}.".format(save_dir))
+
+    def default_optimizer(self,
+                          learning_rate,
+                          num_epochs,
+                          num_steps_each_epoch,
+                          lr_decay_power=0.9,
+                          regularization_coeff=4e-5):
+        decay_step = num_epochs * num_steps_each_epoch
+        lr_decay = fluid.layers.polynomial_decay(
+            learning_rate,
+            decay_step,
+            end_learning_rate=0,
+            power=lr_decay_power)
+        optimizer = fluid.optimizer.Momentum(
+            lr_decay,
+            momentum=0.9,
+            regularization=fluid.regularizer.L2Decay(
+                regularization_coeff=regularization_coeff))
+        return optimizer
+
+    def train(self,
+              num_epochs,
+              train_reader,
+              train_batch_size=2,
+              eval_reader=None,
+              eval_best_metric=None,
+              save_interval_epochs=1,
+              log_interval_steps=2,
+              save_dir='output',
+              pretrain_weights=None,
+              resume_weights=None,
+              optimizer=None,
+              learning_rate=0.01,
+              lr_decay_power=0.9,
+              regularization_coeff=4e-5,
+              use_vdl=False):
+        self.labels = train_reader.labels
+        self.train_transforms = train_reader.transforms
+        self.train_init = locals()
+        self.begin_epoch = 0
+
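+        # 未指定optimizer时,使用默认的Momentum优化器,学习率按polynomial策略衰减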
+        if optimizer is None:
+            num_steps_each_epoch = train_reader.num_samples // train_batch_size
+            optimizer = self.default_optimizer(
+                learning_rate=learning_rate,
+                num_epochs=num_epochs,
+                num_steps_each_epoch=num_steps_each_epoch,
+                lr_decay_power=lr_decay_power,
+                regularization_coeff=regularization_coeff)
+        self.optimizer = optimizer
+
+        self.build_program()
+        self.net_initialize(
+            startup_prog=fluid.default_startup_program(),
+            pretrain_weights=pretrain_weights,
+            resume_weights=resume_weights)
+
+        if self.begin_epoch >= num_epochs:
+            raise ValueError(
+                ("begin epoch[{}] is larger than num_epochs[{}]").format(
+                    self.begin_epoch, num_epochs))
+
         if not osp.isdir(save_dir):
             if osp.exists(save_dir):
                 os.remove(save_dir)
             os.makedirs(save_dir)
-        if use_vdl:
-            from visualdl import LogWriter
-            vdl_logdir = osp.join(save_dir, 'vdl_log')
-        # 给transform添加arrange操作
-        self.arrange_transforms(
-            transforms=train_reader.transforms, mode='train')
-        # 构建train_data_loader
+
+        # add arrange op to transforms
+        self.arrange_transform(transforms=train_reader.transforms, mode='train')
         self.build_train_data_loader(
-            reader=train_reader, batch_size=train_batch_size)
+            dataset=train_reader, batch_size=train_batch_size)
 
         if eval_reader is not None:
             self.eval_transforms = eval_reader.transforms
             self.test_transforms = copy.deepcopy(eval_reader.transforms)
 
-        # 获取实时变化的learning rate
         lr = self.optimizer._learning_rate
+        lr.persistable = True
         if isinstance(lr, fluid.framework.Variable):
             self.train_outputs['lr'] = lr
 
-        # 在多卡上跑训练
+        # 多卡训练
         if self.parallel_train_prog is None:
             build_strategy = fluid.compiler.BuildStrategy()
-            build_strategy.fuse_all_optimizer_ops = False
-            if __init__.env_info['place'] != 'cpu' and len(self.places) > 1:
+            if self.env_info['place'] != 'cpu' and len(self.places) > 1:
                 build_strategy.sync_batch_norm = self.sync_bn
             exec_strategy = fluid.ExecutionStrategy()
             exec_strategy.num_iteration_per_drop_scope = 1
+
             self.parallel_train_prog = fluid.CompiledProgram(
                 self.train_prog).with_data_parallel(
                     loss_name=self.train_outputs['loss'].name,
@@ -259,16 +376,27 @@
             train_reader.num_samples / train_batch_size)
         num_steps = 0
         time_stat = list()
+        time_train_one_epoch = None
+        time_eval_one_epoch = None
+
+        total_num_steps_eval = 0
+        # eval times
+        total_eval_times = math.ceil(num_epochs / save_interval_epochs)
+        eval_batch_size = train_batch_size
+        if eval_reader is not None:
+            total_num_steps_eval = math.ceil(
+                eval_reader.num_samples / eval_batch_size)
 
         if use_vdl:
-            # VisualDL component
+            from visualdl import LogWriter
+            vdl_logdir = osp.join(save_dir, 'vdl_log')
             log_writer = LogWriter(vdl_logdir)
-
-        best_accuracy = -1.0
+        best_metric = -1.0
         best_model_epoch = 1
-        for i in range(num_epochs):
+        for i in range(self.begin_epoch, num_epochs):
             records = list()
             step_start_time = time.time()
+            epoch_start_time = time.time()
             for step, data in enumerate(self.train_data_loader()):
                 outputs = self.exe.run(
                     self.parallel_train_prog,
@@ -277,22 +405,15 @@
                 outputs_avg = np.mean(np.array(outputs), axis=1)
                 records.append(outputs_avg)
 
-                # 训练完成剩余时间预估
+                # estimate the remaining time to finish training
                 current_time = time.time()
                 step_cost_time = current_time - step_start_time
                 step_start_time = current_time
                 if len(time_stat) < 20:
                     time_stat.append(step_cost_time)
                 else:
                     time_stat[num_steps % 20] = step_cost_time
-                eta = ((num_epochs - i) * total_num_steps - step -
-                       1) * np.mean(time_stat)
-                eta_h = math.floor(eta / 3600)
-                eta_m = math.floor((eta - eta_h * 3600) / 60)
-                eta_s = int(eta - eta_h * 3600 - eta_m * 60)
-                eta_str = "{}:{}:{}".format(eta_h, eta_m, eta_s)
-
-                # 每间隔log_interval_steps,输出loss信息
+
+                num_steps += 1
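+                # 每隔log_interval_steps步输出一次训练日志,并估算剩余训练时间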
                 if num_steps % log_interval_steps == 0:
                     step_metrics = OrderedDict(
@@ -301,38 +422,52 @@
                     if use_vdl:
                         for k, v in step_metrics.items():
                             log_writer.add_scalar(
-                                tag="Training: {}".format(k),
-                                value=v,
-                                step=num_steps)
+                                step=num_steps,
+                                tag='train/{}'.format(k),
+                                value=v)
+
+                    # 计算剩余时间
+                    avg_step_time = np.mean(time_stat)
+                    if time_train_one_epoch is not None:
+                        eta = (num_epochs - i - 1) * time_train_one_epoch + (
+                            total_num_steps - step - 1) * avg_step_time
+                    else:
+                        eta = ((num_epochs - i) * total_num_steps - step -
+                               1) * avg_step_time
+                    if time_eval_one_epoch is not None:
+                        eval_eta = (total_eval_times - i // save_interval_epochs
+                                    ) * time_eval_one_epoch
+                    else:
+                        eval_eta = (total_eval_times - i // save_interval_epochs
+                                    ) * total_num_steps_eval * avg_step_time
+                    eta_str = seconds_to_hms(eta + eval_eta)
+
                     logging.info(
-                        "[TRAIN] Epoch={}/{}, Step={}/{}, {}, eta={}".format(
-                            i + 1, num_epochs, step + 1, total_num_steps,
-                            dict2str(step_metrics), eta_str))
+                        "[TRAIN] Epoch={}/{}, Step={}/{}, {}, time_each_step={}s, eta={}"
+                        .format(i + 1, num_epochs, step + 1, total_num_steps,
                                dict2str(step_metrics), round(avg_step_time, 2),
+                                eta_str))
+
             train_metrics = OrderedDict(
                 zip(list(self.train_outputs.keys()), np.mean(records, axis=0)))
             logging.info('[TRAIN] Epoch {} finished, {} .'.format(
                 i + 1, dict2str(train_metrics)))
+            time_train_one_epoch = time.time() - epoch_start_time
 
-            # 每间隔save_interval_epochs, 在验证集上评估和对模型进行保存
+            eval_epoch_start_time = time.time()
             if (i + 1) % save_interval_epochs == 0 or i == num_epochs - 1:
                 current_save_dir = osp.join(save_dir, "epoch_{}".format(i + 1))
                 if not osp.isdir(current_save_dir):
                     os.makedirs(current_save_dir)
                 if eval_reader is not None:
-                    # 检测目前仅支持单卡评估,训练数据batch大小与显卡数量之商为验证数据batch大小。
-                    eval_batch_size = train_batch_size
-                    self.eval_metrics, self.eval_details = self.evaluate(
+                    self.eval_metrics = self.evaluate(
                         eval_reader=eval_reader,
                         batch_size=eval_batch_size,
-                        verbose=True,
-                        epoch_id=i + 1,
-                        return_details=True)
-                    logging.info('[EVAL] Finished, Epoch={}, {} .'.format(
-                        i + 1, dict2str(self.eval_metrics)))
+                        epoch_id=i + 1)
                     # 保存最优模型
                     current_metric = self.eval_metrics[eval_best_metric]
-                    if current_metric > best_accuracy:
-                        best_accuracy = current_metric
+                    if current_metric > best_metric:
+                        best_metric = current_metric
                         best_model_epoch = i + 1
                         best_model_dir = osp.join(save_dir, "best_model")
                         self.save_model(save_dir=best_model_dir)
@@ -344,10 +479,131 @@
                         if v.size > 1:
                             continue
                         log_writer.add_scalar(
-                            tag="Evaluation: {}".format(k),
-                            step=i + 1,
+                            step=num_steps,
+                            tag='evaluate/{}'.format(k),
                             value=v)
                 self.save_model(save_dir=current_save_dir)
-        logging.info(
-            'Current evaluated best model in eval_reader is epoch_{}, {}={}'
-            .format(best_model_epoch, eval_best_metric, best_accuracy))
+                time_eval_one_epoch = time.time() - eval_epoch_start_time
+        if eval_reader is not None:
+            logging.info(
+                'Current evaluated best model in validation dataset is epoch_{}, {}={}'
+                .format(best_model_epoch, eval_best_metric,
+                        best_metric))
+
+    def evaluate(self, eval_reader, batch_size=1, epoch_id=None):
+        """评估。
+
+        Args:
+            eval_reader (reader): 评估数据读取器。
+            batch_size (int): 评估时的batch大小。默认1。
+            epoch_id (int): 当前评估模型所在的训练轮数。
+
+        Returns:
+            dict: 包含关键字:'miou'、'category_iou'、'macc'、'category_acc'、'kappa'、
+                'precision'和'recall',分别表示平均iou、各类别iou、平均准确率、各类别准确率、
+                kappa系数,以及前景类的精确率和召回率。
+        """
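+        # 给评估transforms追加Arrange操作,并用流式混淆矩阵(ConfusionMatrix)逐batch累积评估指标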
+        self.arrange_transform(transforms=eval_reader.transforms, mode='train')
+        total_steps = math.ceil(eval_reader.num_samples * 1.0 / batch_size)
+        conf_mat = ConfusionMatrix(self.num_classes, streaming=True)
+        data_generator = eval_reader.generator(
+            batch_size=batch_size, drop_last=False)
+        if not hasattr(self, 'parallel_test_prog'):
+            self.parallel_test_prog = fluid.CompiledProgram(
+                self.test_prog).with_data_parallel(
+                    share_vars_from=self.parallel_train_prog)
+        logging.info(
+            "Start evaluating (total_samples={}, total_steps={})...".format(
+                eval_reader.num_samples, total_steps))
+        for step, data in tqdm.tqdm(
+                enumerate(data_generator()), total=total_steps):
+            images = np.array([d[0] for d in data])
+            images = images.astype(np.float32)
+            labels = np.array([d[1] for d in data])
+            num_samples = images.shape[0]
+            if num_samples < batch_size:
+                num_pad_samples = batch_size - num_samples
+                pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
+                images = np.concatenate([images, pad_images])
+            feed_data = {'image': images}
+            outputs = self.exe.run(
+                self.parallel_test_prog,
+                feed=feed_data,
+                fetch_list=list(self.test_outputs.values()),
+                return_numpy=True)
+            pred = outputs[0]
+            if num_samples < batch_size:
+                pred = pred[0:num_samples]
+
+            mask = labels != self.ignore_index
+            conf_mat.calculate(pred=pred, label=labels, ignore=mask)
+            _, iou = conf_mat.mean_iou()
+
+            logging.debug("[EVAL] Epoch={}, Step={}/{}, iou={}".format(
+                epoch_id, step + 1, total_steps, iou))
+
+        category_iou, miou = conf_mat.mean_iou()
+        category_acc, macc = conf_mat.accuracy()
+        precision, recall = conf_mat.precision_recall()
+
+        metrics = OrderedDict(
+            zip([
+                'miou', 'category_iou', 'macc', 'category_acc', 'kappa',
+                'precision', 'recall'
+            ], [
+                miou, category_iou, macc, category_acc,
+                conf_mat.kappa(), precision, recall
+            ]))
+
+        logging.info('[EVAL] Finished, Epoch={}, {} .'.format(
+            epoch_id, dict2str(metrics)))
+        return metrics
+
+    def predict(self, im_file, transforms=None):
+        """预测。
+        Args:
+            im_file (str|np.ndarray): 预测图像。
+            transforms (transforms.transforms): 数据预处理操作。
+
+        Returns:
+            dict: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,
+                像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)
+        """
+        if isinstance(im_file, str):
+            if not osp.exists(im_file):
+                raise ValueError(
+                    'The image file does not exist: {}'.format(im_file))
+
+        if transforms is None and not hasattr(self, 'test_transforms'):
+            raise Exception("transforms need to be defined, now is None.")
+        if transforms is not None:
+            self.arrange_transform(transforms=transforms, mode='test')
+            im, im_info = transforms(im_file)
+        else:
+            self.arrange_transform(transforms=self.test_transforms, mode='test')
+            im, im_info = self.test_transforms(im_file)
+        im = im.astype(np.float32)
+        im = np.expand_dims(im, axis=0)
+        result = self.exe.run(
+            self.test_prog,
+            feed={'image': im},
+            fetch_list=list(self.test_outputs.values()))
+        pred = result[0]
+        logit = result[1]
+        logit = np.squeeze(logit)
+        logit = np.transpose(logit, (1, 2, 0))
+        pred = np.squeeze(pred).astype('uint8')
+        keys = list(im_info.keys())
+        for k in keys[::-1]:
+            if k == 'shape_before_resize':
+                h, w = im_info[k][0], im_info[k][1]
+                pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
+                logit = cv2.resize(logit, (w, h), cv2.INTER_LINEAR)
+            elif k == 'shape_before_padding':
+                h, w = im_info[k][0], im_info[k][1]
+                pred = pred[0:h, 0:w]
+                logit = logit[0:h, 0:w, :]
+
+        return {'label_map': pred, 'score_map': logit}
diff --git a/contrib/RemoteSensing/models/hrnet.py 
b/contrib/RemoteSensing/models/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..2b575b04649df3402fa29e1b21bb4c7ffdc1704e --- /dev/null +++ b/contrib/RemoteSensing/models/hrnet.py @@ -0,0 +1,154 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import paddle.fluid as fluid +import os +from os import path as osp +import numpy as np +from collections import OrderedDict +import copy +import math +import time +import tqdm +import cv2 +import yaml +import utils +import utils.logging as logging +from utils.utils import seconds_to_hms, get_environ_info +from utils.metrics import ConfusionMatrix +import nets +import transforms.transforms as T +from .base import BaseModel + + +def dict2str(dict_input): + out = '' + for k, v in dict_input.items(): + try: + v = round(float(v), 6) + except: + pass + out = out + '{}={}, '.format(k, v) + return out.strip(', ') + + +class HRNet(BaseModel): + def __init__(self, + num_classes=2, + input_channel=3, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[18, 36], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[18, 36, 72, 144], + use_bce_loss=False, + use_dice_loss=False, + class_weight=None, + ignore_index=255, + sync_bn=True): + super().__init__( + num_classes=num_classes, + use_bce_loss=use_bce_loss, + use_dice_loss=use_dice_loss, + class_weight=class_weight, + ignore_index=ignore_index, + sync_bn=sync_bn) + self.init_params = locals() + self.input_channel = input_channel + self.stage1_num_modules = stage1_num_modules + self.stage1_num_blocks = stage1_num_blocks + self.stage1_num_channels = stage1_num_channels + self.stage2_num_modules = stage2_num_modules + self.stage2_num_blocks = stage2_num_blocks + self.stage2_num_channels = stage2_num_channels + self.stage3_num_modules = stage3_num_modules + self.stage3_num_blocks = stage3_num_blocks + self.stage3_num_channels = stage3_num_channels + self.stage4_num_modules = stage4_num_modules + self.stage4_num_blocks = stage4_num_blocks + self.stage4_num_channels = stage4_num_channels + + def build_net(self, mode='train'): + """应根据不同的情况进行构建""" + model = nets.HRNet( + self.num_classes, + self.input_channel, + mode=mode, + stage1_num_modules=self.stage1_num_modules, + stage1_num_blocks=self.stage1_num_blocks, + stage1_num_channels=self.stage1_num_channels, + stage2_num_modules=self.stage2_num_modules, + stage2_num_blocks=self.stage2_num_blocks, + stage2_num_channels=self.stage2_num_channels, + stage3_num_modules=self.stage3_num_modules, + stage3_num_blocks=self.stage3_num_blocks, + stage3_num_channels=self.stage3_num_channels, + stage4_num_modules=self.stage4_num_modules, + stage4_num_blocks=self.stage4_num_blocks, + stage4_num_channels=self.stage4_num_channels, + 
use_bce_loss=self.use_bce_loss, + use_dice_loss=self.use_dice_loss, + class_weight=self.class_weight, + ignore_index=self.ignore_index) + inputs = model.generate_inputs() + model_out = model.build_net(inputs) + outputs = OrderedDict() + if mode == 'train': + self.optimizer.minimize(model_out) + outputs['loss'] = model_out + else: + outputs['pred'] = model_out[0] + outputs['logit'] = model_out[1] + return inputs, outputs + + def train(self, + num_epochs, + train_reader, + train_batch_size=2, + eval_reader=None, + eval_best_metric='kappa', + save_interval_epochs=1, + log_interval_steps=2, + save_dir='output', + pretrain_weights=None, + resume_weights=None, + optimizer=None, + learning_rate=0.01, + lr_decay_power=0.9, + regularization_coeff=5e-4, + use_vdl=False): + super().train( + num_epochs=num_epochs, + train_reader=train_reader, + train_batch_size=train_batch_size, + eval_reader=eval_reader, + eval_best_metric=eval_best_metric, + save_interval_epochs=save_interval_epochs, + log_interval_steps=log_interval_steps, + save_dir=save_dir, + pretrain_weights=pretrain_weights, + resume_weights=resume_weights, + optimizer=optimizer, + learning_rate=learning_rate, + lr_decay_power=lr_decay_power, + regularization_coeff=regularization_coeff, + use_vdl=use_vdl) diff --git a/contrib/RemoteSensing/models/load_model.py b/contrib/RemoteSensing/models/load_model.py index fb55c13125c7ad194196082be00fb5df7c037dd8..6b765c81a242d2dc4aaedef08d63ac6f92073297 100644 --- a/contrib/RemoteSensing/models/load_model.py +++ b/contrib/RemoteSensing/models/load_model.py @@ -25,7 +25,7 @@ import models def load_model(model_dir): if not osp.exists(osp.join(model_dir, "model.yml")): - raise Exception("There's not model.yml in {}".format(model_dir)) + raise Exception("There's no model.yml in {}".format(model_dir)) with open(osp.join(model_dir, "model.yml")) as f: info = yaml.load(f.read(), Loader=yaml.Loader) status = info['status'] @@ -35,8 +35,7 @@ def load_model(model_dir): info['Model'])) model = getattr(models, info['Model'])(**info['_init_params']) - if status == "Normal" or \ - status == "Prune": + if status == "Normal": startup_prog = fluid.Program() model.test_prog = fluid.Program() with fluid.program_guard(model.test_prog, startup_prog): @@ -45,17 +44,12 @@ def load_model(model_dir): mode='test') model.test_prog = model.test_prog.clone(for_test=True) model.exe.run(startup_prog) - if status == "Prune": - from .slim.prune import update_program - model.test_prog = update_program(model.test_prog, model_dir, - model.places[0]) import pickle with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f: load_dict = pickle.load(f) fluid.io.set_program_state(model.test_prog, load_dict) - elif status == "Infer" or \ - status == "Quant": + elif status == "Infer": [prog, input_names, outputs] = fluid.io.load_inference_model( model_dir, model.exe, params_filename='__params__') model.test_prog = prog @@ -67,8 +61,8 @@ def load_model(model_dir): for i, out in enumerate(outputs): var_desc = test_outputs_info[i] model.test_outputs[var_desc[0]] = out - if 'Transforms' in info: - model.test_transforms = build_transforms(info['Transforms']) + if 'test_transforms' in info: + model.test_transforms = build_transforms(info['test_transforms']) model.eval_transforms = copy.deepcopy(model.test_transforms) if '_Attributes' in info: diff --git a/contrib/RemoteSensing/models/unet.py b/contrib/RemoteSensing/models/unet.py index bd56a929aa8e0253ce04a899454cadf956d28fbe..243af3339c2bacbb37b2777edceb6b610b8feaf4 100644 --- 
a/contrib/RemoteSensing/models/unet.py +++ b/contrib/RemoteSensing/models/unet.py @@ -13,19 +13,18 @@ #limitations under the License. from __future__ import absolute_import -import os.path as osp import numpy as np import math import cv2 import paddle.fluid as fluid import utils.logging as logging from collections import OrderedDict -from .base import BaseAPI +from .base import BaseModel from utils.metrics import ConfusionMatrix import nets -class UNet(BaseAPI): +class UNet(BaseModel): """实现UNet网络的构建并进行训练、评估、预测和模型导出。 Args: @@ -55,9 +54,16 @@ class UNet(BaseAPI): use_bce_loss=False, use_dice_loss=False, class_weight=None, - ignore_index=255): + ignore_index=255, + sync_bn=True): + super().__init__( + num_classes=num_classes, + use_bce_loss=use_bce_loss, + use_dice_loss=use_dice_loss, + class_weight=class_weight, + ignore_index=ignore_index, + sync_bn=sync_bn) self.init_params = locals() - super(UNet, self).__init__() # dice_loss或bce_loss只适用两类分割中 if num_classes > 2 and (use_bce_loss or use_dice_loss): raise ValueError( @@ -115,24 +121,6 @@ class UNet(BaseAPI): outputs['logit'] = model_out[1] return inputs, outputs - def default_optimizer(self, - learning_rate, - num_epochs, - num_steps_each_epoch, - lr_decay_power=0.9): - decay_step = num_epochs * num_steps_each_epoch - lr_decay = fluid.layers.polynomial_decay( - learning_rate, - decay_step, - end_learning_rate=0, - power=lr_decay_power) - optimizer = fluid.optimizer.Momentum( - lr_decay, - momentum=0.9, - regularization=fluid.regularizer.L2Decay( - regularization_coeff=4e-05)) - return optimizer - def train(self, num_epochs, train_reader, @@ -142,13 +130,13 @@ class UNet(BaseAPI): save_interval_epochs=1, log_interval_steps=2, save_dir='output', - pretrain_weights='COCO', + pretrain_weights=None, + resume_weights=None, optimizer=None, learning_rate=0.01, lr_decay_power=0.9, - use_vdl=False, - sensitivities_file=None, - eval_metric_loss=0.05): + regularization_coeff=5e-4, + use_vdl=False): """训练。 Args: @@ -160,46 +148,17 @@ class UNet(BaseAPI): save_interval_epochs (int): 模型保存间隔(单位:迭代轮数)。默认为1。 log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 save_dir (str): 模型保存路径。默认'output'。 - pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'COCO', - 则自动下载在COCO图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'COCO'。 + pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为None,则不使用预训练模型。 optimizer (paddle.fluid.optimizer): 优化器。当改参数为None时,使用默认的优化器:使用 fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 learning_rate (float): 默认优化器的初始学习率。默认0.01。 lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。 use_vdl (bool): 是否使用VisualDL进行可视化。默认False。 - sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT', - 则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 - eval_metric_loss (float): 可容忍的精度损失。默认为0.05。 Raises: ValueError: 模型从inference model进行加载。 """ - if not self.trainable: - raise ValueError( - "Model is not trainable since it was loaded from a inference model." 
- ) - - self.labels = train_reader.labels - - if optimizer is None: - num_steps_each_epoch = train_reader.num_samples // train_batch_size - optimizer = self.default_optimizer( - learning_rate=learning_rate, - num_epochs=num_epochs, - num_steps_each_epoch=num_steps_each_epoch, - lr_decay_power=lr_decay_power) - self.optimizer = optimizer - # 构建训练、验证、预测网络 - self.build_program() - # 初始化网络权重 - self.net_initialize( - startup_prog=fluid.default_startup_program(), - pretrain_weights=pretrain_weights, - save_dir=save_dir, - sensitivities_file=sensitivities_file, - eval_metric_loss=eval_metric_loss) - # 训练 - self.train_loop( + super().train( num_epochs=num_epochs, train_reader=train_reader, train_batch_size=train_batch_size, @@ -208,6 +167,12 @@ class UNet(BaseAPI): save_interval_epochs=save_interval_epochs, log_interval_steps=log_interval_steps, save_dir=save_dir, + pretrain_weights=pretrain_weights, + resume_weights=resume_weights, + optimizer=optimizer, + learning_rate=learning_rate, + lr_decay_power=lr_decay_power, + regularization_coeff=regularization_coeff, use_vdl=use_vdl) def evaluate(self, @@ -231,7 +196,7 @@ class UNet(BaseAPI): tuple (metrics, eval_details):当return_details为True时,增加返回dict (eval_details), 包含关键字:'confusion_matrix',表示评估的混淆矩阵。 """ - self.arrange_transforms(transforms=eval_reader.transforms, mode='eval') + self.arrange_transform(transforms=eval_reader.transforms, mode='eval') total_steps = math.ceil(eval_reader.num_samples * 1.0 / batch_size) conf_mat = ConfusionMatrix(self.num_classes, streaming=True) data_generator = eval_reader.generator( @@ -272,11 +237,16 @@ class UNet(BaseAPI): category_iou, miou = conf_mat.mean_iou() category_acc, macc = conf_mat.accuracy() + precision, recall = conf_mat.precision_recall() metrics = OrderedDict( - zip(['miou', 'category_iou', 'macc', 'category_acc', 'kappa'], - [miou, category_iou, macc, category_acc, - conf_mat.kappa()])) + zip([ + 'miou', 'category_iou', 'macc', 'category_acc', 'kappa', + 'precision', 'recall' + ], [ + miou, category_iou, macc, category_acc, + conf_mat.kappa(), precision, recall + ])) if return_details: eval_details = { 'confusion_matrix': conf_mat.confusion_matrix.tolist() @@ -296,11 +266,10 @@ class UNet(BaseAPI): if transforms is None and not hasattr(self, 'test_transforms'): raise Exception("transforms need to be defined, now is None.") if transforms is not None: - self.arrange_transforms(transforms=transforms, mode='test') + self.arrange_transform(transforms=transforms, mode='test') im, im_info = transforms(im_file) else: - self.arrange_transforms( - transforms=self.test_transforms, mode='test') + self.arrange_transform(transforms=self.test_transforms, mode='test') im, im_info = self.test_transforms(im_file) im = im.astype(np.float32) im = np.expand_dims(im, axis=0) @@ -319,4 +288,4 @@ class UNet(BaseAPI): h, w = im_info[k][0], im_info[k][1] pred = pred[0:h, 0:w] - return pred + return {'label_map': pred} diff --git a/contrib/RemoteSensing/nets/__init__.py b/contrib/RemoteSensing/nets/__init__.py index 67cafc4f4222e392c2552e71f5ab1df194d860c8..7ca925549e29148b929ad06d77ead49d22cfd7e4 100644 --- a/contrib/RemoteSensing/nets/__init__.py +++ b/contrib/RemoteSensing/nets/__init__.py @@ -1 +1,2 @@ from .unet import UNet +from .hrnet import HRNet diff --git a/contrib/RemoteSensing/nets/hrnet.py b/contrib/RemoteSensing/nets/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..854a0f03dbc7397bc1f54086a7d5d15572feee8d --- /dev/null +++ b/contrib/RemoteSensing/nets/hrnet.py @@ -0,0 +1,455 @@ +# 
coding: utf8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from collections import OrderedDict
+
+import paddle.fluid as fluid
+from paddle.fluid.initializer import MSRA
+from paddle.fluid.param_attr import ParamAttr
+from .loss import softmax_with_loss
+from .loss import dice_loss
+from .loss import bce_loss
+from .libs import sigmoid_to_softmax
+
+
+class HRNet(object):
+    def __init__(self,
+                 num_classes,
+                 input_channel=3,
+                 mode='train',
+                 stage1_num_modules=1,
+                 stage1_num_blocks=[4],
+                 stage1_num_channels=[64],
+                 stage2_num_modules=1,
+                 stage2_num_blocks=[4, 4],
+                 stage2_num_channels=[18, 36],
+                 stage3_num_modules=4,
+                 stage3_num_blocks=[4, 4, 4],
+                 stage3_num_channels=[18, 36, 72],
+                 stage4_num_modules=3,
+                 stage4_num_blocks=[4, 4, 4, 4],
+                 stage4_num_channels=[18, 36, 72, 144],
+                 use_bce_loss=False,
+                 use_dice_loss=False,
+                 class_weight=None,
+                 ignore_index=255):
+        # dice_loss或bce_loss仅适用于二类分割
+        if num_classes > 2 and (use_bce_loss or use_dice_loss):
+            raise ValueError(
+                "dice loss and bce loss are only applicable to binary classification"
+            )
+
+        if class_weight is not None:
+            if isinstance(class_weight, list):
+                if len(class_weight) != num_classes:
+                    raise ValueError(
+                        "Length of class_weight should be equal to number of classes"
+                    )
+            elif isinstance(class_weight, str):
+                if class_weight.lower() != 'dynamic':
+                    raise ValueError(
+                        "if class_weight is string, must be dynamic!")
+            else:
+                raise TypeError(
+                    'Expect class_weight is a list or string but receive {}'.
+ format(type(class_weight))) + + self.num_classes = num_classes + self.input_channel = input_channel + self.mode = mode + self.use_bce_loss = use_bce_loss + self.use_dice_loss = use_dice_loss + self.class_weight = class_weight + self.ignore_index = ignore_index + self.stage1_num_modules = stage1_num_modules + self.stage1_num_blocks = stage1_num_blocks + self.stage1_num_channels = stage1_num_channels + self.stage2_num_modules = stage2_num_modules + self.stage2_num_blocks = stage2_num_blocks + self.stage2_num_channels = stage2_num_channels + self.stage3_num_modules = stage3_num_modules + self.stage3_num_blocks = stage3_num_blocks + self.stage3_num_channels = stage3_num_channels + self.stage4_num_modules = stage4_num_modules + self.stage4_num_blocks = stage4_num_blocks + self.stage4_num_channels = stage4_num_channels + + def build_net(self, inputs): + if self.use_dice_loss or self.use_bce_loss: + self.num_classes = 1 + image = inputs['image'] + logit = self._high_resolution_net(image, self.num_classes) + if self.num_classes == 1: + out = sigmoid_to_softmax(logit) + out = fluid.layers.transpose(out, [0, 2, 3, 1]) + else: + out = fluid.layers.transpose(logit, [0, 2, 3, 1]) + + pred = fluid.layers.argmax(out, axis=3) + pred = fluid.layers.unsqueeze(pred, axes=[3]) + + if self.mode == 'train': + label = inputs['label'] + mask = label != self.ignore_index + return self._get_loss(logit, label, mask) + + else: + if self.num_classes == 1: + logit = sigmoid_to_softmax(logit) + else: + logit = fluid.layers.softmax(logit, axis=1) + return pred, logit + + return logit + + def generate_inputs(self): + inputs = OrderedDict() + inputs['image'] = fluid.data( + dtype='float32', + shape=[None, self.input_channel, None, None], + name='image') + if self.mode == 'train': + inputs['label'] = fluid.data( + dtype='int32', shape=[None, 1, None, None], name='label') + elif self.mode == 'eval': + inputs['label'] = fluid.data( + dtype='int32', shape=[None, 1, None, None], name='label') + return inputs + + def _get_loss(self, logit, label, mask): + avg_loss = 0 + if not (self.use_dice_loss or self.use_bce_loss): + avg_loss += softmax_with_loss( + logit, + label, + mask, + num_classes=self.num_classes, + weight=self.class_weight, + ignore_index=self.ignore_index) + else: + if self.use_dice_loss: + avg_loss += dice_loss(logit, label, mask) + if self.use_bce_loss: + avg_loss += bce_loss( + logit, label, mask, ignore_index=self.ignore_index) + + return avg_loss + + def _conv_bn_layer(self, + input, + filter_size, + num_filters, + stride=1, + padding=1, + num_groups=1, + if_act=True, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + act=None, + param_attr=ParamAttr(initializer=MSRA(), name=name + '_weights'), + bias_attr=False) + bn_name = name + '_bn' + bn = fluid.layers.batch_norm( + input=conv, + param_attr=ParamAttr( + name=bn_name + "_scale", + initializer=fluid.initializer.Constant(1.0)), + bias_attr=ParamAttr( + name=bn_name + "_offset", + initializer=fluid.initializer.Constant(0.0)), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + if if_act: + bn = fluid.layers.relu(bn) + return bn + + def _basic_block(self, + input, + num_filters, + stride=1, + downsample=False, + name=None): + residual = input + conv = self._conv_bn_layer( + input=input, + filter_size=3, + num_filters=num_filters, + stride=stride, + name=name + '_conv1') + conv = 
self._conv_bn_layer( + input=conv, + filter_size=3, + num_filters=num_filters, + if_act=False, + name=name + '_conv2') + if downsample: + residual = self._conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_filters, + if_act=False, + name=name + '_downsample') + return fluid.layers.elementwise_add(x=residual, y=conv, act='relu') + + def _bottleneck_block(self, + input, + num_filters, + stride=1, + downsample=False, + name=None): + residual = input + conv = self._conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_filters, + name=name + '_conv1') + conv = self._conv_bn_layer( + input=conv, + filter_size=3, + num_filters=num_filters, + stride=stride, + name=name + '_conv2') + conv = self._conv_bn_layer( + input=conv, + filter_size=1, + num_filters=num_filters * 4, + if_act=False, + name=name + '_conv3') + if downsample: + residual = self._conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_filters * 4, + if_act=False, + name=name + '_downsample') + return fluid.layers.elementwise_add(x=residual, y=conv, act='relu') + + def _fuse_layers(self, x, channels, multi_scale_output=True, name=None): + out = [] + for i in range(len(channels) if multi_scale_output else 1): + residual = x[i] + shape = fluid.layers.shape(residual)[-2:] + for j in range(len(channels)): + if j > i: + y = self._conv_bn_layer( + x[j], + filter_size=1, + num_filters=channels[i], + if_act=False, + name=name + '_layer_' + str(i + 1) + '_' + str(j + 1)) + y = fluid.layers.resize_bilinear(input=y, out_shape=shape) + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + elif j < i: + y = x[j] + for k in range(i - j): + if k == i - j - 1: + y = self._conv_bn_layer( + y, + filter_size=3, + num_filters=channels[i], + stride=2, + if_act=False, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1)) + else: + y = self._conv_bn_layer( + y, + filter_size=3, + num_filters=channels[j], + stride=2, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1)) + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + + residual = fluid.layers.relu(residual) + out.append(residual) + return out + + def _branches(self, x, block_num, channels, name=None): + out = [] + for i in range(len(channels)): + residual = x[i] + for j in range(block_num[i]): + residual = self._basic_block( + residual, + channels[i], + name=name + '_branch_layer_' + str(i + 1) + '_' + + str(j + 1)) + out.append(residual) + return out + + def _high_resolution_module(self, + x, + blocks, + channels, + multi_scale_output=True, + name=None): + residual = self._branches(x, blocks, channels, name=name) + out = self._fuse_layers( + residual, + channels, + multi_scale_output=multi_scale_output, + name=name) + return out + + def _transition_layer(self, x, in_channels, out_channels, name=None): + num_in = len(in_channels) + num_out = len(out_channels) + out = [] + for i in range(num_out): + if i < num_in: + if in_channels[i] != out_channels[i]: + residual = self._conv_bn_layer( + x[i], + filter_size=3, + num_filters=out_channels[i], + name=name + '_layer_' + str(i + 1)) + out.append(residual) + else: + out.append(x[i]) + else: + residual = self._conv_bn_layer( + x[-1], + filter_size=3, + num_filters=out_channels[i], + stride=2, + name=name + '_layer_' + str(i + 1)) + out.append(residual) + return out + + def _stage(self, + x, + num_modules, + num_blocks, + num_channels, + multi_scale_output=True, + name=None): + out = x + for i in range(num_modules): + if i == num_modules - 
1 and multi_scale_output == False: + out = self._high_resolution_module( + out, + num_blocks, + num_channels, + multi_scale_output=False, + name=name + '_' + str(i + 1)) + else: + out = self._high_resolution_module( + out, num_blocks, num_channels, name=name + '_' + str(i + 1)) + + return out + + def _layer1(self, input, num_modules, num_blocks, num_channels, name=None): + # num_modules 默认为1,是否增加处理,官网实现为[1],是否对齐。 + conv = input + for i in range(num_blocks[0]): + conv = self._bottleneck_block( + conv, + num_filters=num_channels[0], + downsample=True if i == 0 else False, + name=name + '_' + str(i + 1)) + return conv + + def _high_resolution_net(self, input, num_classes): + x = self._conv_bn_layer( + input=input, + filter_size=3, + num_filters=self.stage1_num_channels[0], + stride=2, + if_act=True, + name='layer1_1') + x = self._conv_bn_layer( + input=x, + filter_size=3, + num_filters=self.stage1_num_channels[0], + stride=2, + if_act=True, + name='layer1_2') + + la1 = self._layer1( + x, + self.stage1_num_modules, + self.stage1_num_blocks, + self.stage1_num_channels, + name='layer2') + tr1 = self._transition_layer([la1], + self.stage1_num_channels, + self.stage2_num_channels, + name='tr1') + st2 = self._stage( + tr1, + self.stage2_num_modules, + self.stage2_num_blocks, + self.stage2_num_channels, + name='st2') + tr2 = self._transition_layer( + st2, self.stage2_num_channels, self.stage3_num_channels, name='tr2') + st3 = self._stage( + tr2, + self.stage3_num_modules, + self.stage3_num_blocks, + self.stage3_num_channels, + name='st3') + tr3 = self._transition_layer( + st3, self.stage3_num_channels, self.stage4_num_channels, name='tr3') + st4 = self._stage( + tr3, + self.stage4_num_modules, + self.stage4_num_blocks, + self.stage4_num_channels, + name='st4') + + # upsample + shape = fluid.layers.shape(st4[0])[-2:] + st4[1] = fluid.layers.resize_bilinear(st4[1], out_shape=shape) + st4[2] = fluid.layers.resize_bilinear(st4[2], out_shape=shape) + st4[3] = fluid.layers.resize_bilinear(st4[3], out_shape=shape) + + out = fluid.layers.concat(st4, axis=1) + last_channels = sum(self.stage4_num_channels) + + out = self._conv_bn_layer( + input=out, + filter_size=1, + num_filters=last_channels, + stride=1, + if_act=True, + name='conv-2') + out = fluid.layers.conv2d( + input=out, + num_filters=num_classes, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr(initializer=MSRA(), name='conv-1_weights'), + bias_attr=False) + + input_shape = fluid.layers.shape(input)[-2:] + out = fluid.layers.resize_bilinear(out, input_shape) + + return out diff --git a/contrib/RemoteSensing/predict_demo.py b/contrib/RemoteSensing/predict_demo.py index 2d7b8c2940882783f69685286cc5d7970e768cb0..fb167c30d227d662e71c8af9e85d80fae7dc9d3e 100644 --- a/contrib/RemoteSensing/predict_demo.py +++ b/contrib/RemoteSensing/predict_demo.py @@ -1,5 +1,6 @@ import os import os.path as osp +import sys import numpy as np from PIL import Image as Image import argparse @@ -8,46 +9,81 @@ from models import load_model def parse_args(): parser = argparse.ArgumentParser(description='RemoteSensing predict') + parser.add_argument( + '--single_img', + dest='single_img', + help='single image path to predict', + default=None, + type=str) parser.add_argument( '--data_dir', dest='data_dir', help='dataset directory', default=None, type=str) + parser.add_argument( + '--file_list', + dest='file_list', + help='file name of predict file list', + default=None, + type=str) parser.add_argument( '--load_model_dir', dest='load_model_dir', 
        help='model load directory',
         default=None,
         type=str)
+    parser.add_argument(
+        '--save_img_dir',
+        dest='save_img_dir',
+        help='directory for saving prediction results',
+        default='predict_results',
+        type=str)
+    if len(sys.argv) < 2:
+        parser.print_help()
+        sys.exit(1)
     return parser.parse_args()
 
 
 args = parse_args()
-
 data_dir = args.data_dir
+file_list = args.file_list
+single_img = args.single_img
 load_model_dir = args.load_model_dir
+save_img_dir = args.save_img_dir
+if not osp.exists(save_img_dir):
+    os.makedirs(save_img_dir)
 
 # predict
 model = load_model(load_model_dir)
-pred_dir = osp.join(load_model_dir, 'predict')
-if not osp.exists(pred_dir):
-    os.mkdir(pred_dir)
-
-val_list = osp.join(data_dir, 'val.txt')
-color_map = [0, 0, 0, 255, 255, 255]
-with open(val_list) as f:
-    lines = f.readlines()
-    for line in lines:
-        img_path = line.split(' ')[0]
-        print('Predicting {}'.format(img_path))
-        img_path_ = osp.join(data_dir, img_path)
-
-        pred = model.predict(img_path_)
-
-        # save the prediction result as a pseudo-color PNG image
-        pred_name = osp.basename(img_path).rstrip('npy') + 'png'
-        pred_path = osp.join(pred_dir, pred_name)
-        pred_mask = Image.fromarray(pred.astype(np.uint8), mode='P')
-        pred_mask.putpalette(color_map)
-        pred_mask.save(pred_path)
+
+color_map = [0, 0, 0, 0, 255, 0]
+if single_img is not None:
+    pred = model.predict(single_img)
+    # save the prediction result as a pseudo-color PNG image
+    pred_name = osp.splitext(osp.basename(single_img))[0] + '.png'
+    pred_path = osp.join(save_img_dir, pred_name)
+    pred_mask = Image.fromarray(pred['label_map'].astype(np.uint8), mode='P')
+    pred_mask.putpalette(color_map)
+    pred_mask.save(pred_path)
+elif (file_list is not None) and (data_dir is not None):
+    with open(osp.join(data_dir, file_list)) as f:
+        lines = f.readlines()
+        for line in lines:
+            img_path = line.split(' ')[0]
+            print('Predicting {}'.format(img_path))
+            img_path_ = osp.join(data_dir, img_path)
+
+            pred = model.predict(img_path_)
+
+            # save the prediction result as a pseudo-color PNG image
+            pred_name = osp.splitext(osp.basename(img_path))[0] + '.png'
+            pred_path = osp.join(save_img_dir, pred_name)
+            pred_mask = Image.fromarray(
+                pred['label_map'].astype(np.uint8), mode='P')
+            pred_mask.putpalette(color_map)
+            pred_mask.save(pred_path)
+else:
+    raise Exception(
+        'You should set either the parameter single_img, or both the parameters data_dir and file_list.'
+    )
diff --git a/contrib/RemoteSensing/train_demo.py b/contrib/RemoteSensing/train_demo.py
index afd3e8523a4e8007d5d0847cfd5f2460d19dc269..b6ff0905dd1d4a4888c3c63414e0c9c1d84d9915 100644
--- a/contrib/RemoteSensing/train_demo.py
+++ b/contrib/RemoteSensing/train_demo.py
@@ -2,11 +2,17 @@ import os.path as osp
 import argparse
 import transforms.transforms as T
 from readers.reader import Reader
-from models import UNet
+from models import UNet, HRNet
 
 
 def parse_args():
     parser = argparse.ArgumentParser(description='RemoteSensing training')
+    parser.add_argument(
+        '--model_type',
+        dest='model_type',
+        help="Model type for training, which is one of ('unet', 'hrnet')",
+        type=str,
+        default='hrnet')
     parser.add_argument(
         '--data_dir',
         dest='data_dir',
@@ -43,7 +49,6 @@ def parse_args():
 
 
 args = parse_args()
-
 data_dir = args.data_dir
 save_dir = args.save_dir
 channel = args.channel
@@ -52,17 +57,9 @@ train_batch_size = args.train_batch_size
 lr = args.lr
 
 # define the transforms used for training and validation
-train_transforms = T.Compose([
-    T.RandomVerticalFlip(0.5),
-    T.RandomHorizontalFlip(0.5),
-    T.ResizeStepScaling(0.5, 2.0, 0.25),
-    T.RandomPaddingCrop(256),
-    T.Normalize(mean=[0.5] * channel, std=[0.5] * channel),
-])
+train_transforms = T.Compose([T.RandomHorizontalFlip(0.5), T.Normalize()])
 
-eval_transforms = T.Compose([
-    T.Normalize(mean=[0.5] * channel, std=[0.5] * channel),
-])
+eval_transforms = T.Compose([T.Normalize()])
 
 train_list = osp.join(data_dir, 'train.txt')
 val_list = osp.join(data_dir, 'val.txt')
@@ -74,23 +71,30 @@ train_reader = Reader(
     file_list=train_list,
     label_list=label_list,
     transforms=train_transforms,
-    num_workers=8,
-    buffer_size=16,
-    shuffle=True,
-    parallel_method='thread')
+    shuffle=True)
 
 eval_reader = Reader(
     data_dir=data_dir,
     file_list=val_list,
     label_list=label_list,
-    transforms=eval_transforms,
-    num_workers=8,
-    buffer_size=16,
-    shuffle=False,
-    parallel_method='thread')
+    transforms=eval_transforms)
 
-model = UNet(
-    num_classes=2, input_channel=channel, use_bce_loss=True, use_dice_loss=True)
+if args.model_type == 'unet':
+    model = UNet(
+        num_classes=2,
+        input_channel=channel,
+        use_bce_loss=True,
+        use_dice_loss=True)
+elif args.model_type == 'hrnet':
+    model = HRNet(
+        num_classes=2,
+        input_channel=channel,
+        use_bce_loss=True,
+        use_dice_loss=True)
+else:
+    raise ValueError(
+        "--model_type: {} is invalid, it should be one of ('unet', "
+        "'hrnet')".format(args.model_type))
 
 model.train(
     num_epochs=num_epochs,
@@ -100,7 +104,5 @@ model.train(
     save_interval_epochs=5,
     log_interval_steps=10,
     save_dir=save_dir,
-    pretrain_weights=None,
-    optimizer=None,
     learning_rate=lr,
     use_vdl=True)
diff --git a/contrib/RemoteSensing/utils/metrics.py b/contrib/RemoteSensing/utils/metrics.py
index 2898be028f3dfa03ad9892310da89f7695829542..7dc09977843eae1af6d98be589954a849f6550aa 100644
--- a/contrib/RemoteSensing/utils/metrics.py
+++ b/contrib/RemoteSensing/utils/metrics.py
@@ -143,3 +143,14 @@ class ConfusionMatrix(object):
         kappa = (po - pe) / (1 - pe)
 
         return kappa
+
+    def precision_recall(self):
+        '''
+        precision and recall of the foreground class (value=1) for binary segmentation
+        '''
+        TP = self.confusion_matrix[1, 1]
+        FN = self.confusion_matrix[1, 0]
+        FP = self.confusion_matrix[0, 1]
+        recall = TP / (TP + FN)
+        precision = TP / (TP + FP)
+        return precision, recall
diff --git a/contrib/RemoteSensing/utils/utils.py b/contrib/RemoteSensing/utils/utils.py
index ecce788190e594eef8c259db84e47e0959cae184..d9715192b0cd68608e91485d665a62349c24bdfb 100644
--- a/contrib/RemoteSensing/utils/utils.py
+++ b/contrib/RemoteSensing/utils/utils.py
@@ -12,13 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import sys
-import time
 import os
 import os.path as osp
 import numpy as np
 import six
-import yaml
 import math
 from . import logging
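
One caveat about the `precision_recall()` method added to `ConfusionMatrix` above: both denominators (`TP + FN` and `TP + FP`) can be zero when the evaluation data contains no foreground pixels or the model never predicts foreground. Below is a minimal standalone sketch of the same computation with guarded denominators; it assumes the `[ground_truth, prediction]` matrix orientation implied by the indices used in the method, and the helper name `precision_recall_from_cm` is illustrative, not part of the library:

```python
import numpy as np


def precision_recall_from_cm(cm):
    # cm: 2x2 confusion matrix laid out as cm[ground_truth, prediction],
    # matching the indexing in ConfusionMatrix.precision_recall() above.
    tp = float(cm[1, 1])  # foreground correctly predicted as foreground
    fn = float(cm[1, 0])  # foreground missed (predicted as background)
    fp = float(cm[0, 1])  # background wrongly predicted as foreground
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    return precision, recall


# Example: 100 foreground pixels hit, 20 missed, 10 false alarms.
cm = np.array([[870, 10],
               [20, 100]])
print(precision_recall_from_cm(cm))  # -> (0.909..., 0.833...)
```

Returning 0.0 for an empty denominator is one convention; raising an error or returning NaN are equally defensible depending on how the metric is consumed downstream.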