From 25ba89f1e35f540445f910479ed530dd0e0b842b Mon Sep 17 00:00:00 2001 From: wangna11BD <79366697+wangna11BD@users.noreply.github.com> Date: Fri, 17 Sep 2021 14:28:48 +0800 Subject: [PATCH] add PAN model (#426) * add PAN model * add comparison of torch --- configs/pan_psnr_x4_div2k.yaml | 109 ++++++++++ .../single_image_super_resolution.md | 25 ++- .../single_image_super_resolution.md | 34 ++- ppgan/models/generators/__init__.py | 1 + ppgan/models/generators/pan.py | 196 ++++++++++++++++++ ppgan/models/sr_model.py | 14 +- 6 files changed, 368 insertions(+), 11 deletions(-) create mode 100644 configs/pan_psnr_x4_div2k.yaml create mode 100644 ppgan/models/generators/pan.py diff --git a/configs/pan_psnr_x4_div2k.yaml b/configs/pan_psnr_x4_div2k.yaml new file mode 100644 index 0000000..2b848ea --- /dev/null +++ b/configs/pan_psnr_x4_div2k.yaml @@ -0,0 +1,109 @@ +total_iters: 1000000 +output_dir: output_dir +# tensor range for function tensor2img +min_max: + (0., 1.) + +model: + name: BaseSRModel + generator: + name: PAN + in_nc: 3 + out_nc: 3 + nf: 40 + unf: 24 + nb: 16 + scale: 4 + pixel_criterion: + name: L1Loss + use_init_weight: True + +dataset: + train: + name: SRDataset + gt_folder: data/DIV2K/DIV2K_train_HR_sub + lq_folder: data/DIV2K/DIV2K_train_LR_bicubic/X4_sub + num_workers: 6 + batch_size: 32 #1 GPU + use_shared_memory: False + scale: 4 + preprocess: + - name: LoadImageFromFile + key: lq + - name: LoadImageFromFile + key: gt + - name: Transforms + input_keys: [lq, gt] + pipeline: + - name: SRPairedRandomCrop + gt_patch_size: 256 + scale: 4 + keys: [image, image] + - name: PairedRandomHorizontalFlip + keys: [image, image] + - name: PairedRandomVerticalFlip + keys: [image, image] + - name: PairedRandomTransposeHW + keys: [image, image] + - name: Transpose + keys: [image, image] + - name: Normalize + mean: [0., 0., 0.] + std: [255., 255., 255.] 
+ keys: [image, image] + test: + name: SRDataset + gt_folder: data/Set14/GTmod12 + lq_folder: data/Set14/LRbicx4 + scale: 4 + preprocess: + - name: LoadImageFromFile + key: lq + - name: LoadImageFromFile + key: gt + - name: Transforms + input_keys: [lq, gt] + pipeline: + - name: Transpose + keys: [image, image] + - name: Normalize + mean: [0., 0., 0.] + std: [255., 255., 255.] + keys: [image, image] + +lr_scheduler: + name: CosineAnnealingRestartLR + learning_rate: !!float 7e-4 + periods: [250000, 250000, 250000, 250000] + restart_weights: [1, 1, 1, 1] + eta_min: !!float 1e-7 + +optimizer: + name: Adam + # add parameters of net_name to optim + # name should in self.nets + net_names: + - generator + beta1: 0.9 + beta2: 0.99 + +validate: + interval: 5000 + save_img: false + + metrics: + psnr: # metric name, can be arbitrary + name: PSNR + crop_border: 4 + test_y_channel: False + ssim: + name: SSIM + crop_border: 4 + test_y_channel: False + +log_config: + interval: 100 + visiual_interval: 5000 + +snapshot_config: + interval: 5000 diff --git a/docs/en_US/tutorials/single_image_super_resolution.md b/docs/en_US/tutorials/single_image_super_resolution.md index 8e6d40d..44a7e36 100644 --- a/docs/en_US/tutorials/single_image_super_resolution.md +++ b/docs/en_US/tutorials/single_image_super_resolution.md @@ -3,13 +3,13 @@ ## 1.1 Principle Super resolution is a process of upscaling and improving the details within an image. It usually takes a low-resolution image as input and upscales the same image to a higher resolution as output. - Here we provide three super-resolution models, namely [RealSR](https://openaccess.thecvf.com/content_CVPRW_2020/papers/w31/Ji_Real-World_Super-Resolution_via_Kernel_Estimation_and_Noise_Injection_CVPRW_2020_paper.pdf), [ESRGAN](https://arxiv.org/abs/1809.00219v2), [LESRCNN](https://arxiv.org/abs/2007.04344). 
+ Here we provide four super-resolution models, namely [RealSR](https://openaccess.thecvf.com/content_CVPRW_2020/papers/w31/Ji_Real-World_Super-Resolution_via_Kernel_Estimation_and_Noise_Injection_CVPRW_2020_paper.pdf), [ESRGAN](https://arxiv.org/abs/1809.00219v2), [LESRCNN](https://arxiv.org/abs/2007.04344), [PAN](https://arxiv.org/pdf/2010.01073.pdf). [RealSR](https://openaccess.thecvf.com/content_CVPRW_2020/papers/w31/Ji_Real-World_Super-Resolution_via_Kernel_Estimation_and_Noise_Injection_CVPRW_2020_paper.pdf) focus on designing a novel degradation framework for realworld images by estimating various blur kernels as well as real noise distributions. Based on the novel degradation framework, we can acquire LR images sharing a common domain with real-world images. RealSR is a real-world super-resolution model aiming at better perception. Extensive experiments on synthetic noise data and real-world images demonstrate that RealSR outperforms the state-of-the-art methods, resulting in lower noise and better visual quality. - [ESRGAN](https://arxiv.org/abs/1809.00219v2) is an enhanced SRGAN. To further enhance the visual quality of SRGAN, ESRGAN improves three key components of srgan. In addition, ESRGAN also introduces the Residual-in-Residual Dense Block (RRDB) without batch normalization as the basic network building unit, lets the discriminator predict relative realness instead of the absolute value, and improves the perceptual loss by using the features before activation. Benefiting from these improvements, the proposed ESRGAN achieves consistently better visual quality with more realistic and natural textures than SRGAN and won the first place in the PIRM2018-SR Challenge. 
- - Considering that the application of CNN in SISR often consume high computational cost and more memory storage for training a SR model, a lightweight enhanced SR CNN ([LESRCNN](https://arxiv.org/abs/2007.04344)) was proposed.Extensive experiments demonstrate that the proposed LESRCNN outperforms state-of-the-arts on SISR in terms of qualitative and quantitative evaluation. + [ESRGAN](https://arxiv.org/abs/1809.00219v2) is an enhanced SRGAN. To further enhance the visual quality of SRGAN, ESRGAN improves three key components of SRGAN. In addition, ESRGAN also introduces the Residual-in-Residual Dense Block (RRDB) without batch normalization as the basic network building unit, lets the discriminator predict relative realness instead of the absolute value, and improves the perceptual loss by using the features before activation. Benefiting from these improvements, the proposed ESRGAN achieves consistently better visual quality with more realistic and natural textures than SRGAN and won first place in the PIRM2018-SR Challenge. + + Considering that the application of CNN in SISR often consumes high computational cost and more memory storage for training an SR model, a lightweight enhanced SR CNN ([LESRCNN](https://arxiv.org/abs/2007.04344)) was proposed. Extensive experiments demonstrate that the proposed LESRCNN outperforms state-of-the-art methods on SISR in terms of qualitative and quantitative evaluation. Later, [PAN](https://arxiv.org/pdf/2010.01073.pdf) proposed a lightweight convolutional neural network for image super-resolution (SR). ## 1.2 How to use @@ -127,6 +127,7 @@ The metrics are PSNR / SSIM. 
| lesrcnn_x4 | 31.9476 / 0.8909 | 28.4110 / 0.7770 | 30.231 / 0.8326 | | esrgan_psnr_x4 | 32.5512 / 0.8991 | 28.8114 / 0.7871 | 30.7565 / 0.8449 | | esrgan_x4 | 28.7647 / 0.8187 | 25.0065 / 0.6762 | 26.9013 / 0.7542 | +| pan_x4 | 30.4574 / 0.8643 | 26.7204 / 0.7434 | 28.9187 / 0.8176 | | drns_x4 | 32.6684 / 0.8999 | 28.9037 / 0.7885 | - | @@ -142,6 +143,7 @@ The metrics are PSNR / SSIM. | lesrcnn_x4 | DIV2K | [lesrcnn_x4](https://paddlegan.bj.bcebos.com/models/lesrcnn_x4.pdparams) | esrgan_psnr_x4 | DIV2K | [esrgan_psnr_x4](https://paddlegan.bj.bcebos.com/models/esrgan_psnr_x4.pdparams) | esrgan_x4 | DIV2K | [esrgan_x4](https://paddlegan.bj.bcebos.com/models/esrgan_x4.pdparams) +| pan_x4 | DIV2K | [pan_x4](https://paddlegan.bj.bcebos.com/models/pan_x4.pdparams) | drns_x4 | DIV2K | [drns_x4](https://paddlegan.bj.bcebos.com/models/DRNSx4.pdparams) @@ -184,7 +186,20 @@ The metrics are PSNR / SSIM. publisher={Elsevier} } ``` -- 4. [Closed-loop Matters: Dual Regression Networks for Single Image Super-Resolution](https://arxiv.org/pdf/2003.07018.pdf) + +- 4. [Efficient Image Super-Resolution Using Pixel Attention](https://arxiv.org/pdf/2010.01073.pdf) + + ``` + @inproceedings{Hengyuan2020Efficient, + title={Efficient Image Super-Resolution Using Pixel Attention}, + author={Hengyuan Zhao and Xiangtao Kong and Jingwen He and Yu Qiao and Chao Dong}, + booktitle={Computer Vision – ECCV 2020 Workshops}, + volume={12537}, + pages={56-72}, + year={2020} + } + ``` +- 5. 
[Closed-loop Matters: Dual Regression Networks for Single Image Super-Resolution](https://arxiv.org/pdf/2003.07018.pdf) ``` @inproceedings{guo2020closed, diff --git a/docs/zh_CN/tutorials/single_image_super_resolution.md b/docs/zh_CN/tutorials/single_image_super_resolution.md index f7618ff..7572584 100644 --- a/docs/zh_CN/tutorials/single_image_super_resolution.md +++ b/docs/zh_CN/tutorials/single_image_super_resolution.md @@ -2,14 +2,15 @@ ## 1.1 原理介绍 - 超分是放大和改善图像细节的过程。它通常将低分辨率图像作为输入,将同一图像放大到更高分辨率作为输出。这里我们提供了三种超分辨率模型,即[RealSR](https://openaccess.thecvf.com/content_CVPRW_2020/papers/w31/Ji_Real-World_Super-Resolution_via_Kernel_Estimation_and_Noise_Injection_CVPRW_2020_paper.pdf), [ESRGAN](https://arxiv.org/abs/1809.00219v2), [LESRCNN](https://arxiv.org/abs/2007.04344). + 超分是放大和改善图像细节的过程。它通常将低分辨率图像作为输入,将同一图像放大到更高分辨率作为输出。这里我们提供了四种超分辨率模型,即[RealSR](https://openaccess.thecvf.com/content_CVPRW_2020/papers/w31/Ji_Real-World_Super-Resolution_via_Kernel_Estimation_and_Noise_Injection_CVPRW_2020_paper.pdf), [ESRGAN](https://arxiv.org/abs/1809.00219v2), [LESRCNN](https://arxiv.org/abs/2007.04344),[PAN](https://arxiv.org/pdf/2010.01073.pdf). 
[RealSR](https://openaccess.thecvf.com/content_CVPRW_2020/papers/w31/Ji_Real-World_Super-Resolution_via_Kernel_Estimation_and_Noise_Injection_CVPRW_2020_paper.pdf)通过估计各种模糊内核以及实际噪声分布,为现实世界的图像设计一种新颖的真实图片降采样框架。基于该降采样框架,可以获取与真实世界图像共享同一域的低分辨率图像。RealSR是一个旨在提高感知度的真实世界超分辨率模型。对合成噪声数据和真实世界图像进行的大量实验表明,RealSR模型能够有效降低了噪声并提高了视觉质量。 [ESRGAN](https://arxiv.org/abs/1809.00219v2)是增强型SRGAN,为了进一步提高SRGAN的视觉质量,ESRGAN在SRGAN的基础上改进了SRGAN的三个关键组件。此外,ESRGAN还引入了未经批量归一化的剩余密集块(RRDB)作为基本的网络构建单元,让鉴别器预测相对真实性而不是绝对值,并利用激活前的特征改善感知损失。得益于这些改进,提出的ESRGAN实现了比SRGAN更好的视觉质量和更逼真、更自然的纹理,并在PIRM2018-SR挑战赛中获得第一名。 - 考虑到CNNs在SISR的应用上往往会消耗大量的计算量和存储空间来训练SR模型,轻量级增强SR-CNN([LESRCNN](https://arxiv.org/abs/2007.04344))被提出。大量实验表明,LESRCNN在定性和定量评价方面优于现有的SISR算法。 + 考虑到CNNs在SISR的应用上往往会消耗大量的计算量和存储空间来训练SR模型。轻量级增强SR-CNN([LESRCNN](https://arxiv.org/abs/2007.04344))被提出。大量实验表明,LESRCNN在定性和定量评价方面优于现有的SISR算法。 + 之后[PAN](https://arxiv.org/pdf/2010.01073.pdf)设计了一种用于图像超分辨率(SR)的轻量级卷积神经网络。 -## 1.2 如何使用 +## 1.2 如何使用 ### 1.2.1 数据准备 @@ -107,8 +108,18 @@ | lesrcnn_x4 | 31.9476 / 0.8909 | 28.4110 / 0.7770 | 30.231 / 0.8326 | | esrgan_psnr_x4 | 32.5512 / 0.8991 | 28.8114 / 0.7871 | 30.7565 / 0.8449 | | esrgan_x4 | 28.7647 / 0.8187 | 25.0065 / 0.6762 | 26.9013 / 0.7542 | +| pan_x4 | 30.4574 / 0.8643 | 26.7204 / 0.7434 | 28.9187 / 0.8176 | | drns_x4 | 32.6684 / 0.8999 | 28.9037 / 0.7885 | - | +PAN指标对比 + +paddle模型使用DIV2K数据集训练,torch模型使用df2k和DIV2K数据集训练。 + +| 框架 | Set5 | Set14 | +|---|---|---| +| paddle | 30.4574 / 0.8643 | 26.7204 / 0.7434 | +| torch | 30.2183 / 0.8643 | 26.8035 / 0.7445 | + @@ -122,6 +133,7 @@ | lesrcnn_x4 | DIV2K | [lesrcnn_x4](https://paddlegan.bj.bcebos.com/models/lesrcnn_x4.pdparams) | esrgan_psnr_x4 | DIV2K | [esrgan_psnr_x4](https://paddlegan.bj.bcebos.com/models/esrgan_psnr_x4.pdparams) | esrgan_x4 | DIV2K | [esrgan_x4](https://paddlegan.bj.bcebos.com/models/esrgan_x4.pdparams) +| pan_x4 | DIV2K | [pan_x4](https://paddlegan.bj.bcebos.com/models/pan_x4.pdparams) | drns_x4 | DIV2K | 
[drns_x4](https://paddlegan.bj.bcebos.com/models/DRNSx4.pdparams) @@ -164,7 +176,19 @@ publisher={Elsevier} } ``` -- 4. [Closed-loop Matters: Dual Regression Networks for Single Image Super-Resolution](https://arxiv.org/pdf/2003.07018.pdf) +- 4. [Efficient Image Super-Resolution Using Pixel Attention](https://arxiv.org/pdf/2010.01073.pdf) + + ``` + @inproceedings{Hengyuan2020Efficient, + title={Efficient Image Super-Resolution Using Pixel Attention}, + author={Hengyuan Zhao and Xiangtao Kong and Jingwen He and Yu Qiao and Chao Dong}, + booktitle={Computer Vision – ECCV 2020 Workshops}, + volume={12537}, + pages={56-72}, + year={2020} + } + ``` + - 5. [Closed-loop Matters: Dual Regression Networks for Single Image Super-Resolution](https://arxiv.org/pdf/2003.07018.pdf) ``` @inproceedings{guo2020closed, @@ -173,4 +197,4 @@ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, year={2020} } - ``` \ No newline at end of file + ``` diff --git a/ppgan/models/generators/__init__.py b/ppgan/models/generators/__init__.py index 006bd07..278afba 100755 --- a/ppgan/models/generators/__init__.py +++ b/ppgan/models/generators/__init__.py @@ -34,3 +34,4 @@ from .basicvsr import BasicVSRNet from .mpr import MPRNet from .iconvsr import IconVSR from .gpen import GPEN +from .pan import PAN diff --git a/ppgan/models/generators/pan.py b/ppgan/models/generators/pan.py new file mode 100644 index 0000000..2e11975 --- /dev/null +++ b/ppgan/models/generators/pan.py @@ -0,0 +1,196 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import functools
import numpy as np

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from .builder import GENERATORS


def make_multi_blocks(func, num_layers):
    """Stack ``num_layers`` freshly built blocks into one sequential layer.

    Args:
        func (callable): zero-argument factory that returns a new block each
            time it is called (e.g. a ``functools.partial`` around an
            ``nn.Layer`` class).
        num_layers (int): number of blocks to build.

    Returns:
        nn.Sequential: the blocks, registered in order under the names
        ``block0``, ``block1``, ...
    """
    stacked = nn.Sequential()
    for index in range(num_layers):
        stacked.add_sublayer('block{}'.format(index), func())
    return stacked


class PA(nn.Layer):
    """Pixel attention: scale the input element-wise by a learned gate.

    The gate is ``sigmoid(conv1x1(x))`` and has the same channel count as the
    input.

    Args:
        nf (int): number of input (and output) channels.
    """
    def __init__(self, nf):
        super(PA, self).__init__()
        self.conv = nn.Conv2D(nf, nf, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` multiplied element-wise by its attention gate."""
        gate = self.sigmoid(self.conv(x))
        return x * gate


class PAConv(nn.Layer):
    """Convolution whose response is gated by pixel attention.

    Computes ``k4(k3(x) * sigmoid(k2(x)))`` where ``k2`` is a 1x1 convolution
    and ``k3`` / ``k4`` are bias-free ``k_size`` x ``k_size`` convolutions.

    Args:
        nf (int): number of input (and output) channels.
        k_size (int): kernel size of ``k3`` and ``k4``. Default: 3.
    """
    def __init__(self, nf, k_size=3):
        super(PAConv, self).__init__()
        # Padding applied to both spatial convolutions.
        pad = (k_size - 1) // 2

        self.k2 = nn.Conv2D(nf, nf, 1)  # 1x1 convolution nf->nf
        self.sigmoid = nn.Sigmoid()
        self.k3 = nn.Conv2D(nf,
                            nf,
                            kernel_size=k_size,
                            padding=pad,
                            bias_attr=False)
        self.k4 = nn.Conv2D(nf,
                            nf,
                            kernel_size=k_size,
                            padding=pad,
                            bias_attr=False)

    def forward(self, x):
        """Apply the gated convolution followed by the second convolution."""
        gate = self.sigmoid(self.k2(x))
        return self.k4(self.k3(x) * gate)


class SCPA(nn.Layer):
    """Two-branch residual block combining a plain conv and a ``PAConv``.

    SCPA is modified from SCNet (Jiang-Jiang Liu et al. Improving
    Convolutional Networks with Self-Calibrated Convolutions.
    In CVPR, 2020)

    The input is projected by two bias-free 1x1 convolutions into two
    branches of ``nf // reduction`` channels each. One branch goes through a
    plain 3x3 convolution, the other through ``PAConv``. The branch outputs
    are concatenated along axis 1, projected back to ``nf`` channels by a
    bias-free 1x1 convolution and added to the input.

    Args:
        nf (int): number of input (and output) channels.
        reduction (int): divisor used to derive the width of each branch.
            Default: 2.
        stride (int): stride of the plain branch's 3x3 convolution.
            Default: 1.
        dilation (int): dilation of that convolution; the same value is used
            as its padding. Default: 1.
    """
    def __init__(self, nf, reduction=2, stride=1, dilation=1):
        super(SCPA, self).__init__()
        group_width = nf // reduction
        # forward() always concatenates exactly two branches.
        num_branches = 2

        self.conv1_a = nn.Conv2D(nf,
                                 group_width,
                                 kernel_size=1,
                                 bias_attr=False)
        self.conv1_b = nn.Conv2D(nf,
                                 group_width,
                                 kernel_size=1,
                                 bias_attr=False)

        self.k1 = nn.Sequential(
            nn.Conv2D(group_width,
                      group_width,
                      kernel_size=3,
                      stride=stride,
                      padding=dilation,
                      dilation=dilation,
                      bias_attr=False))

        self.PAConv = PAConv(group_width)

        # The fusion conv receives the two concatenated branches, i.e.
        # 2 * group_width channels regardless of ``reduction``. Sizing it as
        # group_width * reduction (as before) only matched for
        # reduction == 2; for that value the layer shape is unchanged.
        self.conv3 = nn.Conv2D(group_width * num_branches,
                               nf,
                               kernel_size=1,
                               bias_attr=False)

        self.lrelu = nn.LeakyReLU(negative_slope=0.2)

    def forward(self, x):
        """Run both branches, fuse them and add the input back."""
        plain = self.lrelu(self.conv1_a(x))
        attended = self.lrelu(self.conv1_b(x))

        plain = self.lrelu(self.k1(plain))
        attended = self.lrelu(self.PAConv(attended))

        out = self.conv3(paddle.concat([plain, attended], axis=1))
        out += x

        return out


@GENERATORS.register()
class PAN(nn.Layer):
    """PAN generator for single image super-resolution.

    Structure: a 3x3 input convolution, a trunk of ``nb`` ``SCPA`` blocks
    followed by a 3x3 convolution with a skip connection around the trunk,
    one or two nearest-neighbour upsampling stages (each: convolution, ``PA``
    attention, convolution), and a final 3x3 convolution. The bilinearly
    upsampled input is added to the result.

    Args:
        in_nc (int): channels of the input image.
        out_nc (int): channels of the output image.
        nf (int): channels used in the trunk.
        unf (int): channels used in the upsampling stages.
        nb (int): number of ``SCPA`` blocks in the trunk.
        scale (int): upscaling factor. 2 and 3 use one upsampling stage,
            4 uses two x2 stages. Default: 4.

    NOTE(review): for any other ``scale`` the upsampling stages are skipped
    in ``forward`` while the input is still interpolated by ``scale``; this
    looks unsupported rather than intended - confirm before using such
    values.
    """
    def __init__(self, in_nc, out_nc, nf, unf, nb, scale=4):
        super(PAN, self).__init__()
        # Factory for the trunk blocks.
        SCPA_block_f = functools.partial(SCPA, nf=nf, reduction=2)
        self.scale = scale

        # first convolution
        self.conv_first = nn.Conv2D(in_nc, nf, 3, 1, 1)

        # main blocks
        self.SCPA_trunk = make_multi_blocks(SCPA_block_f, nb)
        self.trunk_conv = nn.Conv2D(nf, nf, 3, 1, 1)

        # upsampling
        self.upconv1 = nn.Conv2D(nf, unf, 3, 1, 1)
        self.att1 = PA(unf)
        self.HRconv1 = nn.Conv2D(unf, unf, 3, 1, 1)

        # The second stage only exists for x4, which is done as two x2 steps.
        if self.scale == 4:
            self.upconv2 = nn.Conv2D(unf, unf, 3, 1, 1)
            self.att2 = PA(unf)
            self.HRconv2 = nn.Conv2D(unf, unf, 3, 1, 1)

        self.conv_last = nn.Conv2D(unf, out_nc, 3, 1, 1)
        self.lrelu = nn.LeakyReLU(negative_slope=0.2)

    def _upsample_stage(self, fea, factor, upconv, att, hrconv):
        """Nearest-upsample ``fea`` by ``factor``, then conv -> PA -> conv."""
        fea = upconv(F.interpolate(fea, scale_factor=factor, mode='nearest'))
        fea = self.lrelu(att(fea))
        return self.lrelu(hrconv(fea))

    def forward(self, x):
        """Super-resolve ``x`` by ``self.scale``.

        Args:
            x (Tensor): low-resolution input with ``in_nc`` channels.

        Returns:
            Tensor: network output plus the bilinearly upsampled input.
        """
        fea = self.conv_first(x)
        # Skip connection around the SCPA trunk.
        fea = fea + self.trunk_conv(self.SCPA_trunk(fea))

        if self.scale in (2, 3):
            fea = self._upsample_stage(fea, self.scale, self.upconv1,
                                       self.att1, self.HRconv1)
        elif self.scale == 4:
            fea = self._upsample_stage(fea, 2, self.upconv1, self.att1,
                                       self.HRconv1)
            fea = self._upsample_stage(fea, 2, self.upconv2, self.att2,
                                       self.HRconv2)

        out = self.conv_last(fea)

        # Global residual: the network predicts a correction on top of a
        # bilinear upscale of the input.
        base = F.interpolate(x,
                             scale_factor=self.scale,
                             mode='bilinear',
                             align_corners=False)
        return out + base


# ---------------------------------------------------------------------------
# Remainder of the original patch (changes to ppgan/models/sr_model.py).
# It is not part of this module; the text is kept unchanged, with only its
# line breaks restored, so that nothing from the patch is lost.
# ---------------------------------------------------------------------------
# diff --git a/ppgan/models/sr_model.py b/ppgan/models/sr_model.py
# index 5a48d86..767bf27 100644
# --- a/ppgan/models/sr_model.py
# +++ b/ppgan/models/sr_model.py
# @@ -20,13 +20,14 @@ from .criterions.builder import build_criterion
#  from .base_model import BaseModel
#  from .builder import MODELS
#  from ..utils.visual import tensor2img
# +from ..modules.init import reset_parameters
#
#
#  @MODELS.register()
#  class BaseSRModel(BaseModel):
#      """Base SR model for single image super-resolution.
#      """
# -    def __init__(self, generator, pixel_criterion=None):
# +    def __init__(self, generator, pixel_criterion=None, use_init_weight=False):
#          """
#          Args:
#              generator (dict): config of generator.
# @@ -38,6 +39,8 @@ class BaseSRModel(BaseModel):
#
#          if pixel_criterion:
#              self.pixel_criterion = build_criterion(pixel_criterion)
# +        if use_init_weight:
# +            init_sr_weight(self.nets['generator'])
#
#      def setup_input(self, input):
#          self.lq = paddle.to_tensor(input['lq'])
# @@ -78,3 +81,12 @@ class BaseSRModel(BaseModel):
#              if metrics is not None:
#                  for metric in metrics.values():
#                      metric.update(out_img, gt_img)
# +
# +
# +def init_sr_weight(net):
# +    def reset_func(m):
# +        if hasattr(m, 'weight') and (not isinstance(
# +                m, (nn.BatchNorm, nn.BatchNorm2D))):
# +            reset_parameters(m)
# +
# +    net.apply(reset_func)
# --
# GitLab