未验证 提交 64181fa9 编写于 作者: W wangna11BD 提交者: GitHub

add edvr model (#208)

* add edvr model

* modifying code formats and comments

* modifying code formats and comments

* modifying code formats and comments

* add notes
Co-authored-by: NLielinJiang <50691816+LielinJiang@users.noreply.github.com>
上级 e72aae53
total_iters: 600000
output_dir: output_dir
checkpoints_dir: checkpoints
# tensor range for function tensor2img
min_max:
(0., 1.)
model:
name: EDVRModel
tsa_iter: 50000
generator:
name: EDVRNet
in_nf: 3
out_nf: 3
scale_factor: 4
nf: 64
nframes: 5
groups: 8
front_RBs: 5
back_RBs: 10
center: 2
predeblur: False
HR_in: False
w_TSA: True
TSA_only: False
pixel_criterion:
name: CharbonnierLoss
dataset:
train:
name: REDSDataset
mode: train
gt_folder: data/REDS/train_sharp/X4
lq_folder: data/REDS/train_sharp_bicubic/X4
img_format: png
crop_size: 256
interval_list: [1]
random_reverse: False
number_frames: 5
use_flip: True
use_rot: True
buf_size: 1024
scale: 4
fix_random_seed: 10
num_workers: 3
batch_size: 4
test:
name: REDSDataset
mode: test
gt_folder: data/REDS/REDS4_test_sharp/X4
lq_folder: data/REDS/REDS4_test_sharp_bicubic/X4
img_format: png
interval_list: [1]
random_reverse: False
number_frames: 5
batch_size: 1
use_flip: False
use_rot: False
buf_size: 1024
scale: 4
fix_random_seed: 10
lr_scheduler:
name: CosineAnnealingRestartLR
learning_rate: !!float 4e-4
periods: [50000, 100000, 150000, 150000, 150000]
restart_weights: [1, 1, 1, 1, 1]
eta_min: !!float 1e-7
optimizer:
name: Adam
# add parameters of net_name to optim
# each name should be a key in self.nets
net_names:
- generator
beta1: 0.9
beta2: 0.99
validate:
interval: 5000
save_img: false
metrics:
psnr: # metric name, can be arbitrary
name: PSNR
crop_border: 0
test_y_channel: False
ssim:
name: SSIM
crop_border: 0
test_y_channel: False
log_config:
interval: 10
visiual_interval: 5000
snapshot_config:
interval: 5000
total_iters: 600000
output_dir: output_dir
checkpoints_dir: checkpoints
# tensor range for function tensor2img
min_max:
(0., 1.)
model:
name: EDVRModel
tsa_iter: 0
generator:
name: EDVRNet
in_nf: 3
out_nf: 3
scale_factor: 4
nf: 64
nframes: 5
groups: 8
front_RBs: 5
back_RBs: 10
center: 2
predeblur: False
HR_in: False
w_TSA: False
TSA_only: False
pixel_criterion:
name: CharbonnierLoss
dataset:
train:
name: REDSDataset
mode: train
gt_folder: data/REDS/train_sharp/X4
lq_folder: data/REDS/train_sharp_bicubic/X4
img_format: png
crop_size: 256
interval_list: [1]
random_reverse: False
number_frames: 5
use_flip: True
use_rot: True
buf_size: 1024
scale: 4
fix_random_seed: 10
num_workers: 3
batch_size: 4
test:
name: REDSDataset
mode: test
gt_folder: data/REDS/REDS4_test_sharp/X4
lq_folder: data/REDS/REDS4_test_sharp_bicubic/X4
img_format: png
interval_list: [1]
random_reverse: False
number_frames: 5
batch_size: 1
use_flip: False
use_rot: False
buf_size: 1024
scale: 4
fix_random_seed: 10
lr_scheduler:
name: CosineAnnealingRestartLR
learning_rate: !!float 4e-4
periods: [150000, 150000, 150000, 150000]
restart_weights: [1, 1, 1, 1]
eta_min: !!float 1e-7
optimizer:
name: Adam
# add parameters of net_name to optim
# each name should be a key in self.nets
net_names:
- generator
beta1: 0.9
beta2: 0.99
validate:
interval: 5000
save_img: false
metrics:
psnr: # metric name, can be arbitrary
name: PSNR
crop_border: 0
test_y_channel: False
ssim:
name: SSIM
crop_border: 0
test_y_channel: False
log_config:
interval: 10
visiual_interval: 500
snapshot_config:
interval: 5000
......@@ -21,3 +21,4 @@ from .common_vision_dataset import CommonVisionDataset
from .animeganv2_dataset import AnimeGANV2Dataset
from .wav2lip_dataset import Wav2LipDataset
from .starganv2_dataset import StarGANv2Dataset
from .edvr_dataset import REDSDataset
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import random
import numpy as np
import scipy.io as scio
import cv2
import paddle
from paddle.io import Dataset, DataLoader
from .builder import DATASETS
logger = logging.getLogger(__name__)
@DATASETS.register()
class REDSDataset(Dataset):
    """REDS video dataset for the EDVR model.

    Every sample is a window of ``number_frames`` low-quality (LQ) frames
    together with the ground-truth (GT) frame of the window centre.

    Expected layout: ``<lq_folder>/<video_name>/<frame>.<img_format>`` and
    ``<gt_folder>/<video_name>/<frame>.<img_format>``, where ``<frame>`` is an
    8-digit zero-padded index (e.g. ``00000015``).
    """

    # Videos held out for validation; they are skipped when mode == 'train'.
    VAL_VIDEOS = ('000', '011', '015', '020')

    def __init__(self,
                 mode,
                 lq_folder,
                 gt_folder,
                 img_format="png",
                 crop_size=256,
                 interval_list=None,
                 random_reverse=False,
                 number_frames=5,
                 batch_size=32,
                 use_flip=False,
                 use_rot=False,
                 buf_size=1024,
                 scale=4,
                 fix_random_seed=False):
        """Store the configuration and index all frames under ``lq_folder``.

        Args:
            mode (str): 'train', 'valid' or 'test'. Any other value makes
                sample loading raise NotImplementedError.
            lq_folder (str): root folder of the low-quality frames.
            gt_folder (str): root folder of the ground-truth frames.
            img_format (str): image file extension, with or without the dot.
            crop_size (int): side length of the random GT crop (train/valid).
            interval_list (list[int] | None): candidate temporal intervals
                between neighbouring frames; ``None`` means ``[1]``.
            random_reverse (bool): randomly reverse the temporal order.
            number_frames (int): number of LQ frames per sample.
            batch_size (int): stored only; not used by this class.
            use_flip (bool): enable the random horizontal flip.
            use_rot (bool): enable the random vertical flip / transpose.
            buf_size (int): stored only; not used by this class.
            scale (int): up-scaling factor between LQ and GT frames.
            fix_random_seed: when truthy, seed ``random`` and ``numpy.random``
                with the fixed value 10.
        """
        super(REDSDataset, self).__init__()
        self.format = img_format
        self.mode = mode
        self.crop_size = crop_size
        # Avoid a shared mutable default: build a fresh list per instance.
        self.interval_list = [1] if interval_list is None else interval_list
        self.random_reverse = random_reverse
        self.number_frames = number_frames
        self.batch_size = batch_size
        self.fileroot = lq_folder
        self.use_flip = use_flip
        self.use_rot = use_rot
        self.buf_size = buf_size
        self.fix_random_seed = fix_random_seed
        # Always define gtroot so that __getitem__ reaches the explicit
        # "mode not implemented" error instead of an AttributeError.
        self.gtroot = gt_folder if self.mode != 'infer' else None
        self.scale = scale
        self.LR_input = (self.scale > 1)
        if self.fix_random_seed:
            random.seed(10)
            np.random.seed(10)
            self.num_reader_threads = 1
        self._init_()

    def _init_(self):
        """Scan ``self.fileroot`` and build ``self.filelist``.

        Each entry looks like '010_00000015' (``<video>_<frame index>``).
        """
        logger.info('initialize reader ... ')
        self.filelist = []
        for video_name in os.listdir(self.fileroot):
            if self.mode == 'train' and video_name in self.VAL_VIDEOS:
                continue
            video_dir = os.path.join(self.fileroot, video_name)
            for frame_name in os.listdir(video_dir):
                frame_idx = frame_name.split('.')[0]
                self.filelist.append(video_name + '_' + str(frame_idx))
        if self.mode == 'test':
            # Deterministic order for evaluation.
            self.filelist.sort()
        logger.info('number of samples: %d', len(self.filelist))

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            dict: 'lq' is a float32 array of shape [N, C, H, W] (N frames),
            'gt' is a float32 array of shape [C, H, W] and 'lq_path' is the
            ``<video>_<frame>`` identifier of the requested item.
        """
        item = self.filelist[index]
        img_LQs, img_GT = self.get_sample_data(
            item, self.number_frames, self.interval_list, self.random_reverse,
            self.gtroot, self.fileroot, self.LR_input, self.crop_size,
            self.scale, self.use_flip, self.use_rot, self.mode)
        return {'lq': img_LQs, 'gt': img_GT, 'lq_path': self.filelist[index]}

    def get_sample_data(self,
                        item,
                        number_frames,
                        interval_list,
                        random_reverse,
                        gtroot,
                        fileroot,
                        LR_input,
                        crop_size,
                        scale,
                        use_flip,
                        use_rot,
                        mode='train'):
        """Read, crop and augment the frames that belong to ``item``.

        Args:
            item (str): ``<video>_<frame>`` identifier from ``self.filelist``.
            number_frames (int): number of LQ frames to load.
            interval_list (list[int]): candidate temporal intervals.
            random_reverse (bool): randomly reverse the temporal order.
            gtroot (str): root folder of the GT frames.
            fileroot (str): root folder of the LQ frames.
            LR_input (bool): True when the LQ frames are ``scale`` times
                smaller than the GT frame.
            crop_size (int): side length of the GT crop.
            scale (int): up-scaling factor.
            use_flip (bool): enable the random horizontal flip.
            use_rot (bool): enable the random vertical flip / transpose.
            mode (str): 'train', 'valid' or 'test'.

        Returns:
            tuple: (img_LQs [N, C, H, W] float32, img_GT [C, H, W] float32),
            both in RGB channel order.

        Raises:
            NotImplementedError: if ``mode`` is not train/valid/test.
        """
        video_name = item.split('_')[0]
        frame_name = item.split('_')[1]
        is_training = mode in ('train', 'valid')
        if is_training:
            ngb_frames, name_b = self.get_neighbor_frames(
                frame_name,
                number_frames=number_frames,
                interval_list=interval_list,
                random_reverse=random_reverse)
        elif mode == 'test':
            ngb_frames, name_b = self.get_test_neighbor_frames(
                int(frame_name), number_frames)
        else:
            raise NotImplementedError('mode {} not implemented'.format(mode))

        # Honour the configured image format instead of hard-coding '.png'.
        ext = '.' + str(self.format).lstrip('.')

        frame_name = name_b
        img_GT = self.read_img(
            os.path.join(gtroot, video_name, frame_name + ext))
        frame_list = [
            self.read_img(
                os.path.join(fileroot, video_name, ("%08d" % ngb_frm) + ext))
            for ngb_frm in ngb_frames
        ]
        H, W, _ = frame_list[0].shape

        # Random crop; the same window is used for every frame.
        if is_training:
            if LR_input:
                LQ_size = crop_size // scale
                rnd_h = random.randint(0, max(0, H - LQ_size))
                rnd_w = random.randint(0, max(0, W - LQ_size))
                frame_list = [
                    v[rnd_h:rnd_h + LQ_size, rnd_w:rnd_w + LQ_size, :]
                    for v in frame_list
                ]
                # The GT crop is the LQ crop scaled up by `scale`.
                rnd_h_HR, rnd_w_HR = int(rnd_h * scale), int(rnd_w * scale)
                img_GT = img_GT[rnd_h_HR:rnd_h_HR + crop_size,
                                rnd_w_HR:rnd_w_HR + crop_size, :]
            else:
                rnd_h = random.randint(0, max(0, H - crop_size))
                rnd_w = random.randint(0, max(0, W - crop_size))
                frame_list = [
                    v[rnd_h:rnd_h + crop_size, rnd_w:rnd_w + crop_size, :]
                    for v in frame_list
                ]
                img_GT = img_GT[rnd_h:rnd_h + crop_size,
                                rnd_w:rnd_w + crop_size, :]

        # Augment LQ frames and GT together so they stay aligned.
        frame_list.append(img_GT)
        if is_training:
            rlt = self.img_augment(frame_list, use_flip, use_rot)
        else:
            rlt = frame_list
        frame_list = rlt[0:-1]
        img_GT = rlt[-1]

        # Stack LQ images to NHWC, N is the frame number.
        img_LQs = np.stack(frame_list, axis=0)
        # BGR to RGB, HWC to CHW.
        img_GT = img_GT[:, :, [2, 1, 0]]
        img_LQs = img_LQs[:, :, :, [2, 1, 0]]
        img_GT = np.transpose(img_GT, (2, 0, 1)).astype('float32')
        img_LQs = np.transpose(img_LQs, (0, 3, 1, 2)).astype('float32')
        return img_LQs, img_GT

    def get_neighbor_frames(self,
                            frame_name,
                            number_frames,
                            interval_list,
                            random_reverse,
                            max_frame=99,
                            bordermode=False):
        """Pick the frame indices of a training window.

        Args:
            frame_name (str): index of the requested centre frame.
            number_frames (int): window length.
            interval_list (list[int]): candidate temporal intervals; one is
                drawn at random.
            random_reverse (bool): randomly reverse the temporal order.
            max_frame (int): largest valid frame index.
            bordermode (bool): if True the requested frame is the first
                element of the window instead of its centre.

        Returns:
            tuple: (list of frame indices, 8-digit name of the GT frame).

        Raises:
            ValueError: in non-border mode, if the window cannot fit inside
                ``[0, max_frame]`` for the drawn interval.
        """
        center_frame_idx = int(frame_name)
        half_N_frames = number_frames // 2
        interval = random.choice(interval_list)
        if bordermode:
            direction = 1
            if random_reverse and random.random() < 0.5:
                direction = random.choice([0, 1])
            if center_frame_idx + interval * (number_frames - 1) > max_frame:
                direction = 0
            elif center_frame_idx - interval * (number_frames - 1) < 0:
                direction = 1
            if direction == 1:
                neighbor_list = list(
                    range(center_frame_idx,
                          center_frame_idx + interval * number_frames,
                          interval))
            else:
                neighbor_list = list(
                    range(center_frame_idx,
                          center_frame_idx - interval * number_frames,
                          -interval))
            name_b = '{:08d}'.format(neighbor_list[0])
        else:
            # Without this guard the re-sampling loop below never terminates
            # when no centre index can satisfy both border constraints.
            if 2 * half_N_frames * interval > max_frame:
                raise ValueError(
                    'a window of {} frames with interval {} does not fit in '
                    'frames [0, {}]'.format(number_frames, interval,
                                            max_frame))
            # Re-sample the centre until the window stays inside the borders.
            while (center_frame_idx + half_N_frames * interval > max_frame) or (
                    center_frame_idx - half_N_frames * interval < 0):
                center_frame_idx = random.randint(0, max_frame)
            neighbor_list = list(
                range(center_frame_idx - half_N_frames * interval,
                      center_frame_idx + half_N_frames * interval + 1,
                      interval))
            if random_reverse and random.random() < 0.5:
                neighbor_list.reverse()
            name_b = '{:08d}'.format(neighbor_list[half_N_frames])
        assert len(neighbor_list) == number_frames, \
            "frames slected have length({}), but it should be ({})".format(len(neighbor_list), number_frames)
        return neighbor_list, name_b

    def read_img(self, path, size=None):
        """Read an image with cv2.

        Args:
            path (str): image file path.
            size: unused; kept for interface compatibility.

        Returns:
            numpy.ndarray: float32, HWC, BGR, pixel values divided by 255.

        Raises:
            IOError: if cv2 cannot read the file.
        """
        img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError('failed to read image: {}'.format(path))
        img = img.astype(np.float32) / 255.
        if img.ndim == 2:
            img = np.expand_dims(img, axis=2)
        # some images have 4 channels
        if img.shape[2] > 3:
            img = img[:, :, :3]
        return img

    def img_augment(self, img_list, hflip=True, rot=True):
        """Apply one random augmentation to every image in ``img_list``.

        A horizontal flip (if ``hflip``), a vertical flip and an H/W
        transpose (both if ``rot``) are each drawn independently with
        probability 0.5 and applied identically to all images.
        """
        hflip = hflip and random.random() < 0.5
        vflip = rot and random.random() < 0.5
        rot90 = rot and random.random() < 0.5

        def _augment(img):
            if hflip:
                img = img[:, ::-1, :]
            if vflip:
                img = img[::-1, :, :]
            if rot90:
                img = img.transpose(1, 0, 2)
            return img

        return [_augment(img) for img in img_list]

    def get_test_neighbor_frames(self, crt_i, N, max_n=100, padding='new_info'):
        """Generate an index list for reading N frames from a sequence of images

        Args:
            crt_i (int): current center index
            max_n (int): max number of the sequence of images (calculated from 1)
            N (int): reading N frames
            padding (str): padding mode, one of replicate | reflection | new_info | circle
                Example: crt_i = 0, N = 5
                replicate: [0, 0, 0, 1, 2]
                reflection: [2, 1, 0, 1, 2]
                new_info: [4, 3, 0, 1, 2]
                circle: [3, 4, 0, 1, 2]

        Returns:
            tuple: (list of frame indices, 8-digit name of the centre frame)

        Raises:
            ValueError: if ``padding`` is not one of the supported modes and
                an out-of-range index has to be padded.
        """
        max_n = max_n - 1
        n_pad = N // 2
        return_l = []
        for i in range(crt_i - n_pad, crt_i + n_pad + 1):
            if i < 0:
                if padding == 'replicate':
                    add_idx = 0
                elif padding == 'reflection':
                    add_idx = -i
                elif padding == 'new_info':
                    add_idx = (crt_i + n_pad) + (-i)
                elif padding == 'circle':
                    add_idx = N + i
                else:
                    raise ValueError('Wrong padding mode')
            elif i > max_n:
                if padding == 'replicate':
                    add_idx = max_n
                elif padding == 'reflection':
                    add_idx = max_n * 2 - i
                elif padding == 'new_info':
                    add_idx = (crt_i - n_pad) - (i - max_n)
                elif padding == 'circle':
                    add_idx = i - N
                else:
                    raise ValueError('Wrong padding mode')
            else:
                add_idx = i
            return_l.append(add_idx)
        # Debug-level only: this runs once per test sample.
        logger.debug('test neighbor frames: %s', return_l)
        name_b = '{:08d}'.format(crt_i)
        return return_l, name_b

    def __len__(self):
        """Return the total number of images in the dataset.
        """
        return len(self.filelist)
......@@ -27,3 +27,4 @@ from .styleganv2_model import StyleGAN2Model
from .wav2lip_model import Wav2LipModel
from .wav2lip_hq_model import Wav2LipModelHq
from .starganv2_model import StarGANv2Model
from .edvr_model import EDVRModel
from .gan_loss import GANLoss
from .perceptual_loss import PerceptualLoss
from .pixel_loss import L1Loss, MSELoss
from .pixel_loss import L1Loss, MSELoss, CharbonnierLoss
from .builder import build_criterion
......@@ -49,6 +49,27 @@ class L1Loss():
return self.loss_weight * self._l1_loss(pred, target)
@CRITERIONS.register()
class CharbonnierLoss():
    """Charbonnier loss, a smooth variant of the L1 loss.

    The loss is the sum over all elements of ``sqrt(diff ** 2 + eps)``.

    Args:
        eps (float): constant added under the square root. Default: 1e-12.
    """

    def __init__(self, eps=1e-12):
        self.eps = eps

    def __call__(self, pred, target, **kwargs):
        """Compute the loss.

        Args:
            pred (Tensor): of shape (N, C, H, W). Predicted tensor.
            target (Tensor): of shape (N, C, H, W). Ground truth tensor.
            **kwargs: accepted and ignored.

        Returns:
            Tensor: the summed (not averaged) loss value.
        """
        residual = pred - target
        smoothed_abs = paddle.sqrt(residual**2 + self.eps)
        return paddle.sum(smoothed_abs)
@CRITERIONS.register()
class MSELoss():
"""MSE (L2) loss.
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
from .builder import MODELS
from .sr_model import BaseSRModel
from .generators.edvr import ResidualBlockNoBN
from ..modules.init import reset_parameters
@MODELS.register()
class EDVRModel(BaseSRModel):
    """EDVR Model.

    Paper: EDVR: Video Restoration with Enhanced Deformable Convolutional Networks.
    """

    def __init__(self, generator, tsa_iter, pixel_criterion=None):
        """Build the model and re-initialise the generator weights.

        Args:
            generator (dict): config of generator.
            tsa_iter (int): number of initial iterations during which only
                parameters whose name contains 'TSAModule' are trained; a
                falsy value (e.g. 0) disables this warm-up.
            pixel_criterion (dict): config of pixel criterion.
        """
        super(EDVRModel, self).__init__(generator, pixel_criterion)
        self.tsa_iter = tsa_iter
        # Counts training iterations, starting from 1.
        self.current_iter = 1
        init_edvr_weight(self.nets['generator'])

    def setup_input(self, input):
        """Move one batch onto tensors and register items for visualisation.

        Reads 'lq', 'lq_path' and (optionally) 'gt' from ``input``. Frames
        0..4 along axis 1 of 'lq' are exposed individually, with frame 2
        stored under the plain 'lq' key.
        """
        self.lq = paddle.to_tensor(input['lq'])
        # (visual item key, frame index along axis 1)
        frame_slots = (('lq', 2), ('lq-2', 0), ('lq-1', 1), ('lq+1', 3),
                       ('lq+2', 4))
        for key, frame_idx in frame_slots:
            self.visual_items[key] = self.lq[:, frame_idx, :, :, :]
        if 'gt' in input:
            self.gt = paddle.to_tensor(input['gt'])
            self.visual_items['gt'] = self.gt
        self.image_paths = input['lq_path']

    def train_iter(self, optims=None):
        """Run one optimisation step on the generator.

        During the first ``tsa_iter`` iterations (when ``tsa_iter`` is
        truthy) every parameter whose name lacks 'TSAModule' is frozen;
        at iteration ``tsa_iter + 1`` all parameters are made trainable.
        """
        optimizer = optims['optim']
        optimizer.clear_grad()

        if self.tsa_iter:
            generator = self.nets['generator']
            if self.current_iter == 1:
                print('Only train TSA module for', self.tsa_iter, 'iters.')
                for name, param in generator.named_parameters():
                    if 'TSAModule' in name:
                        continue
                    param.trainable = False
            elif self.current_iter == self.tsa_iter + 1:
                print('Train all the parameters.')
                for param in generator.parameters():
                    param.trainable = True

        self.output = self.nets['generator'](self.lq)
        self.visual_items['output'] = self.output

        # pixel loss
        loss_pixel = self.pixel_criterion(self.output, self.gt)
        self.losses['loss_pixel'] = loss_pixel
        loss_pixel.backward()

        optimizer.step()
        self.current_iter += 1
def init_edvr_weight(net):
    """Re-initialise the parameters of every eligible sub-layer of ``net``.

    ``net.apply`` is called with a function that passes a layer to
    ``reset_parameters`` only when it has a ``weight`` attribute and is
    neither a batch-norm layer nor a ``ResidualBlockNoBN``.
    """

    def reset_func(layer):
        # Layers without a weight have nothing to reset.
        if not hasattr(layer, 'weight'):
            return
        if isinstance(layer, (nn.BatchNorm, nn.BatchNorm2D)):
            return
        if isinstance(layer, ResidualBlockNoBN):
            return
        reset_parameters(layer)

    net.apply(reset_func)
......@@ -27,4 +27,5 @@ from .generator_styleganv2 import StyleGANv2Generator
from .generator_pixel2style2pixel import Pixel2Style2Pixel
from .drn import DRNGenerator
from .generator_starganv2 import StarGANv2Generator, StarGANv2Style, StarGANv2Mapping, FAN
from .edvr import EDVRNet
此差异已折叠。
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype
from paddle.fluid.layers import deformable_conv
from paddle.fluid import core, layers
from paddle.fluid.layers import nn, utils
from paddle.nn import Layer
from paddle.fluid.initializer import Normal
from paddle.common_ops_import import *
class DeformConv2D(Layer):
    """Deformable 2-D convolution layer that owns its weight and bias.

    The forward pass delegates to ``deform_conv2d`` with the stored
    stride, padding, dilation and group settings.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 deformable_groups=1,
                 groups=1,
                 weight_attr=None,
                 bias_attr=None):
        """Create the convolution parameters.

        Args:
            in_channels (int): number of input channels.
            out_channels (int): number of output channels.
            kernel_size (int | list | tuple): kernel size, expanded to 2 values.
            stride (int | list | tuple): stride, expanded to 2 values.
            padding (int | list | tuple): padding, expanded to 2 values.
            dilation (int | list | tuple): dilation, expanded to 2 values.
            deformable_groups (int): number of deformable groups.
            groups (int): number of convolution groups.
            weight_attr: parameter attribute for the weight; must not be False.
            bias_attr: parameter attribute for the bias.

        Raises:
            ValueError: if ``in_channels`` is not divisible by ``groups``.
        """
        super(DeformConv2D, self).__init__()
        assert weight_attr is not False, "weight_attr should not be False in Conv."

        self._weight_attr = weight_attr
        self._bias_attr = bias_attr
        self._deformable_groups = deformable_groups
        self._groups = groups
        self._in_channels = in_channels
        self._out_channels = out_channels
        # Raw (unconverted) values are kept alongside the normalised ones.
        self.padding = padding
        self.stride = stride
        self._channel_dim = 1

        self._stride = utils.convert_to_list(stride, 2, 'stride')
        self._dilation = utils.convert_to_list(dilation, 2, 'dilation')
        self._kernel_size = utils.convert_to_list(kernel_size, 2, 'kernel_size')

        if in_channels % groups != 0:
            raise ValueError("in_channels must be divisible by groups.")

        self._padding = utils.convert_to_list(padding, 2, 'padding')

        def _get_default_param_initializer():
            # Normal initialiser with std = sqrt(2 / (kernel area * in_channels)).
            fan = np.prod(self._kernel_size) * self._in_channels
            return Normal(0.0, (2.0 / fan)**0.5, 0)

        weight_shape = [out_channels, in_channels // groups] + self._kernel_size
        self.weight = self.create_parameter(
            shape=weight_shape,
            attr=self._weight_attr,
            default_initializer=_get_default_param_initializer())
        self.bias = self.create_parameter(
            attr=self._bias_attr, shape=[self._out_channels], is_bias=True)

    def forward(self, x, offset, mask):
        """Apply the deformable convolution to ``x`` using ``offset`` and ``mask``."""
        return deform_conv2d(
            x=x,
            offset=offset,
            mask=mask,
            weight=self.weight,
            bias=self.bias,
            stride=self._stride,
            padding=self._padding,
            dilation=self._dilation,
            deformable_groups=self._deformable_groups,
            groups=self._groups,
        )
def deform_conv2d(x,
                  offset,
                  weight,
                  mask,
                  bias=None,
                  stride=1,
                  padding=0,
                  dilation=1,
                  deformable_groups=1,
                  groups=1,
                  name=None):
    """Functional deformable convolution (dygraph mode only).

    Args:
        x (Tensor): input feature map.
        offset (Tensor): sampling offsets.
        weight (Tensor): convolution kernel.
        mask (Tensor | None): modulation mask; when None the
            'deformable_conv_v1' op is used, otherwise 'deformable_conv'.
        bias (Tensor | None): optional bias added along axis 1.
        stride (int | list | tuple): stride, expanded to 2 values.
        padding (int | list | tuple): padding, expanded to 2 values.
        dilation (int | list | tuple): dilation, expanded to 2 values.
        deformable_groups (int): number of deformable groups.
        groups (int): number of convolution groups.
        name: unused; kept for interface compatibility.

    Returns:
        Tensor: the convolution output.

    Raises:
        NotImplementedError: when called outside dygraph mode. Only the
            dygraph path is implemented here, and previously this case fell
            through without ever producing an output.
    """
    stride = utils.convert_to_list(stride, 2, 'stride')
    padding = utils.convert_to_list(padding, 2, 'padding')
    dilation = utils.convert_to_list(dilation, 2, 'dilation')

    if not in_dygraph_mode():
        raise NotImplementedError(
            'deform_conv2d only supports dygraph mode.')

    attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation,
             'deformable_groups', deformable_groups, 'groups', groups,
             'im2col_step', 1)
    if mask is None:
        # v1 of the op takes no modulation mask.
        pre_bias = getattr(core.ops, 'deformable_conv_v1')(x, offset, weight,
                                                           *attrs)
    else:
        pre_bias = getattr(core.ops, 'deformable_conv')(x, offset, mask,
                                                        weight, *attrs)

    if bias is None:
        return pre_bias
    return nn.elementwise_add(pre_bias, bias, axis=1)
class DeformableConv_dygraph(Layer):
    """Wrapper around ``DeformConv2D`` with equal input and output channels."""

    def __init__(self,
                 num_filters,
                 filter_size,
                 dilation,
                 stride,
                 padding,
                 deformable_groups=1,
                 groups=1):
        """Build the inner deformable convolution.

        Args:
            num_filters (int): used as both in_channels and out_channels.
            filter_size (int | list | tuple): kernel size.
            dilation (int | list | tuple): dilation.
            stride (int | list | tuple): stride.
            padding (int | list | tuple): padding.
            deformable_groups (int): number of deformable groups.
            groups (int): number of convolution groups.
        """
        super(DeformableConv_dygraph, self).__init__()
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.dilation = dilation
        self.stride = stride
        self.padding = padding
        self.deformable_groups = deformable_groups
        self.groups = groups
        self.defor_conv = DeformConv2D(
            in_channels=self.num_filters,
            out_channels=self.num_filters,
            kernel_size=self.filter_size,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            deformable_groups=self.deformable_groups,
            groups=self.groups,
            weight_attr=None,
            bias_attr=None)

    def forward(self, *inputs):
        """Run the convolution on ``(x, offset, mask)`` given positionally."""
        feature, offset, mask = inputs[0], inputs[1], inputs[2]
        return self.defor_conv(feature, offset, mask)
......@@ -324,3 +324,10 @@ def init_weights(net,
logger = get_logger()
logger.debug('initialize network with %s' % init_type)
net.apply(init_func) # apply the initialization function <init_func>
def reset_parameters(m):
    """Re-initialise the weight and (if present) the bias of layer ``m``.

    The weight is filled with ``kaiming_uniform_`` using ``a = sqrt(5)``.
    The bias is drawn uniformly from ``[-1/sqrt(fan_in), 1/sqrt(fan_in)]``,
    where ``fan_in`` is computed from the weight.

    Args:
        m: a layer with a ``weight`` attribute and an optional ``bias``.
    """
    kaiming_uniform_(m.weight, a=math.sqrt(5))
    # Some layers expose a weight but no bias attribute at all.
    bias = getattr(m, 'bias', None)
    if bias is None:
        return
    fan_in, _ = _calculate_fan_in_and_fan_out(m.weight)
    # A zero fan-in would divide by zero; leave the bias untouched then.
    if fan_in > 0:
        bound = 1 / math.sqrt(fan_in)
        uniform_(bias, -bound, bound)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册