Unverified · Commit 5972bf7f · authored by LielinJiang · committed by GitHub

Add RealSR (#143)

* add realsr model
Parent 89dbb63f
total_iters: 60000
output_dir: output_dir
# tensor range for function tensor2img
min_max:
  (0., 1.)

model:
  name: ESRGAN
  generator:
    name: RRDBNet
    in_nc: 3
    out_nc: 3
    nf: 64
    nb: 23
  discriminator:
    name: VGGDiscriminator128
    in_channels: 3
    num_feat: 64
  pixel_criterion:
    name: L1Loss
    loss_weight: !!float 1e-2
  perceptual_criterion:
    name: PerceptualLoss
    layer_weights:
      '34': 1.0
    perceptual_weight: 1.0
    style_weight: 0.0
    norm_img: False
  gan_criterion:
    name: GANLoss
    gan_mode: vanilla
    loss_weight: !!float 5e-3

dataset:
  train:
    name: SRDataset
    gt_folder: data/realsr_preprocess/DF2K/generated/tdsr/HR_sub/
    lq_folder: data/realsr_preprocess/DF2K/generated/tdsr/LR_sub/
    num_workers: 4
    batch_size: 16
    scale: 4
    preprocess:
      - name: LoadImageFromFile
        key: lq
      - name: LoadImageFromFile
        key: gt
      - name: Transforms
        input_keys: [lq, gt]
        pipeline:
          - name: SRPairedRandomCrop
            gt_patch_size: 128
            scale: 4
            keys: [image, image]
          - name: PairedRandomHorizontalFlip
            keys: [image, image]
          - name: PairedRandomVerticalFlip
            keys: [image, image]
          - name: PairedRandomTransposeHW
            keys: [image, image]
          - name: Transpose
            keys: [image, image]
          - name: Normalize
            mean: [0., 0., 0.]
            std: [255., 255., 255.]
            keys: [image, image]
          - name: SRNoise
            noise_path: data/realsr_preprocess/DF2K/Corrupted_noise/
            size: 32
            keys: [image]
  test:
    name: SRDataset
    gt_folder: data/DIV2K/val_set14/Set14
    lq_folder: data/DIV2K/val_set14/Set14_bicLRx4
    scale: 4
    preprocess:
      - name: LoadImageFromFile
        key: lq
      - name: LoadImageFromFile
        key: gt
      - name: Transforms
        input_keys: [lq, gt]
        pipeline:
          - name: Transpose
            keys: [image, image]
          - name: Normalize
            mean: [0., 0., 0.]
            std: [255., 255., 255.]
            keys: [image, image]

lr_scheduler:
  name: MultiStepDecay
  learning_rate: 0.0001
  milestones: [5000, 10000, 20000, 30000]
  gamma: 0.5

optimizer:
  optimG:
    name: Adam
    net_names:
      - generator
    weight_decay: 0.0
    beta1: 0.9
    beta2: 0.999
  optimD:
    name: Adam
    net_names:
      - discriminator
    weight_decay: 0.0
    beta1: 0.9
    beta2: 0.999

validate:
  interval: 5000
  save_img: false
  metrics:
    psnr: # metric name, can be arbitrary
      name: PSNR
      crop_border: 4
      test_y_channel: false
    ssim:
      name: SSIM
      crop_border: 4
      test_y_channel: false

log_config:
  interval: 100
  visiual_interval: 500

snapshot_config:
  interval: 5000
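The config above is the DF2K variant of RealSR training (bicubic-downscaled LR images with injected DF2K noise patches); the config that follows is the DPED variant. As a quick sanity check, here is a minimal sketch (not part of the commit) of reading such a config with PyYAML; the filename is an assumption:

import yaml

# Hypothetical path; use wherever this YAML is saved.
with open('configs/realsr_bicubic_noise_x4_df2k.yaml') as f:
    cfg = yaml.safe_load(f)  # safe_load also resolves the !!float tags

print(cfg['model']['generator']['name'])      # RRDBNet
print(cfg['dataset']['train']['batch_size'])  # 16
print(cfg['lr_scheduler']['milestones'])      # [5000, 10000, 20000, 30000]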
total_iters: 60000
output_dir: output_dir
# tensor range for function tensor2img
min_max:
  (0., 1.)

model:
  name: ESRGAN
  generator:
    name: RRDBNet
    in_nc: 3
    out_nc: 3
    nf: 64
    nb: 23
  discriminator:
    name: VGGDiscriminator128
    in_channels: 3
    num_feat: 64
  pixel_criterion:
    name: L1Loss
    loss_weight: !!float 1e-2
  perceptual_criterion:
    name: PerceptualLoss
    layer_weights:
      '34': 1.0
    perceptual_weight: 1.0
    style_weight: 0.0
    norm_img: False
  gan_criterion:
    name: GANLoss
    gan_mode: vanilla
    loss_weight: !!float 5e-3

dataset:
  train:
    name: SRDataset
    gt_folder: data/realsr_preprocess/DPED/generated/clean/train_tdsr/HR/
    lq_folder: data/realsr_preprocess/DPED/generated/clean/train_tdsr/LR/
    num_workers: 4
    batch_size: 16
    scale: 4
    preprocess:
      - name: LoadImageFromFile
        key: lq
      - name: LoadImageFromFile
        key: gt
      - name: Transforms
        input_keys: [lq, gt]
        pipeline:
          - name: SRPairedRandomCrop
            gt_patch_size: 128
            scale: 4
            keys: [image, image]
          - name: PairedRandomHorizontalFlip
            keys: [image, image]
          - name: PairedRandomVerticalFlip
            keys: [image, image]
          - name: PairedRandomTransposeHW
            keys: [image, image]
          - name: Transpose
            keys: [image, image]
          - name: Normalize
            mean: [0., 0., 0.]
            std: [255., 255., 255.]
            keys: [image, image]
          - name: SRNoise
            noise_path: data/realsr_preprocess/DPED/DPEDiphone_noise/
            size: 32
            keys: [image]
  test:
    name: SRDataset
    gt_folder: data/DIV2K/val_set14/Set14
    lq_folder: data/DIV2K/val_set14/Set14_bicLRx4
    scale: 4
    preprocess:
      - name: LoadImageFromFile
        key: lq
      - name: LoadImageFromFile
        key: gt
      - name: Transforms
        input_keys: [lq, gt]
        pipeline:
          - name: Transpose
            keys: [image, image]
          - name: Normalize
            mean: [0., 0., 0.]
            std: [255., 255., 255.]
            keys: [image, image]

lr_scheduler:
  name: MultiStepDecay
  learning_rate: 0.0001
  milestones: [5000, 10000, 20000, 30000]
  gamma: 0.5

optimizer:
  optimG:
    name: Adam
    net_names:
      - generator
    weight_decay: 0.0
    beta1: 0.9
    beta2: 0.999
  optimD:
    name: Adam
    net_names:
      - discriminator
    weight_decay: 0.0
    beta1: 0.9
    beta2: 0.999

validate:
  interval: 5000
  save_img: false
  metrics:
    psnr: # metric name, can be arbitrary
      name: PSNR
      crop_border: 4
      test_y_channel: false
    ssim:
      name: SSIM
      crop_border: 4
      test_y_channel: false

log_config:
  interval: 100
  visiual_interval: 500

snapshot_config:
  interval: 5000
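This second config differs from the first only in its training data: DPED HR/LR folders and DPED iPhone noise patches. Both share the same MultiStepDecay schedule; a small illustration (not commit code) of the learning rate it produces:

# The learning rate starts at 1e-4 and is halved (gamma=0.5) at each milestone.
def lr_at(it, base_lr=1e-4, milestones=(5000, 10000, 20000, 30000), gamma=0.5):
    return base_lr * gamma ** sum(it >= m for m in milestones)

print(lr_at(0), lr_at(5000), lr_at(30000))  # 0.0001 5e-05 6.25e-06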
from PIL import Image
import numpy as np
import os.path as osp
import glob
import os
import argparse
import yaml

parser = argparse.ArgumentParser(description='create a dataset')
parser.add_argument('--dataset',
                    default='df2k',
                    type=str,
                    help='selecting different datasets')
parser.add_argument('--artifacts',
                    default='',
                    type=str,
                    help='selecting different artifacts type')
parser.add_argument('--cleanup_factor',
                    default=2,
                    type=int,
                    help='downscaling factor for image cleanup')
parser.add_argument('--upscale_factor',
                    default=4,
                    type=int,
                    choices=[4],
                    help='super resolution upscale factor')
opt = parser.parse_args()

# define input and target directories
with open('./preprocess/paths.yml', 'r') as stream:
    PATHS = yaml.load(stream)


def noise_patch(rgb_img, sp, max_var, min_mean):
    img = rgb_img.convert('L')
    rgb_img = np.array(rgb_img)
    img = np.array(img)

    w, h = img.shape
    collect_patchs = []

    for i in range(0, w - sp, sp):
        for j in range(0, h - sp, sp):
            patch = img[i:i + sp, j:j + sp]
            var_global = np.var(patch)
            mean_global = np.mean(patch)
            if var_global < max_var and mean_global > min_mean:
                rgb_patch = rgb_img[i:i + sp, j:j + sp, :]
                collect_patchs.append(rgb_patch)

    return collect_patchs


if __name__ == '__main__':
    if opt.dataset == 'df2k':
        img_dir = PATHS[opt.dataset][opt.artifacts]['source']
        noise_dir = PATHS['datasets']['df2k'] + '/Corrupted_noise'
        sp = 256
        max_var = 20
        min_mean = 0
    else:
        img_dir = PATHS[opt.dataset][opt.artifacts]['hr']['train']
        noise_dir = PATHS['datasets']['dped'] + '/DPEDiphone_noise'
        sp = 256
        max_var = 20
        min_mean = 50

    assert not os.path.exists(noise_dir)
    os.mkdir(noise_dir)

    img_paths = sorted(glob.glob(osp.join(img_dir, '*.png')))
    cnt = 0
    for path in img_paths:
        img_name = osp.splitext(osp.basename(path))[0]
        print('**********', img_name, '**********')
        img = Image.open(path).convert('RGB')
        patchs = noise_patch(img, sp, max_var, min_mean)
        for idx, patch in enumerate(patchs):
            save_path = osp.join(noise_dir,
                                 '{}_{:03}.png'.format(img_name, idx))
            cnt += 1
            print('collect:', cnt, save_path)
            Image.fromarray(patch).save(save_path)
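This noise-collection script walks each source image in sp-sized steps and keeps a patch only when its grayscale variance is below max_var (the region is nearly flat, so what remains is mostly sensor noise) and its mean is above min_mean (not under-exposed). A self-contained sketch of that criterion with illustrative values:

import numpy as np

np.random.seed(0)
patch = 120.0 + np.random.randn(256, 256)   # flat, bright region plus faint noise
keep = np.var(patch) < 20 and np.mean(patch) > 50
print(keep)  # True -> this patch would be collected as a noise sample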
import argparse
import os
import yaml
import utils
from PIL import Image
from tqdm import tqdm

import paddle
import paddle.vision.transforms.functional as TF

paddle.set_device('cpu')

parser = argparse.ArgumentParser(
    description='Apply the trained model to create a dataset')
parser.add_argument('--checkpoint',
                    default=None,
                    type=str,
                    help='checkpoint model to use')
parser.add_argument('--artifacts',
                    default='',
                    type=str,
                    help='selecting different artifacts type')
parser.add_argument('--name',
                    default='',
                    type=str,
                    help='additional string added to folder path')
parser.add_argument('--dataset',
                    default='df2k',
                    type=str,
                    help='selecting different datasets')
parser.add_argument('--track',
                    default='train',
                    type=str,
                    help='selecting train or valid track')
parser.add_argument('--num_res_blocks',
                    default=8,
                    type=int,
                    help='number of ResNet blocks')
parser.add_argument('--cleanup_factor',
                    default=2,
                    type=int,
                    help='downscaling factor for image cleanup')
parser.add_argument('--upscale_factor',
                    default=4,
                    type=int,
                    choices=[4],
                    help='super resolution upscale factor')
opt = parser.parse_args()

# define input and target directories
with open('./paths.yml', 'r') as stream:
    PATHS = yaml.load(stream)

if opt.dataset == 'df2k':
    path_sdsr = PATHS['datasets']['df2k'] + '/generated/sdsr/'
    path_tdsr = PATHS['datasets']['df2k'] + '/generated/tdsr/'
    input_source_dir = PATHS['df2k']['tdsr']['source']
    input_target_dir = PATHS['df2k']['tdsr']['target']
    source_files = [
        os.path.join(input_source_dir, x) for x in os.listdir(input_source_dir)
        if utils.is_image_file(x)
    ]
    target_files = [
        os.path.join(input_target_dir, x) for x in os.listdir(input_target_dir)
        if utils.is_image_file(x)
    ]
else:
    path_sdsr = PATHS['datasets'][opt.dataset] + '/generated/' \
        + opt.artifacts + '/' + opt.track + opt.name + '_sdsr/'
    path_tdsr = PATHS['datasets'][opt.dataset] + '/generated/' \
        + opt.artifacts + '/' + opt.track + opt.name + '_tdsr/'
    input_source_dir = PATHS[opt.dataset][opt.artifacts]['hr'][opt.track]
    input_target_dir = None
    source_files = [
        os.path.join(input_source_dir, x) for x in os.listdir(input_source_dir)
        if utils.is_image_file(x)
    ]
    target_files = []

tdsr_hr_dir = path_tdsr + 'HR'
tdsr_lr_dir = path_tdsr + 'LR'

assert not os.path.exists(PATHS['datasets'][opt.dataset])

if not os.path.exists(tdsr_hr_dir):
    os.makedirs(tdsr_hr_dir)
if not os.path.exists(tdsr_lr_dir):
    os.makedirs(tdsr_lr_dir)

# generate the noisy images
with paddle.no_grad():
    for file in tqdm(source_files, desc='Generating images from source'):
        # load HR image
        input_img = Image.open(file)
        input_img = TF.to_tensor(input_img)

        # Resize HR image to clean it up and make sure it can be resized again
        resize2_img = utils.imresize(input_img, 1.0 / opt.cleanup_factor, True)
        _, w, h = resize2_img.shape
        w = w - w % opt.upscale_factor
        h = h - h % opt.upscale_factor
        resize2_cut_img = resize2_img[:, :w, :h]

        # Save resize2_cut_img as HR image for TDSR
        path = os.path.join(tdsr_hr_dir, os.path.basename(file))
        utils.to_pil_image(resize2_cut_img).save(path, 'PNG')

        # Generate resize3_cut_img and apply model
        resize3_cut_img = utils.imresize(resize2_cut_img,
                                         1.0 / opt.upscale_factor, True)

        # Save resize3_cut_noisy_img as LR image for TDSR
        path = os.path.join(tdsr_lr_dir, os.path.basename(file))
        utils.to_pil_image(resize3_cut_img).save(path, 'PNG')

    for file in tqdm(target_files, desc='Generating images from target'):
        # load HR image
        input_img = Image.open(file)
        input_img = TF.to_tensor(input_img)

        # Save input_img as HR image for TDSR
        path = os.path.join(tdsr_hr_dir, os.path.basename(file))
        utils.to_pil_image(input_img).save(path, 'PNG')

        # generate resized version of input_img
        resize_img = utils.imresize(input_img, 1.0 / opt.upscale_factor, True)

        # Save resize_noisy_img as LR image for TDSR
        path = os.path.join(tdsr_lr_dir, os.path.basename(file))
        utils.to_pil_image(resize_img).save(path, 'PNG')
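Note the size bookkeeping in this script: the HR image is first downscaled by cleanup_factor, then cropped so both sides are multiples of upscale_factor, which guarantees the subsequent 1/upscale_factor resize yields integer dimensions. A worked example (values are illustrative):

w, h = 1021, 767           # size after the cleanup downscale
w -= w % 4; h -= h % 4     # crop to 1020 x 764
assert (w % 4 == 0) and (h % 4 == 0)
print(w // 4, h // 4)      # LR size: 255 x 191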
import os
import yaml
import glob
import utils
import argparse
import numpy as np
from PIL import Image
from tqdm import tqdm
from imresize import imresize
from scipy.io import loadmat

import paddle
import paddle.vision.transforms.functional as TF

paddle.set_device('cpu')

parser = argparse.ArgumentParser(
    description='Apply the trained model to create a dataset')
parser.add_argument('--kernel_path',
                    default='./preprocess/KernelGAN/results',
                    type=str,
                    help='kernel path to use')
parser.add_argument('--artifacts',
                    default='',
                    type=str,
                    help='selecting different artifacts type')
parser.add_argument('--name',
                    default='',
                    type=str,
                    help='additional string added to folder path')
parser.add_argument('--dataset',
                    default='df2k',
                    type=str,
                    help='selecting different datasets')
parser.add_argument('--track',
                    default='train',
                    type=str,
                    help='selecting train or valid track')
parser.add_argument('--num_res_blocks',
                    default=8,
                    type=int,
                    help='number of ResNet blocks')
parser.add_argument('--cleanup_factor',
                    default=2,
                    type=int,
                    help='downscaling factor for image cleanup')
parser.add_argument('--upscale_factor',
                    default=4,
                    type=int,
                    choices=[4],
                    help='super resolution upscale factor')
opt = parser.parse_args()

# define input and target directories
with open('./paths.yml', 'r') as stream:
    PATHS = yaml.load(stream)

if opt.dataset == 'df2k':
    path_sdsr = PATHS['datasets']['df2k'] + '/generated/sdsr/'
    path_tdsr = PATHS['datasets']['df2k'] + '/generated/tdsr/'
    input_source_dir = PATHS['df2k']['tdsr']['source']
    input_target_dir = PATHS['df2k']['tdsr']['target']
    source_files = [
        os.path.join(input_source_dir, x) for x in os.listdir(input_source_dir)
        if utils.is_image_file(x)
    ]
    target_files = [
        os.path.join(input_target_dir, x) for x in os.listdir(input_target_dir)
        if utils.is_image_file(x)
    ]
else:
    path_sdsr = PATHS['datasets'][opt.dataset] + '/generated/' \
        + opt.artifacts + '/' + opt.track + opt.name + '_sdsr/'
    path_tdsr = PATHS['datasets'][opt.dataset] + '/generated/' \
        + opt.artifacts + '/' + opt.track + opt.name + '_tdsr/'
    input_source_dir = PATHS[opt.dataset][opt.artifacts]['hr'][opt.track]
    input_target_dir = None
    source_files = [
        os.path.join(input_source_dir, x) for x in os.listdir(input_source_dir)
        if utils.is_image_file(x)
    ]
    target_files = []

tdsr_hr_dir = path_tdsr + 'HR'
tdsr_lr_dir = path_tdsr + 'LR'

assert not os.path.exists(PATHS['datasets'][opt.dataset])

if not os.path.exists(tdsr_hr_dir):
    os.makedirs(tdsr_hr_dir)
if not os.path.exists(tdsr_lr_dir):
    os.makedirs(tdsr_lr_dir)

kernel_paths = glob.glob(os.path.join(opt.kernel_path, '*/*_kernel_x4.mat'))
kernel_num = len(kernel_paths)
print('kernel_num: ', kernel_num)

# generate the noisy images
with paddle.no_grad():
    for file in tqdm(source_files, desc='Generating images from source'):
        # load HR image
        input_img = Image.open(file)
        input_img = TF.to_tensor(input_img)

        # Resize HR image to clean it up and make sure it can be resized again
        resize2_img = utils.imresize(input_img, 1.0 / opt.cleanup_factor, True)
        _, w, h = resize2_img.shape
        w = w - w % opt.upscale_factor
        h = h - h % opt.upscale_factor
        resize2_cut_img = resize2_img[:, :w, :h]

        # Save resize2_cut_img as HR image for TDSR
        path = os.path.join(tdsr_hr_dir, os.path.basename(file))
        resize2_cut_img = utils.to_pil_image(resize2_cut_img)
        resize2_cut_img.save(path, 'PNG')

        # Generate resize3_cut_img and apply model
        kernel_path = kernel_paths[np.random.randint(0, kernel_num)]
        mat = loadmat(kernel_path)
        k = np.array([mat['Kernel']]).squeeze()
        resize3_cut_img = imresize(np.array(resize2_cut_img),
                                   scale_factor=1.0 / opt.upscale_factor,
                                   kernel=k)

        # Save resize3_cut_img as LR image for TDSR
        path = os.path.join(tdsr_lr_dir, os.path.basename(file))
        utils.to_pil_image(resize3_cut_img).save(path, 'PNG')

    for file in tqdm(target_files, desc='Generating images from target'):
        # load HR image
        input_img = Image.open(file)
        input_img = TF.to_tensor(input_img)

        # Save input_img as HR image for TDSR
        path = os.path.join(tdsr_hr_dir, os.path.basename(file))
        HR_img = utils.to_pil_image(input_img)
        HR_img.save(path, 'PNG')

        # generate resized version of input_img
        kernel_path = kernel_paths[np.random.randint(0, kernel_num)]
        mat = loadmat(kernel_path)
        k = np.array([mat['Kernel']]).squeeze()
        resize_img = imresize(np.array(HR_img),
                              scale_factor=1.0 / opt.upscale_factor,
                              kernel=k)

        # Save resize_noisy_img as LR image for TDSR
        path = os.path.join(tdsr_lr_dir, os.path.basename(file))
        utils.to_pil_image(resize_img).save(path, 'PNG')
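Unlike the bicubic script, this variant degrades each image with a randomly chosen KernelGAN-estimated blur kernel loaded from a .mat file. A hedged sketch of inspecting one such kernel the way the script consumes it (the path is hypothetical; a properly normalized blur kernel should sum to roughly 1):

import numpy as np
from scipy.io import loadmat

mat = loadmat('KernelGAN/results/img_001/img_001_kernel_x4.mat')  # hypothetical path
k = np.array([mat['Kernel']]).squeeze()
print(k.shape, k.sum())  # e.g. (13, 13) and a sum close to 1.0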
# reference from kernelgan
import numpy as np
from scipy.ndimage import filters, measurements, interpolation
from math import pi


def imresize(im,
             scale_factor=None,
             output_shape=None,
             kernel=None,
             antialiasing=True,
             kernel_shift_flag=False):
    # First standardize values and fill missing arguments (if needed) by
    # deriving the scale from the output shape or vice versa
    scale_factor, output_shape = fix_scale_and_size(im.shape, output_shape,
                                                    scale_factor)

    # For a given numeric kernel, just do convolution and sub-sampling
    # (downscaling only)
    if type(kernel) == np.ndarray and scale_factor[0] <= 1:
        return numeric_kernel(im, kernel, scale_factor, output_shape,
                              kernel_shift_flag)

    # Choose the interpolation method; each method has a matching kernel size
    method, kernel_width = {
        "cubic": (cubic, 4.0),
        "lanczos2": (lanczos2, 4.0),
        "lanczos3": (lanczos3, 6.0),
        "box": (box, 1.0),
        "linear": (linear, 2.0),
        None: (cubic, 4.0)  # Default interpolation method is cubic
    }.get(kernel)

    # Antialiasing is only used when downscaling
    antialiasing *= (scale_factor[0] < 1)

    # Sort dimension indices by the scale of each dimension; since we resize
    # dim by dim, this order is efficient
    sorted_dims = np.argsort(np.array(scale_factor)).tolist()

    # Iterate over dimensions, calculate local weights for resizing, and
    # resize one direction at a time
    out_im = np.copy(im)
    for dim in sorted_dims:
        # No point doing calculations for scale-factor 1; nothing would
        # happen anyway
        if scale_factor[dim] == 1.0:
            continue

        # For each coordinate (along 1 dim), calculate which coordinates in
        # the input image affect its result and the weights that multiply the
        # values there to get that result.
        weights, field_of_view = contributions(im.shape[dim],
                                               output_shape[dim],
                                               scale_factor[dim], method,
                                               kernel_width, antialiasing)

        # Use the affecting positions and the set of weights to compute the
        # result of resizing along this one dimension
        out_im = resize_along_dim(out_im, dim, weights, field_of_view)

    return out_im


def fix_scale_and_size(input_shape, output_shape, scale_factor):
    # First fix the scale-factor (if given) to the standardized form the
    # function expects: a list of scale factors, one per input dimension
    if scale_factor is not None:
        # By default, if scale-factor is a scalar we assume 2d resizing and
        # duplicate it.
        if np.isscalar(scale_factor):
            scale_factor = [scale_factor, scale_factor]

        # Extend the scale-factor list to the number of input dimensions by
        # assigning 1 to all unspecified scales
        scale_factor = list(scale_factor)
        scale_factor.extend([1] * (len(input_shape) - len(scale_factor)))

    # Fix the output-shape (if given): extend it to the size of the
    # input-shape by assigning the original input size to all unspecified
    # dimensions
    if output_shape is not None:
        output_shape = list(np.uint(np.array(output_shape))) + list(
            input_shape[len(output_shape):])

    # Deal with a missing scale-factor by deriving it from the output-shape.
    # Note that this is sub-optimal: different scales can produce the same
    # output-shape.
    if scale_factor is None:
        scale_factor = 1.0 * np.array(output_shape) / np.array(input_shape)

    # Deal with a missing output-shape by deriving it from the scale-factor
    if output_shape is None:
        output_shape = np.uint(
            np.ceil(np.array(input_shape) * np.array(scale_factor)))

    return scale_factor, output_shape


def contributions(in_length, out_length, scale, kernel, kernel_width,
                  antialiasing):
    # This function calculates a set of 'filters' and a field_of_view that
    # will later be applied so that each position in field_of_view is
    # multiplied by the matching filter in 'weights', based on the
    # interpolation method and the distance of the sub-pixel location from
    # the pixel centers around it. This is done for one image dimension only.
    # When anti-aliasing is activated (default, and only when downscaling)
    # the receptive field is stretched to 1/sf, making the filtering more of
    # a low-pass filter.
    fixed_kernel = (
        lambda arg: scale * kernel(scale * arg)) if antialiasing else kernel
    kernel_width *= 1.0 / scale if antialiasing else 1.0

    # These are the coordinates of the output image
    out_coordinates = np.arange(1, out_length + 1)

    # These are the matching positions of the output coordinates on the input
    # image. Best explained by example: say we have 4 horizontal pixels in
    # the HR image and downscale by SF=2 to get 2 pixels: [1,2,3,4] -> [1,2].
    # Each pixel number marks the middle of the pixel. The scaling maps
    # distances, not pixel numbers: the right boundary of pixel 4 maps to the
    # right boundary of pixel 2, and pixel 1 in the small image matches the
    # boundary between pixels 1 and 2 in the big one, not pixel 2. So the new
    # position is not simply the old position times the scale-factor.
    # Measuring distance from the left border, the middle of pixel 1 is at
    # d=0.5 and the border between pixels 1 and 2 is at d=1 (d = p - 0.5).
    # We require d_new = d_old / sf, which gives:
    # p_new - 0.5 = (p_old - 0.5) / sf  ->  p_new = p_old/sf + 0.5*(1 - 1/sf)
    match_coordinates = 1.0 * out_coordinates / scale + 0.5 * (1 - 1.0 / scale)

    # This is the left boundary from which to start multiplying the filter;
    # it depends on the size of the filter
    left_boundary = np.floor(match_coordinates - kernel_width / 2)

    # The kernel width needs to be enlarged because, with sub-pixel borders,
    # the window must 'see' the centers of pixels it only partly covers. So
    # we add one pixel on each side (the weights can zeroize them).
    expanded_kernel_width = np.ceil(kernel_width) + 2

    # Determine the field_of_view for each output position: the input pixels
    # that the output pixel 'sees'. We get a matrix whose horizontal dim is
    # the output pixels and whose vertical dim is the pixels it 'sees'
    # (kernel_size + 2).
    field_of_view = np.squeeze(
        np.uint(
            np.expand_dims(left_boundary, axis=1) +
            np.arange(expanded_kernel_width) - 1))

    # Assign a weight to each pixel in the field of view: a matrix whose
    # horizontal dim is the output pixels and whose vertical dim is the list
    # of weights matching the pixels in 'field_of_view'
    weights = fixed_kernel(1.0 * np.expand_dims(match_coordinates, axis=1) -
                           field_of_view - 1)

    # Normalize the weights to sum to 1, being careful not to divide by 0
    sum_weights = np.sum(weights, axis=1)
    sum_weights[sum_weights == 0] = 1.0
    weights = 1.0 * weights / np.expand_dims(sum_weights, axis=1)

    # We use this mirror structure as a trick for reflection padding at the
    # boundaries
    mirror = np.uint(
        np.concatenate(
            (np.arange(in_length), np.arange(in_length - 1, -1, step=-1))))
    field_of_view = mirror[np.mod(field_of_view, mirror.shape[0])]

    # Get rid of weights and pixel positions that are of zero weight
    non_zero_out_pixels = np.nonzero(np.any(weights, axis=0))
    weights = np.squeeze(weights[:, non_zero_out_pixels])
    field_of_view = np.squeeze(field_of_view[:, non_zero_out_pixels])

    # The final products are the relative positions and the matching weights,
    # both of shape output_size x fixed_kernel_size
    return weights, field_of_view


def resize_along_dim(im, dim, weights, field_of_view):
    # To be able to act on each dim, swap so that dim 0 is the dim we want
    # to resize
    tmp_im = np.swapaxes(im, dim, 0)

    # Add singleton dimensions to the weight matrix so we can multiply it
    # with the big tensor we get for tmp_im[field_of_view.T] (bsxfun style)
    weights = np.reshape(weights.T,
                         list(weights.T.shape) + (np.ndim(im) - 1) * [1])

    # This is a slightly complicated multiplication: tmp_im[field_of_view.T]
    # is a tensor of order image_dims+1. For each pixel in the output image
    # it gathers the input positions that influence it (along one dim only,
    # which is why it adds only one dim to the shape). We then multiply, for
    # each pixel, its set of positions by the matching set of weights via a
    # big element-wise tensor multiplication (MATLAB bsxfun style: matching
    # dims are multiplied element-wise, singleton dims broadcast).
    tmp_out_im = np.sum(tmp_im[field_of_view.T] * weights, axis=0)

    # Finally swap the axes back to the original order
    return np.swapaxes(tmp_out_im, dim, 0)


def numeric_kernel(im, kernel, scale_factor, output_shape, kernel_shift_flag):
    # See the kernel_shift function to understand what this is
    if kernel_shift_flag:
        kernel = kernel_shift(kernel, scale_factor)

    # First run a correlation (convolution with a flipped kernel)
    out_im = np.zeros_like(im)
    for channel in range(np.ndim(im)):
        out_im[:, :, channel] = filters.correlate(im[:, :, channel], kernel)

    # Then subsample and return
    return out_im[np.round(
        np.linspace(0, im.shape[0] - 1 / scale_factor[0],
                    output_shape[0])).astype(int)[:, None],
                  np.round(
                      np.linspace(0, im.shape[1] - 1 / scale_factor[1],
                                  output_shape[1])).astype(int), :]


def kernel_shift(kernel, sf):
    # There are two reasons for shifting the kernel:
    # 1. The center of mass is not at the center of the kernel, which creates
    #    ambiguity. There is no way to know whether the degradation process
    #    included shifting, so we always assume the center of mass is the
    #    center of the kernel.
    # 2. We further shift the kernel center so that the top-left result pixel
    #    corresponds to the middle of the sf x sf first pixels. By default an
    #    odd-sized kernel sits in the middle of the first pixel and an
    #    even-sized kernel at the top-left corner of the first pixel, which
    #    is why odd and even sizes need different shifts.
    # Once these two conditions hold, we are aligned. The way to test it:
    # the input image, when interpolated (regular bicubic), is exactly
    # aligned with the ground truth.

    # First calculate the current center of mass of the kernel
    current_center_of_mass = measurements.center_of_mass(kernel)

    # The second term ("+ 0.5 * ....") applies condition 2 above
    wanted_center_of_mass = np.array(
        kernel.shape) // 2 + 0.5 * (sf - (kernel.shape[0] % 2))
    # wanted_center_of_mass = np.array(kernel.shape) / 2 + 0.5 * (np.array(sf)[0:2] - (kernel.shape[0] % 2))

    # Define the shift vector for the kernel shifting (x, y)
    shift_vec = wanted_center_of_mass - current_center_of_mass

    # Before applying the shift, pad the kernel so that nothing is lost due
    # to the shift (biggest shift among dims, +1 for safety)
    kernel = np.pad(kernel, int(np.ceil(np.max(shift_vec))) + 1, 'constant')

    # Finally shift the kernel and return
    return interpolation.shift(kernel, shift_vec)


# These next functions are all interpolation methods. x is the distance from
# the left pixel center


def cubic(x):
    absx = np.abs(x)
    absx2 = absx**2
    absx3 = absx**3
    return ((1.5 * absx3 - 2.5 * absx2 + 1) * (absx <= 1) +
            (-0.5 * absx3 + 2.5 * absx2 - 4 * absx + 2) * ((1 < absx) &
                                                           (absx <= 2)))


def lanczos2(x):
    return (((np.sin(pi * x) * np.sin(pi * x / 2) + np.finfo(np.float32).eps) /
             ((pi**2 * x**2 / 2) + np.finfo(np.float32).eps)) * (abs(x) < 2))


def box(x):
    return ((-0.5 <= x) & (x < 0.5)) * 1.0


def lanczos3(x):
    return (((np.sin(pi * x) * np.sin(pi * x / 3) + np.finfo(np.float32).eps) /
             ((pi**2 * x**2 / 3) + np.finfo(np.float32).eps)) * (abs(x) < 3))


def linear(x):
    return (x + 1) * ((-1 <= x) & (x < 0)) + (1 - x) * ((0 <= x) & (x <= 1))
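A minimal usage sketch for this module, assuming it is saved as imresize.py: downscaling an HWC float image by 1/4 with the default cubic kernel and antialiasing.

import numpy as np
from imresize import imresize

img = np.random.rand(64, 64, 3)
small = imresize(img, scale_factor=1.0 / 4)
print(small.shape)  # (16, 16, 3)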
df2k:
  tdsr:
    source: '/workspace/datasets/ntire20/Corrupted-tr-x'
    target: '/workspace/datasets/ntire20/Corrupted-tr-y'
    valid:

dped:
  clean:
    hr:
      train: '/workspace/datasets/ntire20/DPEDiphone-tr-x'
      valid: '/workspace/datasets/ntire20/DPEDiphone-va'

datasets:
  df2k: 'DF2K'
  dped: 'DPED'
import math

import numpy as np
from PIL import Image

import paddle

# set random seed for reproducibility
np.random.seed(0)


def is_image_file(filename):
    return any(
        filename.endswith(extension)
        for extension in ['.png', '.jpg', '.jpeg', '.PNG', '.JPG', '.JPEG'])


def calculate_valid_crop_size(crop_size, upscale_factor):
    return crop_size - (crop_size % upscale_factor)


def gaussian_noise(image, std_dev):
    noise = np.rint(
        np.random.normal(loc=0.0, scale=std_dev, size=np.shape(image)))
    return Image.fromarray(np.clip(image + noise, 0, 255).astype(np.uint8))


#################################################################################
# MATLAB imresize taken from ESRGAN (https://github.com/xinntao/BasicSR)
#################################################################################


def cubic(x):
    absx = paddle.abs(x)
    absx2 = absx**2
    absx3 = absx**3
    temp1 = paddle.cast((absx <= 1), absx.dtype)
    temp2 = paddle.cast((absx > 1), absx.dtype) * paddle.cast(
        (absx <= 2), absx.dtype)
    return (1.5 * absx3 - 2.5 * absx2 +
            1) * temp1 + (-0.5 * absx3 + 2.5 * absx2 - 4 * absx + 2) * temp2


def calculate_weights_indices(in_length, out_length, scale, kernel,
                              kernel_width, antialiasing):
    if (scale < 1) and (antialiasing):
        # Use a modified kernel to simultaneously interpolate and antialias:
        # a larger kernel width
        kernel_width = kernel_width / scale

    # Output-space coordinates
    x = paddle.linspace(1, out_length, out_length)

    # Input-space coordinates. Calculate the inverse mapping such that 0.5
    # in output space maps to 0.5 in input space, and 0.5+scale in output
    # space maps to 1.5 in input space.
    u = x / scale + 0.5 * (1 - 1 / scale)

    # What is the left-most pixel that can be involved in the computation?
    left = paddle.floor(u - kernel_width / 2)

    # What is the maximum number of pixels that can be involved in the
    # computation? Note: it's OK to use an extra pixel here; if the
    # corresponding weights are all zero, it will be eliminated at the end
    # of this function.
    P = math.ceil(kernel_width) + 2

    # The indices of the input pixels involved in computing the k-th output
    # pixel are in row k of the indices matrix.
    indices = left.reshape([out_length, 1]).expand([
        out_length, P
    ]) + paddle.linspace(0, P - 1, P).reshape([1, P]).expand([out_length, P])

    # The weights used to compute the k-th output pixel are in row k of the
    # weights matrix.
    distance_to_center = u.reshape([out_length, 1]).expand([out_length,
                                                            P]) - indices

    # apply cubic kernel
    if (scale < 1) and (antialiasing):
        weights = scale * cubic(distance_to_center * scale)
    else:
        weights = cubic(distance_to_center)

    # Normalize the weights matrix so that each row sums to 1.
    weights_sum = paddle.sum(weights, 1).reshape([out_length, 1])
    weights = weights / weights_sum.expand([out_length, P])

    # If a column in weights is all zero, get rid of it; only consider the
    # first and last column.
    weights_zero_tmp = np.sum((weights.numpy() == 0), 0)
    if not math.isclose(weights_zero_tmp[0], 0, rel_tol=1e-6):
        indices = indices[:, 1:1 + P - 2]
        weights = weights[:, 1:1 + P - 2]
    if not math.isclose(weights_zero_tmp[-1], 0, rel_tol=1e-6):
        indices = indices[:, 0:P - 2]
        weights = weights[:, 0:P - 2]
    sym_len_s = -indices.min() + 1
    sym_len_e = indices.max() - in_length
    indices = indices + sym_len_s - 1
    return weights, indices, int(sym_len_s), int(sym_len_e)


def imresize(img, scale, antialiasing=True):
    # The scale should be the same for H and W
    # input: img: CHW RGB [0,1]
    # output: CHW RGB [0,1] w/o round
    in_C, in_H, in_W = img.shape
    _, out_H, out_W = in_C, math.ceil(in_H * scale), math.ceil(in_W * scale)
    kernel_width = 4
    kernel = 'cubic'

    # Return the desired dimension order for performing the resize. The
    # strategy is to perform the resize first along the dimension with the
    # smallest scale factor.
    # Now we do not support this.

    # get weights and indices
    weights_H, indices_H, sym_len_Hs, sym_len_He = calculate_weights_indices(
        in_H, out_H, scale, kernel, kernel_width, antialiasing)
    weights_W, indices_W, sym_len_Ws, sym_len_We = calculate_weights_indices(
        in_W, out_W, scale, kernel, kernel_width, antialiasing)

    # process H dimension
    # symmetric copying
    img_aug = paddle.zeros([in_C, in_H + sym_len_Hs + sym_len_He, in_W])
    img_aug[:, sym_len_Hs:sym_len_Hs + in_H, :] = img

    sym_patch = img[:, :sym_len_Hs, :]
    inv_idx = paddle.arange(sym_patch.shape[1] - 1, -1, -1)
    sym_patch_inv = paddle.index_select(sym_patch, inv_idx, 1)
    img_aug[:, :sym_len_Hs, :] = sym_patch_inv

    sym_patch = img[:, -sym_len_He:, :]
    inv_idx = paddle.arange(sym_patch.shape[1] - 1, -1, -1)
    sym_patch_inv = paddle.index_select(sym_patch, inv_idx, 1)
    img_aug[:, sym_len_Hs + in_H:sym_len_Hs + in_H +
            sym_len_He, :] = sym_patch_inv

    out_1 = paddle.zeros([in_C, out_H, in_W])
    kernel_width = weights_H.shape[1]
    for i in range(out_H):
        idx = int(indices_H[i][0])
        out_1[0, i, :] = paddle.mv(
            img_aug[0, idx:idx + kernel_width, :].transpose([1, 0]),
            (weights_H[i]))
        out_1[1, i, :] = paddle.mv(
            img_aug[1, idx:idx + kernel_width, :].transpose([1, 0]),
            (weights_H[i]))
        out_1[2, i, :] = paddle.mv(
            img_aug[2, idx:idx + kernel_width, :].transpose([1, 0]),
            (weights_H[i]))

    # process W dimension
    # symmetric copying
    out_1_aug = paddle.zeros([in_C, out_H, in_W + sym_len_Ws + sym_len_We])
    out_1_aug[:, :, sym_len_Ws:sym_len_Ws + in_W] = out_1

    sym_patch = out_1[:, :, :sym_len_Ws]
    inv_idx = paddle.arange(sym_patch.shape[2] - 1, -1, -1)
    sym_patch_inv = paddle.index_select(sym_patch, inv_idx, 2)
    out_1_aug[:, :, 0:sym_len_Ws] = sym_patch_inv

    sym_patch = out_1[:, :, -sym_len_We:]
    inv_idx = paddle.arange(sym_patch.shape[2] - 1, -1, -1)
    sym_patch_inv = paddle.index_select(sym_patch, inv_idx, 2)
    out_1_aug[:, :, sym_len_Ws + in_W:sym_len_Ws + in_W +
              sym_len_We] = sym_patch_inv

    out_2 = paddle.zeros([in_C, out_H, out_W])
    kernel_width = weights_W.shape[1]
    for i in range(out_W):
        idx = int(indices_W[i][0])
        out_2[0, :, i] = out_1_aug[0, :,
                                   idx:idx + kernel_width].mv(weights_W[i])
        out_2[1, :, i] = out_1_aug[1, :,
                                   idx:idx + kernel_width].mv(weights_W[i])
        out_2[2, :, i] = out_1_aug[2, :,
                                   idx:idx + kernel_width].mv(weights_W[i])

    return paddle.clip(out_2, 0, 1)


def to_pil_image(pic, mode=None):
    """Convert a tensor or an ndarray to PIL Image.

    Args:
        pic (paddle.Tensor or numpy.ndarray): Image to be converted to PIL Image.
        mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).

    .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes

    Returns:
        PIL Image: Image converted to PIL Image.
    """
    if not (isinstance(pic, paddle.Tensor) or isinstance(pic, np.ndarray)):
        raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(
            type(pic)))

    elif isinstance(pic, paddle.Tensor):
        if len(pic.shape) not in {2, 3}:
            raise ValueError(
                'pic should be 2/3 dimensional. Got {} dimensions.'.format(
                    pic.ndimension()))
        elif len(pic.shape) == 2:
            # if 2D image, add channel dimension (CHW)
            pic = pic.unsqueeze(0)

    elif isinstance(pic, np.ndarray):
        if pic.ndim not in {2, 3}:
            raise ValueError(
                'pic should be 2/3 dimensional. Got {} dimensions.'.format(
                    pic.ndim))
        elif pic.ndim == 2:
            # if 2D image, add channel dimension (HWC)
            pic = np.expand_dims(pic, 2)

    npimg = pic
    if isinstance(pic, paddle.Tensor) and mode != 'F':
        pic = pic.numpy()
        if pic.dtype == 'float32':
            npimg = np.transpose((pic * 255.).astype('uint8'), (1, 2, 0))

    if not isinstance(npimg, np.ndarray):
        raise TypeError('Input pic must be a paddle.Tensor or NumPy ndarray, '
                        'not {}'.format(type(npimg)))

    if npimg.shape[2] == 1:
        expected_mode = None
        npimg = npimg[:, :, 0]
        if npimg.dtype == np.uint8:
            expected_mode = 'L'
        elif npimg.dtype == np.int16:
            expected_mode = 'I;16'
        elif npimg.dtype == np.int32:
            expected_mode = 'I'
        elif npimg.dtype == np.float32:
            expected_mode = 'F'
        if mode is not None and mode != expected_mode:
            raise ValueError(
                "Incorrect mode ({}) supplied for input type {}. Should be {}".
                format(mode, npimg.dtype, expected_mode))
        mode = expected_mode

    elif npimg.shape[2] == 2:
        permitted_2_channel_modes = ['LA']
        if mode is not None and mode not in permitted_2_channel_modes:
            raise ValueError(
                "Only modes {} are supported for 2D inputs".format(
                    permitted_2_channel_modes))
        if mode is None and npimg.dtype == np.uint8:
            mode = 'LA'

    elif npimg.shape[2] == 4:
        permitted_4_channel_modes = ['RGBA', 'CMYK', 'RGBX']
        if mode is not None and mode not in permitted_4_channel_modes:
            raise ValueError(
                "Only modes {} are supported for 4D inputs".format(
                    permitted_4_channel_modes))
        if mode is None and npimg.dtype == np.uint8:
            mode = 'RGBA'

    else:
        permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV']
        if mode is not None and mode not in permitted_3_channel_modes:
            raise ValueError(
                "Only modes {} are supported for 3D inputs".format(
                    permitted_3_channel_modes))
        if mode is None and npimg.dtype == np.uint8:
            mode = 'RGB'

    if mode is None:
        raise TypeError('Input type {} is not supported'.format(npimg.dtype))

    return Image.fromarray(npimg, mode=mode)
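A hedged round-trip sketch using the two helpers above, mirroring how the dataset scripts call them (a CHW float tensor in [0, 1] goes in, a PIL image comes out):

import paddle
import utils  # this module

t = paddle.rand([3, 32, 32])
lr = utils.imresize(t, 1.0 / 4, True)  # MATLAB-style bicubic downscale -> [3, 8, 8]
utils.to_pil_image(lr).save('lr_example.png', 'PNG')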
from .io import LoadImageFromFile

from .transforms import (PairedRandomCrop, PairedRandomHorizontalFlip,
                         PairedRandomVerticalFlip, PairedRandomTransposeHW,
-                        SRPairedRandomCrop, SplitPairedImage)
+                        SRPairedRandomCrop, SplitPairedImage, SRNoise)

from .builder import build_preprocess
@@ -14,9 +14,13 @@
import sys
import cv2
import glob
import random
import numbers
import collections
import numpy as np
from PIL import Image

import paddle.vision.transforms as T
import paddle.vision.transforms.functional as F
@@ -230,3 +234,31 @@ class SRPairedRandomCrop(T.BaseTransform):
        outputs = (lq, gt)

        return outputs


@TRANSFORMS.register()
class SRNoise(T.BaseTransform):
    """Super resolution noise.

    Args:
        noise_path (str): directory of noise images.
        size (int): cropped noise patch size.
    """
    def __init__(self, noise_path, size, keys=None):
        self.noise_path = noise_path
        self.noise_imgs = sorted(glob.glob(noise_path + '*.png'))
        self.size = size
        self.keys = keys
        self.transform = T.Compose([
            T.RandomCrop(size),
            T.Transpose(),
            T.Normalize([0., 0., 0.], [255., 255., 255.])
        ])

    def _apply_image(self, image):
        idx = np.random.randint(0, len(self.noise_imgs))
        noise = self.transform(Image.open(self.noise_imgs[idx]))
        normed_noise = noise - np.mean(noise, axis=(1, 2), keepdims=True)
        image = image + normed_noise
        image = np.clip(image, 0., 1.)
        return image
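The key step in _apply_image is that the noise patch is zero-centred per channel before being added, so only its high-frequency texture, not its brightness, transfers to the LR image (whose 32-pixel patch size matches the size: 32 in the configs). A numpy-only sketch of that step:

import numpy as np

image = np.random.rand(3, 32, 32).astype('float32')  # CHW LR patch in [0, 1]
noise = np.random.rand(3, 32, 32).astype('float32')  # cropped noise patch
normed = noise - noise.mean(axis=(1, 2), keepdims=True)
out = np.clip(image + normed, 0., 1.)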