Commit d78530cf authored by LielinJiang

move some model to ppgan

Parent 9ed134f8
......@@ -13,8 +13,8 @@ import cv2
import paddle.fluid as fluid
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import video2frames, frames2video
import networks
from util import *
from my_args import parser
......@@ -129,7 +129,7 @@ class VideoFrameInterp(object):
r2 = str(int(fps) * times_interp)
print("New fps (frame rate): ", r2)
out_path = dump_frames_ffmpeg(vid, frame_path_input)
out_path = video2frames(vid, frame_path_input)
vidname = vid.split('/')[-1].split('.')[0]
......@@ -266,7 +266,7 @@ class VideoFrameInterp(object):
vidname + '.mp4')
if os.path.exists(video_pattern_output):
os.remove(video_pattern_output)
frames_to_video_ffmpeg(frame_pattern_combined, video_pattern_output,
frames2video(frame_pattern_combined, video_pattern_output,
r2)
return frame_pattern_combined, video_pattern_output
......
......@@ -21,66 +21,6 @@ class AverageMeter(object):
self.avg = self.sum / self.count
def dump_frames_ffmpeg(vid_path, outpath, r=None, ss=None, t=None):
ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
vid_name = vid_path.split('/')[-1].split('.')[0]
out_full_path = os.path.join(outpath, vid_name)
if not os.path.exists(out_full_path):
os.makedirs(out_full_path)
# video file name
outformat = out_full_path + '/%08d.png'
if ss is not None and t is not None and r is not None:
cmd = ffmpeg + [
' -ss ',
ss,
' -t ',
t,
' -i ',
vid_path,
' -r ',
r,
# ' -f ', ' image2 ',
# ' -s ', ' 960*540 ',
' -qscale:v ',
' 0.1 ',
' -start_number ',
' 0 ',
# ' -qmax ', ' 1 ',
outformat
]
else:
cmd = ffmpeg + [' -i ', vid_path, ' -start_number ', ' 0 ', outformat]
cmd = ''.join(cmd)
if os.system(cmd) == 0:
pass
else:
print('ffmpeg process video: {} error'.format(vid_name))
sys.stdout.flush()
return out_full_path
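For orientation, a sketch of the shell string the fallback branch above assembles (paths are illustrative, not from this commit):

# With vid_path='input.mp4' and out_full_path='frames/input', the joined
# command comes out as (note the doubled spaces from the fragment padding):
cmd = ''.join(['ffmpeg ', ' -y -loglevel ', ' error ',
               ' -i ', 'input.mp4', ' -start_number ', ' 0 ',
               'frames/input/%08d.png'])
# -> 'ffmpeg  -y -loglevel  error  -i input.mp4 -start_number  0 frames/input/%08d.png'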
def frames_to_video_ffmpeg(framepath, videopath, r):
ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
cmd = ffmpeg + [
' -r ', r, ' -f ', ' image2 ', ' -i ', framepath, ' -vcodec ',
' libx264 ', ' -pix_fmt ', ' yuv420p ', ' -crf ', ' 16 ', videopath
]
cmd = ''.join(cmd)
if os.system(cmd) == 0:
pass
else:
print('ffmpeg process video: {} error'.format(videopath))
sys.stdout.flush()
def combine_frames(input, interpolated, combined, num_frames):
frames1 = sorted(glob.glob(os.path.join(input, '*.png')))
frames2 = sorted(glob.glob(os.path.join(interpolated, '*.png')))
......
......@@ -14,8 +14,10 @@ import pickle
from PIL import Image
from tqdm import tqdm
from paddle import fluid
from model import build_model
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import frames2video, video2frames
from ppgan.models.generators.deoldify import build_model
parser = argparse.ArgumentParser(description='DeOldify')
parser.add_argument('--input', type=str, default='none', help='Input video')
......@@ -32,22 +34,6 @@ parser.add_argument('--weight_path',
DeOldify_weight_url = 'https://paddlegan.bj.bcebos.com/applications/DeOldify_stable.pdparams'
def frames_to_video_ffmpeg(framepath, videopath, r):
ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
cmd = ffmpeg + [
' -r ', r, ' -f ', ' image2 ', ' -i ', framepath, ' -vcodec ',
' libx264 ', ' -pix_fmt ', ' yuv420p ', ' -crf ', ' 16 ', videopath
]
cmd = ''.join(cmd)
if os.system(cmd) == 0:
pass
else:
print('ffmpeg process video: {} error'.format(videopath))
sys.stdout.flush()
class DeOldifyPredictor():
def __init__(self,
input,
......@@ -127,7 +113,7 @@ class DeOldifyPredictor():
cap = cv2.VideoCapture(vid)
fps = cap.get(cv2.CAP_PROP_FPS)
out_path = dump_frames_ffmpeg(vid, output_path)
out_path = video2frames(vid, output_path)
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
......@@ -141,42 +127,12 @@ class DeOldifyPredictor():
vid_out_path = os.path.join(output_path,
'{}_deoldify_out.mp4'.format(base_name))
frames_to_video_ffmpeg(frame_pattern_combined, vid_out_path,
frames2video(frame_pattern_combined, vid_out_path,
str(int(fps)))
return frame_pattern_combined, vid_out_path
def dump_frames_ffmpeg(vid_path, outpath, r=None, ss=None, t=None):
ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
vid_name = vid_path.split('/')[-1].split('.')[0]
out_full_path = os.path.join(outpath, 'frames_input')
if not os.path.exists(out_full_path):
os.makedirs(out_full_path)
# video file name
outformat = out_full_path + '/%08d.png'
if ss is not None and t is not None and r is not None:
cmd = ffmpeg + [
' -ss ', ss, ' -t ', t, ' -i ', vid_path, ' -r ', r, ' -qscale:v ',
' 0.1 ', ' -start_number ', ' 0 ', outformat
]
else:
cmd = ffmpeg + [' -i ', vid_path, ' -start_number ', ' 0 ', outformat]
cmd = ''.join(cmd)
if os.system(cmd) == 0:
pass
else:
print('ffmpeg process video: {} error'.format(vid_name))
sys.stdout.flush()
return out_full_path
if __name__ == '__main__':
paddle.disable_static()
args = parser.parse_args()
......
import numpy as np
from paddle import fluid
from paddle.fluid import dygraph
from paddle.fluid import layers as F
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.data_feeder import check_variable_and_dtype
import paddle
import paddle.nn as nn
class _SpectralNorm(nn.SpectralNorm):
def __init__(self,
weight_shape,
dim=0,
power_iters=1,
eps=1e-12,
dtype='float32'):
super(_SpectralNorm, self).__init__(weight_shape, dim, power_iters, eps, dtype)
def forward(self, weight):
check_variable_and_dtype(weight, "weight", ['float32', 'float64'],
'SpectralNorm')
inputs = {'Weight': weight, 'U': self.weight_u, 'V': self.weight_v}
out = self._helper.create_variable_for_type_inference(self._dtype)
_power_iters = self._power_iters if self.training else 0
self._helper.append_op(
type="spectral_norm",
inputs=inputs,
outputs={"Out": out, },
attrs={
"dim": self._dim,
"power_iters": _power_iters, #self._power_iters,
"eps": self._eps,
})
return out
class Spectralnorm(nn.Layer):
def __init__(self,
layer,
dim=0,
power_iters=1,
eps=1e-12,
dtype='float32'):
super(Spectralnorm, self).__init__()
self.spectral_norm = _SpectralNorm(layer.weight.shape, dim, power_iters, eps, dtype)
self.dim = dim
self.power_iters = power_iters
self.eps = eps
self.layer = layer
weight = layer._parameters['weight']
del layer._parameters['weight']
self.weight_orig = self.create_parameter(weight.shape, dtype=weight.dtype)
self.weight_orig.set_value(weight)
def forward(self, x):
weight = self.spectral_norm(self.weight_orig)
self.layer.weight = weight
out = self.layer(x)
return out
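A minimal usage sketch of the wrapper above (the layer and shapes are assumptions, not part of this commit): wrap a layer so its weight is spectrally normalized on every forward pass.

import paddle
import paddle.nn as nn

# Any layer exposing a 'weight' parameter works; dim=0 normalizes over
# the output-channel axis of a conv weight.
conv = nn.Conv2d(3, 64, 3, 1, 1)
sn_conv = Spectralnorm(conv, dim=0, power_iters=1)
y = sn_conv(paddle.randn([1, 3, 32, 32]))  # weight re-normalized here, then conv runs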
......@@ -14,7 +14,7 @@ from tqdm import tqdm
import argparse
import subprocess
import utils
from remasternet import NetworkR, NetworkC
from ppgan.models.generators.remaster import NetworkR, NetworkC
from paddle.utils.download import get_path_from_url
DeepRemaster_weight_url = 'https://paddlegan.bj.bcebos.com/applications/deep_remaster.pdparams'
......
......@@ -30,6 +30,7 @@ import cv2
from tqdm import tqdm
from data import EDVRDataset
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import frames2video, video2frames
EDVR_weight_url = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar'
......@@ -71,52 +72,6 @@ def save_img(img, framename):
cv2.imwrite(framename, img)
def dump_frames_ffmpeg(vid_path, outpath, r=None, ss=None, t=None):
ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
vid_name = vid_path.split('/')[-1].split('.')[0]
out_full_path = os.path.join(outpath, 'frames_input')
if not os.path.exists(out_full_path):
os.makedirs(out_full_path)
# video file name
outformat = out_full_path + '/%08d.png'
if ss is not None and t is not None and r is not None:
cmd = ffmpeg + [
' -ss ', ss, ' -t ', t, ' -i ', vid_path, ' -r ', r, ' -qscale:v ',
' 0.1 ', ' -start_number ', ' 0 ', outformat
]
else:
cmd = ffmpeg + [' -i ', vid_path, ' -start_number ', ' 0 ', outformat]
cmd = ''.join(cmd)
if os.system(cmd) == 0:
pass
else:
print('ffmpeg process video: {} error'.format(vid_name))
sys.stdout.flush()
return out_full_path
def frames_to_video_ffmpeg(framepath, videopath, r):
ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
cmd = ffmpeg + [
' -r ', r, ' -f ', ' image2 ', ' -i ', framepath, ' -vcodec ',
' libx264 ', ' -pix_fmt ', ' yuv420p ', ' -crf ', ' 16 ', videopath
]
cmd = ''.join(cmd)
if os.system(cmd) == 0:
pass
else:
print('ffmpeg process video: {} error'.format(videopath))
sys.stdout.flush()
class EDVRPredictor:
def __init__(self, input, output, weight_path=None):
self.input = input
......@@ -129,8 +84,6 @@ class EDVRPredictor:
if weight_path is None:
weight_path = get_path_from_url(EDVR_weight_url, cur_path)
print(weight_path)
model_filename = 'EDVR_model.pdmodel'
params_filename = 'EDVR_params.pdparams'
......@@ -155,7 +108,7 @@ class EDVRPredictor:
cap = cv2.VideoCapture(vid)
fps = cap.get(cv2.CAP_PROP_FPS)
out_path = dump_frames_ffmpeg(vid, output_path)
out_path = video2frames(vid, output_path)
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
......@@ -188,7 +141,7 @@ class EDVRPredictor:
frame_pattern_combined = os.path.join(pred_frame_path, '%08d.png')
vid_out_path = os.path.join(self.output,
'{}_edvr_out.mp4'.format(base_name))
frames_to_video_ffmpeg(frame_pattern_combined, vid_out_path,
frames2video(frame_pattern_combined, vid_out_path,
str(int(fps)))
return frame_pattern_combined, vid_out_path
......
......@@ -13,7 +13,9 @@ import pickle
from PIL import Image
from tqdm import tqdm
from sr_model import RRDBNet
from ppgan.models.generators import RRDBNet
from ppgan.utils.video import frames2video, video2frames
from paddle.utils.download import get_path_from_url
parser = argparse.ArgumentParser(description='RealSR')
......@@ -27,22 +29,6 @@ parser.add_argument('--weight_path',
RealSR_weight_url = 'https://paddlegan.bj.bcebos.com/applications/DF2K_JPEG.pdparams'
def frames_to_video_ffmpeg(framepath, videopath, r):
ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
cmd = ffmpeg + [
' -r ', r, ' -f ', ' image2 ', ' -i ', framepath, ' -vcodec ',
' libx264 ', ' -pix_fmt ', ' yuv420p ', ' -crf ', ' 16 ', videopath
]
cmd = ''.join(cmd)
if os.system(cmd) == 0:
pass
else:
print('ffmpeg process video: {} error'.format(videopath))
sys.stdout.flush()
class RealSRPredictor():
def __init__(self, input, output, batch_size=1, weight_path=None):
self.input = input
......@@ -88,7 +74,7 @@ class RealSRPredictor():
cap = cv2.VideoCapture(vid)
fps = cap.get(cv2.CAP_PROP_FPS)
out_path = dump_frames_ffmpeg(vid, output_path)
out_path = video2frames(vid, output_path)
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
......@@ -102,42 +88,12 @@ class RealSRPredictor():
vid_out_path = os.path.join(output_path,
'{}_realsr_out.mp4'.format(base_name))
frames_to_video_ffmpeg(frame_pattern_combined, vid_out_path,
frames2video(frame_pattern_combined, vid_out_path,
str(int(fps)))
return frame_pattern_combined, vid_out_path
def dump_frames_ffmpeg(vid_path, outpath, r=None, ss=None, t=None):
ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
vid_name = vid_path.split('/')[-1].split('.')[0]
out_full_path = os.path.join(outpath, 'frames_input')
if not os.path.exists(out_full_path):
os.makedirs(out_full_path)
# video file name
outformat = out_full_path + '/%08d.png'
if ss is not None and t is not None and r is not None:
cmd = ffmpeg + [
' -ss ', ss, ' -t ', t, ' -i ', vid_path, ' -r ', r, ' -qscale:v ',
' 0.1 ', ' -start_number ', ' 0 ', outformat
]
else:
cmd = ffmpeg + [' -i ', vid_path, ' -start_number ', ' 0 ', outformat]
cmd = ''.join(cmd)
if os.system(cmd) == 0:
pass
else:
print('ffmpeg process video: {} error'.format(vid_name))
sys.stdout.flush()
return out_full_path
if __name__ == '__main__':
paddle.disable_static()
args = parser.parse_args()
......
......@@ -41,6 +41,11 @@ dataset:
crop_size: 256
preprocess: resize_and_crop
no_flip: False
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
test:
name: SingleDataset
dataroot: data/cityscapes/testB
......@@ -55,6 +60,11 @@ dataset:
crop_size: 256
preprocess: resize_and_crop
no_flip: True
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
optimizer:
......
......@@ -40,6 +40,11 @@ dataset:
crop_size: 256
preprocess: resize_and_crop
no_flip: False
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
test:
name: SingleDataset
dataroot: data/horse2zebra/testA
......@@ -54,7 +59,11 @@ dataset:
crop_size: 256
preprocess: resize_and_crop
no_flip: True
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
optimizer:
name: Adam
......
......@@ -38,6 +38,11 @@ dataset:
crop_size: 256
preprocess: resize_and_crop
no_flip: False
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
test:
name: PairedDataset
dataroot: data/cityscapes/
......@@ -53,6 +58,11 @@ dataset:
crop_size: 256
preprocess: resize_and_crop
no_flip: True
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
optimizer:
name: Adam
......
......@@ -37,6 +37,11 @@ dataset:
crop_size: 256
preprocess: resize_and_crop
no_flip: False
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
test:
name: PairedDataset
dataroot: data/cityscapes/
......@@ -52,6 +57,11 @@ dataset:
crop_size: 256
preprocess: resize_and_crop
no_flip: True
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
optimizer:
name: Adam
......
......@@ -37,6 +37,11 @@ dataset:
crop_size: 256
preprocess: resize_and_crop
no_flip: False
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
test:
name: PairedDataset
dataroot: data/facades/
......@@ -52,6 +57,11 @@ dataset:
crop_size: 256
preprocess: resize_and_crop
no_flip: True
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
optimizer:
name: Adam
......
......@@ -94,10 +94,9 @@ def get_transform(cfg,
if convert:
transform_list += [transforms.Permute(to_rgb=True)]
transform_list += [
transforms.Normalize((0., 0., 0.), (255., 255., 255.))
]
# transform_list += [
# transforms.Normalize((127.5, 127.5, 127.5), (127.5, 127.5, 127.5))
# ]
if cfg.get('normalize', None):
transform_list += [
transforms.Normalize(cfg.normalize.mean, cfg.normalize.std)
]
    return transforms.Compose(transform_list)
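A note on the new `normalize` config entries wired in here: with mean = std = (127.5, 127.5, 127.5), Normalize maps uint8 pixels from [0, 255] to [-1, 1], which matches the (-1., 1.) default range used by the trainer's tensor2img below. A hedged sketch of the equivalent call (import path assumed to be the Paddle high-level vision API):

from paddle.vision import transforms

norm = transforms.Normalize((127.5, 127.5, 127.5), (127.5, 127.5, 127.5))
# (0 - 127.5) / 127.5 = -1.0 and (255 - 127.5) / 127.5 = +1.0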
# import mmcv
import os
import cv2
import random
import numpy as np
import paddle.vision.transforms as transform
from pathlib import Path
from paddle.io import Dataset
from .builder import DATASETS
def scandir(dir_path, suffix=None, recursive=False):
"""Scan a directory to find the interested files.
"""
if isinstance(dir_path, (str, Path)):
dir_path = str(dir_path)
else:
raise TypeError('"dir_path" must be a string or Path object')
if (suffix is not None) and not isinstance(suffix, (str, tuple)):
raise TypeError('"suffix" must be a string or tuple of strings')
root = dir_path
def _scandir(dir_path, suffix, recursive):
for entry in os.scandir(dir_path):
if not entry.name.startswith('.') and entry.is_file():
rel_path = os.path.relpath(entry.path, root)
if suffix is None:
yield rel_path
elif rel_path.endswith(suffix):
yield rel_path
else:
if recursive:
yield from _scandir(
entry.path, suffix=suffix, recursive=recursive)
else:
continue
return _scandir(dir_path, suffix=suffix, recursive=recursive)
def paired_paths_from_folder(folders, keys, filename_tmpl):
"""Generate paired paths from folders.
"""
assert len(folders) == 2, (
'The len of folders should be 2 with [input_folder, gt_folder]. '
f'But got {len(folders)}')
assert len(keys) == 2, (
'The len of keys should be 2 with [input_key, gt_key]. '
f'But got {len(keys)}')
input_folder, gt_folder = folders
input_key, gt_key = keys
input_paths = list(scandir(input_folder))
gt_paths = list(scandir(gt_folder))
assert len(input_paths) == len(gt_paths), (
f'{input_key} and {gt_key} datasets have different number of images: '
f'{len(input_paths)}, {len(gt_paths)}.')
paths = []
for gt_path in gt_paths:
basename, ext = os.path.splitext(os.path.basename(gt_path))
input_name = f'{filename_tmpl.format(basename)}{ext}'
input_path = os.path.join(input_folder, input_name)
assert input_name in input_paths, (f'{input_name} is not in '
f'{input_key}_paths.')
gt_path = os.path.join(gt_folder, gt_path)
paths.append(
dict([(f'{input_key}_path', input_path),
(f'{gt_key}_path', gt_path)]))
return paths
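A hypothetical call, with folder names and the default template purely illustrative:

paths = paired_paths_from_folder(
    ['data/DIV2K/lq', 'data/DIV2K/gt'],   # [input_folder, gt_folder]
    ['lq', 'gt'],
    '{}')                                  # lq file shares the gt basename
# paths[0] == {'lq_path': 'data/DIV2K/lq/0001.png',
#              'gt_path': 'data/DIV2K/gt/0001.png'}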
def paired_random_crop(img_gts, img_lqs, gt_patch_size, scale, gt_path):
"""Paired random crop.
It crops lists of lq and gt images with corresponding locations.
Args:
img_gts (list[ndarray] | ndarray): GT images. Note that all images
should have the same shape. If the input is an ndarray, it will
be transformed to a list containing itself.
img_lqs (list[ndarray] | ndarray): LQ images. Note that all images
should have the same shape. If the input is an ndarray, it will
be transformed to a list containing itself.
gt_patch_size (int): GT patch size.
scale (int): Scale factor.
gt_path (str): Path to ground-truth.
Returns:
list[ndarray] | ndarray: GT images and LQ images. If returned results
only have one element, just return ndarray.
"""
if not isinstance(img_gts, list):
img_gts = [img_gts]
if not isinstance(img_lqs, list):
img_lqs = [img_lqs]
h_lq, w_lq, _ = img_lqs[0].shape
h_gt, w_gt, _ = img_gts[0].shape
lq_patch_size = gt_patch_size // scale
if h_gt != h_lq * scale or w_gt != w_lq * scale:
raise ValueError(
f'Scale mismatches. GT ({h_gt}, {w_gt}) is not {scale}x '
f'multiplication of LQ ({h_lq}, {w_lq}).')
if h_lq < lq_patch_size or w_lq < lq_patch_size:
raise ValueError(f'LQ ({h_lq}, {w_lq}) is smaller than patch size '
f'({lq_patch_size}, {lq_patch_size}). '
f'Please remove {gt_path}.')
# randomly choose top and left coordinates for lq patch
top = random.randint(0, h_lq - lq_patch_size)
left = random.randint(0, w_lq - lq_patch_size)
# crop lq patch
img_lqs = [
v[top:top + lq_patch_size, left:left + lq_patch_size, ...]
for v in img_lqs
]
# crop corresponding gt patch
top_gt, left_gt = int(top * scale), int(left * scale)
img_gts = [
v[top_gt:top_gt + gt_patch_size, left_gt:left_gt + gt_patch_size, ...]
for v in img_gts
]
if len(img_gts) == 1:
img_gts = img_gts[0]
if len(img_lqs) == 1:
img_lqs = img_lqs[0]
return img_gts, img_lqs
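A worked example of the crop arithmetic (array sizes assumed): with scale=4 and gt_patch_size=128, the lq patch is 128 // 4 = 32; an lq corner of (top, left) = (10, 20) maps to a gt corner of (40, 80).

import numpy as np

gt = np.zeros((480, 480, 3), np.float32)
lq = np.zeros((120, 120, 3), np.float32)   # exactly gt / 4
gt_crop, lq_crop = paired_random_crop(gt, lq, 128, 4, 'dummy_gt_path')
# gt_crop.shape == (128, 128, 3), lq_crop.shape == (32, 32, 3)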
def augment(imgs, hflip=True, rotation=True, flows=None):
"""Augment: horizontal flips OR rotate (0, 90, 180, 270 degrees).
"""
hflip = hflip and random.random() < 0.5
vflip = rotation and random.random() < 0.5
rot90 = rotation and random.random() < 0.5
def _augment(img):
if hflip:
cv2.flip(img, 1, img)
if vflip:
cv2.flip(img, 0, img)
if rot90:
img = img.transpose(1, 0, 2)
return img
def _augment_flow(flow):
if hflip:
cv2.flip(flow, 1, flow)
flow[:, :, 0] *= -1
if vflip:
cv2.flip(flow, 0, flow)
flow[:, :, 1] *= -1
if rot90:
flow = flow.transpose(1, 0, 2)
flow = flow[:, :, [1, 0]]
return flow
if not isinstance(imgs, list):
imgs = [imgs]
imgs = [_augment(img) for img in imgs]
if len(imgs) == 1:
imgs = imgs[0]
if flows is not None:
if not isinstance(flows, list):
flows = [flows]
flows = [_augment_flow(flow) for flow in flows]
if len(flows) == 1:
flows = flows[0]
return imgs, flows
else:
return imgs
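Usage sketch: the flips and 90-degree rotation are sampled once per call, so paired images stay aligned; note the flips are applied in place via cv2.

import numpy as np

img_a = np.random.rand(64, 64, 3).astype(np.float32)
img_b = np.random.rand(64, 64, 3).astype(np.float32)
aug_a, aug_b = augment([img_a, img_b], hflip=True, rotation=True)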
@DATASETS.register()
class SRImageDataset(Dataset):
"""Paired image dataset for image restoration."""
def __init__(self, cfg):
super(SRImageDataset, self).__init__()
self.cfg = cfg
self.file_client = None
self.io_backend_opt = cfg['io_backend']
self.gt_folder, self.lq_folder = cfg['dataroot_gt'], cfg['dataroot_lq']
if 'filename_tmpl' in cfg:
self.filename_tmpl = cfg['filename_tmpl']
else:
self.filename_tmpl = '{}'
if self.io_backend_opt['type'] == 'lmdb':
#TODO: LielinJiang support lmdb to accelerate io
pass
elif 'meta_info_file' in self.cfg and self.cfg[
'meta_info_file'] is not None:
#TODO: LielinJiang support lmdb to accelerate io
pass
else:
self.paths = paired_paths_from_folder(
[self.lq_folder, self.gt_folder], ['lq', 'gt'],
self.filename_tmpl)
def __getitem__(self, index):
scale = self.cfg['scale']
# Load gt and lq images. Dimension order: HWC; channel order: BGR;
# image range: [0, 1], float32.
gt_path = self.paths[index]['gt_path']
lq_path = self.paths[index]['lq_path']
img_gt = cv2.imread(gt_path).astype(np.float32) / 255.
img_lq = cv2.imread(lq_path).astype(np.float32) / 255.
# augmentation for training
if self.cfg['phase'] == 'train':
gt_size = self.cfg['gt_size']
# random crop
img_gt, img_lq = paired_random_crop(img_gt, img_lq, gt_size, scale,
gt_path)
# flip, rotation
img_gt, img_lq = augment([img_gt, img_lq], self.cfg['use_flip'],
self.cfg['use_rot'])
# TODO: color space transform
# BGR to RGB, HWC to CHW, numpy to tensor
permute = transform.Permute()
img_gt = permute(img_gt)
img_lq = permute(img_lq)
return {
'lq': img_lq,
'gt': img_gt,
'lq_path': lq_path,
'gt_path': gt_path
}
def __len__(self):
return len(self.paths)
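A hypothetical config for the dataset above; the key names follow this class, all paths are examples only:

cfg = {
    'io_backend': {'type': 'disk'},
    'dataroot_gt': 'data/DIV2K/gt',
    'dataroot_lq': 'data/DIV2K/lq',
    'scale': 4,
    'phase': 'train',
    'gt_size': 128,
    'use_flip': True,
    'use_rot': True,
}
dataset = SRImageDataset(cfg)
sample = dataset[0]  # dict with CHW 'lq'/'gt' arrays plus their source paths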
......@@ -40,6 +40,9 @@ class Trainer:
self.weight_interval = cfg.snapshot_config.interval
self.log_interval = cfg.log_config.interval
self.visual_interval = cfg.log_config.visiual_interval
self.validate_interval = -1
if cfg.get('validate', None) is not None:
self.validate_interval = cfg.validate.get('interval', -1)
self.cfg = cfg
self.local_rank = ParallelEnv().local_rank
......@@ -81,7 +84,8 @@ class Trainer:
step_start_time = time.time()
self.logger.info('train one epoch time: {}'.format(time.time() -
start_time))
self.validate()
if self.validate_interval > -1 and epoch % self.validate_interval == 0:
self.validate()
self.model.lr_scheduler.step()
if epoch % self.weight_interval == 0:
self.save(epoch, 'weight', keep=-1)
......@@ -103,26 +107,22 @@ class Trainer:
current_paths = self.model.get_image_paths()
current_visuals = self.model.get_current_visuals()
# print('debug1:', self.cfg.validate.metrics)
for j in range(len(current_paths)):
short_path = os.path.basename(current_paths[j])
basename = os.path.splitext(short_path)[0]
for k, img_tensor in current_visuals.items():
name = '%s_%s' % (basename, k)
visual_results.update({name: img_tensor[j]})
# print('debug2:', self.cfg.validate.metrics)
if 'psnr' in self.cfg.validate.metrics:
# args = copy.deepcopy(self.cfg.validate.metrics.pnsr)
# args.pop('name')
if 'psnr' not in metric_result:
metric_result['psnr'] = calculate_psnr(tensor2img(current_visuals['output'][j]), tensor2img(current_visuals['gt'][j]), **self.cfg.validate.metrics.psnr)
metric_result['psnr'] = calculate_psnr(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.psnr)
else:
metric_result['psnr'] += calculate_psnr(tensor2img(current_visuals['output'][j]), tensor2img(current_visuals['gt'][j]), **self.cfg.validate.metrics.psnr)
metric_result['psnr'] += calculate_psnr(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.psnr)
if 'ssim' in self.cfg.validate.metrics:
if 'ssim' not in metric_result:
metric_result['ssim'] = calculate_ssim(tensor2img(current_visuals['output'][j]), tensor2img(current_visuals['gt'][j]), **self.cfg.validate.metrics.ssim)
metric_result['ssim'] = calculate_ssim(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.ssim)
else:
metric_result['ssim'] += calculate_ssim(tensor2img(current_visuals['output'][j]), tensor2img(current_visuals['gt'][j]), **self.cfg.validate.metrics.ssim)
metric_result['ssim'] += calculate_ssim(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.ssim)
self.visual('visual_val', visual_results=visual_results)
......@@ -200,8 +200,11 @@ class Trainer:
msg = ''
makedirs(os.path.join(self.output_dir, results_dir))
min_max = self.cfg.get('min_max', None)
if min_max is None:
min_max = (-1., 1.)
for label, image in visual_results.items():
image_numpy = tensor2img(image)
image_numpy = tensor2img(image, min_max)
img_path = os.path.join(self.output_dir, results_dir,
msg + '%s.png' % (label))
save_image(image_numpy, img_path)
......
import numpy as np
def reorder_image(img, input_order='HWC'):
"""Reorder images to 'HWC' order.
If the input_order is (h, w), return (h, w, 1);
If the input_order is (c, h, w), return (h, w, c);
If the input_order is (h, w, c), return as it is.
Args:
img (ndarray): Input image.
input_order (str): Whether the input order is 'HWC' or 'CHW'.
If the input image shape is (h, w), input_order will not have
effects. Default: 'HWC'.
Returns:
ndarray: reordered image.
"""
if input_order not in ['HWC', 'CHW']:
raise ValueError(
f'Wrong input_order {input_order}. Supported input_orders are '
"'HWC' and 'CHW'")
if len(img.shape) == 2:
img = img[..., None]
return img
if input_order == 'CHW':
img = img.transpose(1, 2, 0)
return img
def bgr2ycbcr(img, y_only=False):
"""Convert a BGR image to YCbCr image.
The bgr version of rgb2ycbcr.
It implements the ITU-R BT.601 conversion for standard-definition
television. See more details in
https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`.
In OpenCV, it implements a JPEG conversion. See more details in
https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
Args:
img (ndarray): The input image. It accepts:
1. np.uint8 type with range [0, 255];
2. np.float32 type with range [0, 1].
y_only (bool): Whether to only return Y channel. Default: False.
Returns:
ndarray: The converted YCbCr image. The output image has the same type
and range as input image.
"""
img_type = img.dtype
img = _convert_input_type_range(img)
if y_only:
out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0
else:
out_img = np.matmul(
img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786],
[65.481, -37.797, 112.0]]) + [16, 128, 128]
out_img = _convert_output_type_range(out_img, img_type)
return out_img
def to_y_channel(img):
"""Change to Y channel of YCbCr.
Args:
img (ndarray): Images with range [0, 255].
Returns:
(ndarray): Images with range [0, 255] (float type) without round.
"""
img = img.astype(np.float32) / 255.
if img.ndim == 3 and img.shape[2] == 3:
img = bgr2ycbcr(img, y_only=True)
img = img[..., None]
return img * 255.
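A worked check of the BT.601 constants above: a white uint8 pixel (255, 255, 255) is first rescaled to (1, 1, 1), so the y_only branch gives Y = 24.966 + 128.553 + 65.481 + 16 = 235, the studio-swing white level (black maps to Y = 16).

import numpy as np

white = np.full((1, 1, 3), 255, np.uint8)
y = bgr2ycbcr(white, y_only=True)   # -> 235, returned in the input's uint8 range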
import cv2
import numpy as np
from .metric_util import reorder_image, to_y_channel
def calculate_psnr(img1,
img2,
crop_border,
input_order='HWC',
test_y_channel=False):
"""Calculate PSNR (Peak Signal-to-Noise Ratio).
Ref: https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio
Args:
img1 (ndarray): Images with range [0, 255].
img2 (ndarray): Images with range [0, 255].
crop_border (int): Cropped pixels in each edge of an image. These
pixels are not involved in the PSNR calculation.
input_order (str): Whether the input order is 'HWC' or 'CHW'.
Default: 'HWC'.
test_y_channel (bool): Test on Y channel of YCbCr. Default: False.
Returns:
float: psnr result.
"""
assert img1.shape == img2.shape, (
f'Image shapes are different: {img1.shape}, {img2.shape}.')
if input_order not in ['HWC', 'CHW']:
raise ValueError(
f'Wrong input_order {input_order}. Supported input_orders are '
'"HWC" and "CHW"')
img1 = reorder_image(img1, input_order=input_order)
img2 = reorder_image(img2, input_order=input_order)
if crop_border != 0:
img1 = img1[crop_border:-crop_border, crop_border:-crop_border, ...]
img2 = img2[crop_border:-crop_border, crop_border:-crop_border, ...]
if test_y_channel:
img1 = to_y_channel(img1)
img2 = to_y_channel(img2)
mse = np.mean((img1 - img2)**2)
if mse == 0:
return float('inf')
return 20. * np.log10(255. / np.sqrt(mse))
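A worked example: identical images return inf; if every pixel differs by 10 on the 8-bit scale, mse = 100 and psnr = 20 * log10(255 / 10) ≈ 28.13 dB.

import numpy as np

a = np.zeros((32, 32, 3), np.float64)
b = a + 10.
print(calculate_psnr(a, b, crop_border=0))   # ~28.13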
def _ssim(img1, img2):
"""Calculate SSIM (structural similarity) for one channel images.
It is called by func:`calculate_ssim`.
Args:
img1 (ndarray): Images with range [0, 255] with order 'HWC'.
img2 (ndarray): Images with range [0, 255] with order 'HWC'.
Returns:
float: ssim result.
"""
C1 = (0.01 * 255)**2
C2 = (0.03 * 255)**2
img1 = img1.astype(np.float64)
img2 = img2.astype(np.float64)
kernel = cv2.getGaussianKernel(11, 1.5)
window = np.outer(kernel, kernel.transpose())
mu1 = cv2.filter2D(img1, -1, window)[5:-5, 5:-5]
mu2 = cv2.filter2D(img2, -1, window)[5:-5, 5:-5]
mu1_sq = mu1**2
mu2_sq = mu2**2
mu1_mu2 = mu1 * mu2
sigma1_sq = cv2.filter2D(img1**2, -1, window)[5:-5, 5:-5] - mu1_sq
sigma2_sq = cv2.filter2D(img2**2, -1, window)[5:-5, 5:-5] - mu2_sq
sigma12 = cv2.filter2D(img1 * img2, -1, window)[5:-5, 5:-5] - mu1_mu2
ssim_map = ((2 * mu1_mu2 + C1) *
(2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) *
(sigma1_sq + sigma2_sq + C2))
return ssim_map.mean()
def calculate_ssim(img1,
img2,
crop_border,
input_order='HWC',
test_y_channel=False):
"""Calculate SSIM (structural similarity).
Ref:
Image quality assessment: From error visibility to structural similarity
The results are the same as those of the officially released MATLAB code in
https://ece.uwaterloo.ca/~z70wang/research/ssim/.
For three-channel images, SSIM is calculated for each channel and then
averaged.
Args:
img1 (ndarray): Images with range [0, 255].
img2 (ndarray): Images with range [0, 255].
crop_border (int): Cropped pixels in each edge of an image. These
pixels are not involved in the SSIM calculation.
input_order (str): Whether the input order is 'HWC' or 'CHW'.
Default: 'HWC'.
test_y_channel (bool): Test on Y channel of YCbCr. Default: False.
Returns:
float: ssim result.
"""
assert img1.shape == img2.shape, (
f'Image shapes are different: {img1.shape}, {img2.shape}.')
if input_order not in ['HWC', 'CHW']:
raise ValueError(
f'Wrong input_order {input_order}. Supported input_orders are '
'"HWC" and "CHW"')
img1 = reorder_image(img1, input_order=input_order)
img2 = reorder_image(img2, input_order=input_order)
if crop_border != 0:
img1 = img1[crop_border:-crop_border, crop_border:-crop_border, ...]
img2 = img2[crop_border:-crop_border, crop_border:-crop_border, ...]
if test_y_channel:
img1 = to_y_channel(img1)
img2 = to_y_channel(img2)
ssims = []
for i in range(img1.shape[2]):
ssims.append(_ssim(img1[..., i], img2[..., i]))
return np.array(ssims).mean()
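Usage sketch: an image compared with itself scores exactly 1.0, and crop_border trims edge pixels before scoring, mirroring calculate_psnr; inputs must be at least 11x11 for the Gaussian window.

import numpy as np

img = (np.random.rand(64, 64, 3) * 255).astype(np.float64)
print(calculate_ssim(img, img, crop_border=4))   # 1.0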
from .resnet_backbone import resnet18, resnet34, resnet50, resnet101, resnet152
\ No newline at end of file
......@@ -3,10 +3,9 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from resnet_backbone import resnet34, resnet101
from hook import hook_outputs, model_sizes, dummy_eval
from spectral_norm import Spectralnorm
from paddle import fluid
from .hook import hook_outputs, model_sizes, dummy_eval
from ..backbones import resnet34, resnet101
from ...modules.nn import Spectralnorm
class SequentialEx(nn.Layer):
......@@ -206,7 +205,7 @@ class UnetBlockWide(nn.Layer):
return self.conv(cat_x)
class UnetBlockDeep(paddle.fluid.Layer):
class UnetBlockDeep(nn.Layer):
"A quasi-UNet block, using `PixelShuffle_ICNR upsampling`."
def __init__(
......@@ -319,7 +318,7 @@ def conv_layer(ni: int,
return nn.Sequential(*layers)
class CustomPixelShuffle_ICNR(paddle.fluid.Layer):
class CustomPixelShuffle_ICNR(nn.Layer):
"Upsample by `scale` from `ni` filters to `nf` (default `ni`), using `nn.PixelShuffle`, `icnr` init, and `weight_norm`."
def __init__(self,
......@@ -349,7 +348,7 @@ class CustomPixelShuffle_ICNR(paddle.fluid.Layer):
return self.blur(self.pad(x)) if self.blur else x
class MergeLayer(paddle.fluid.Layer):
class MergeLayer(nn.Layer):
"Merge a shortcut with the result of the module by adding them or concatenating thme if `dense=True`."
def __init__(self, dense: bool = False):
......@@ -379,7 +378,7 @@ def res_block(nf,
MergeLayer(dense))
class SigmoidRange(paddle.fluid.Layer):
class SigmoidRange(nn.Layer):
"Sigmoid module with range `(low,x_max)`"
def __init__(self, low, high):
......@@ -395,13 +394,13 @@ def sigmoid_range(x, low, high):
return F.sigmoid(x) * (high - low) + low
class PixelShuffle(paddle.fluid.Layer):
class PixelShuffle(nn.Layer):
def __init__(self, upscale_factor):
super(PixelShuffle, self).__init__()
self.upscale_factor = upscale_factor
def forward(self, x):
return paddle.fluid.layers.pixel_shuffle(x, self.upscale_factor)
return F.pixel_shuffle(x, self.upscale_factor)
class ReplicationPad2d(nn.Layer):
......@@ -410,7 +409,7 @@ class ReplicationPad2d(nn.Layer):
self.size = size
def forward(self, x):
return paddle.fluid.layers.pad2d(x, self.size, mode="edge")
return F.pad2d(x, self.size, mode="edge")
def conv1d(ni: int,
......
......@@ -3,6 +3,8 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from .builder import GENERATORS
class ResidualDenseBlock_5C(nn.Layer):
def __init__(self, nf=64, gc=32, bias=True):
......@@ -15,6 +17,7 @@ class ResidualDenseBlock_5C(nn.Layer):
self.conv5 = nn.Conv2d(nf + 4 * gc, nf, 3, 1, 1, bias_attr=bias)
self.lrelu = nn.LeakyReLU(negative_slope=0.2)
def forward(self, x):
x1 = self.lrelu(self.conv1(x))
x2 = self.lrelu(self.conv2(paddle.concat((x, x1), 1)))
......@@ -26,6 +29,7 @@ class ResidualDenseBlock_5C(nn.Layer):
class RRDB(nn.Layer):
'''Residual in Residual Dense Block'''
def __init__(self, nf, gc=32):
super(RRDB, self).__init__()
self.RDB1 = ResidualDenseBlock_5C(nf, gc)
......@@ -38,7 +42,6 @@ class RRDB(nn.Layer):
out = self.RDB3(out)
return out * 0.2 + x
def make_layer(block, n_layers):
layers = []
for _ in range(n_layers):
......@@ -46,6 +49,7 @@ def make_layer(block, n_layers):
return nn.Sequential(*layers)
@GENERATORS.register()
class RRDBNet(nn.Layer):
def __init__(self, in_nc, out_nc, nf, nb, gc=32):
super(RRDBNet, self).__init__()
......@@ -67,10 +71,8 @@ class RRDBNet(nn.Layer):
trunk = self.trunk_conv(self.RRDB_trunk(fea))
fea = fea + trunk
fea = self.lrelu(
self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest')))
fea = self.lrelu(
self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest')))
fea = self.lrelu(self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest')))
fea = self.lrelu(self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest')))
out = self.conv_last(self.lrelu(self.HRconv(fea)))
return out
from collections import OrderedDict
import paddle
import paddle.nn as nn
# import torch.nn.parallel as P
# from torch.nn.parallel import DataParallel, DistributedDataParallel
# import models.networks as networks
# import models.lr_scheduler as lr_scheduler
from .generators.builder import build_generator
from .discriminators.builder import build_discriminator
from ..solver import build_optimizer
from .base_model import BaseModel
from .losses import GANLoss
from .builder import MODELS
import importlib
import mmcv
import torch
from collections import OrderedDict
from copy import deepcopy
from os import path as osp
from .builder import MODELS
@MODELS.register()
class SRModel(BaseModel):
"""Base SR model for single image super-resolution."""
def __init__(self, cfg):
super(SRModel, self).__init__(cfg)
self.model_names = ['G']
self.netG = build_generator(cfg.model.generator)
self.visual_names = ['lq', 'output', 'gt']
self.loss_names = ['l_total']
# define network
# self.net_g = networks.define_net_g(deepcopy(opt['network_g']))
# self.net_g = self.model_to_device(self.net_g)
# self.print_network(self.net_g)
# load pretrained models
# load_path = self.opt['path'].get('pretrain_model_g', None)
# if load_path is not None:
# self.load_network(self.net_g, load_path,
# self.opt['path']['strict_load'])
self.optimizers = []
if self.isTrain:
self.criterionL1 = paddle.nn.L1Loss()
self.build_lr_scheduler()
self.optimizer_G = build_optimizer(
cfg.optimizer,
self.lr_scheduler,
parameter_list=self.netG.parameters())
self.optimizers.append(self.optimizer_G)
# self.optimizer_D = build_optimizer(
# opt.optimizer,
# self.lr_scheduler,
# parameter_list=self.netD.parameters())
# self.init_training_settings()
# def init_training_settings(self):
# self.net_g.train()
# train_opt = self.opt['train']
# # define losses
# if train_opt.get('pixel_opt'):
# pixel_type = train_opt['pixel_opt'].pop('type')
# cri_pix_cls = getattr(loss_module, pixel_type)
# self.cri_pix = cri_pix_cls(**train_opt['pixel_opt']).to(
# self.device)
# else:
# self.cri_pix = None
# if train_opt.get('perceptual_opt'):
# percep_type = train_opt['perceptual_opt'].pop('type')
# cri_perceptual_cls = getattr(loss_module, percep_type)
# self.cri_perceptual = cri_perceptual_cls(
# **train_opt['perceptual_opt']).to(self.device)
# else:
# self.cri_perceptual = None
# if self.cri_pix is None and self.cri_perceptual is None:
# raise ValueError('Both pixel and perceptual losses are None.')
# # set up optimizers and schedulers
# self.setup_optimizers()
# self.setup_schedulers()
# def setup_optimizers(self):
# train_opt = self.opt['train']
# optim_params = []
# for k, v in self.net_g.named_parameters():
# if v.requires_grad:
# optim_params.append(v)
# else:
# logger = get_root_logger()
# logger.warning(f'Params {k} will not be optimized.')
# optim_type = train_opt['optim_g'].pop('type')
# if optim_type == 'Adam':
# self.optimizer_g = torch.optim.Adam(optim_params,
# **train_opt['optim_g'])
# else:
# raise NotImplementedError(
# f'optimizer {optim_type} is not supperted yet.')
# self.optimizers.append(self.optimizer_g)
def set_input(self, input):
self.lq = paddle.to_tensor(input['lq'])
if 'gt' in input:
self.gt = paddle.to_tensor(input['gt'])
self.image_paths = input['lq_path']
# self.lq = data['lq'].to(self.device)
# if 'gt' in data:
# self.gt = data['gt'].to(self.device)
def forward(self):
pass
def test(self):
"""Forward function used in test time.
"""
with paddle.no_grad():
self.output = self.netG(self.lq)
def optimize_parameters(self):
self.optimizer_G.clear_grad()
self.output = self.netG(self.lq)
l_total = 0
loss_dict = OrderedDict()
# pixel loss
if self.criterionL1:
l_pix = self.criterionL1(self.output, self.gt)
l_total += l_pix
loss_dict['l_pix'] = l_pix
# perceptual loss
# if self.cri_perceptual:
# l_percep, l_style = self.cri_perceptual(self.output, self.gt)
# if l_percep is not None:
# l_total += l_percep
# loss_dict['l_percep'] = l_percep
# if l_style is not None:
# l_total += l_style
# loss_dict['l_style'] = l_style
l_total.backward()
self.loss_l_total = l_total
self.optimizer_G.step()
# self.log_dict = self.reduce_loss_dict(loss_dict)
# def get_current_visuals(self):
# out_dict = OrderedDict()
# out_dict['lq'] = self.lq.detach().cpu()
# out_dict['result'] = self.output.detach().cpu()
# if hasattr(self, 'gt'):
# out_dict['gt'] = self.gt.detach().cpu()
# return out_dict
# def test(self):
# self.net_g.eval()
# with torch.no_grad():
# self.output = self.net_g(self.lq)
# self.net_g.train()
# def dist_validation(self, dataloader, current_iter, tb_logger, save_img):
# logger = get_root_logger()
# logger.info('Only support single GPU validation.')
# self.nondist_validation(dataloader, current_iter, tb_logger, save_img)
# def nondist_validation(self, dataloader, current_iter, tb_logger,
# save_img):
# dataset_name = dataloader.dataset.opt['name']
# with_metrics = self.opt['val'].get('metrics') is not None
# if with_metrics:
# self.metric_results = {
# metric: 0
# for metric in self.opt['val']['metrics'].keys()
# }
# pbar = ProgressBar(len(dataloader))
# for idx, val_data in enumerate(dataloader):
# img_name = osp.splitext(osp.basename(val_data['lq_path'][0]))[0]
# self.feed_data(val_data)
# self.test()
# visuals = self.get_current_visuals()
# sr_img = tensor2img([visuals['result']])
# if 'gt' in visuals:
# gt_img = tensor2img([visuals['gt']])
# del self.gt
# # tentative for out of GPU memory
# del self.lq
# del self.output
# torch.cuda.empty_cache()
# if save_img:
# if self.opt['is_train']:
# save_img_path = osp.join(self.opt['path']['visualization'],
# img_name,
# f'{img_name}_{current_iter}.png')
# else:
# if self.opt['val']['suffix']:
# save_img_path = osp.join(
# self.opt['path']['visualization'], dataset_name,
# f'{img_name}_{self.opt["val"]["suffix"]}.png')
# else:
# save_img_path = osp.join(
# self.opt['path']['visualization'], dataset_name,
# f'{img_name}_{self.opt["name"]}.png')
# mmcv.imwrite(sr_img, save_img_path)
# if with_metrics:
# # calculate metrics
# opt_metric = deepcopy(self.opt['val']['metrics'])
# for name, opt_ in opt_metric.items():
# metric_type = opt_.pop('type')
# self.metric_results[name] += getattr(
# metric_module, metric_type)(sr_img, gt_img, **opt_)
# pbar.update(f'Test {img_name}')
# if with_metrics:
# for metric in self.metric_results.keys():
# self.metric_results[metric] /= (idx + 1)
# self._log_validation_metric_values(current_iter, dataset_name,
# tb_logger)
# def _log_validation_metric_values(self, current_iter, dataset_name,
# tb_logger):
# log_str = f'Validation {dataset_name}\n'
# for metric, value in self.metric_results.items():
# log_str += f'\t # {metric}: {value:.4f}\n'
# logger = get_root_logger()
# logger.info(log_str)
# if tb_logger:
# for metric, value in self.metric_results.items():
# tb_logger.add_scalar(f'metrics/{metric}', value, current_iter)
# def save(self, epoch, current_iter):
# self.save_network(self.net_g, 'net_g', current_iter)
# self.save_training_state(epoch, current_iter)
# import logging
from collections import OrderedDict
import paddle
import paddle.nn as nn
# import torch.nn.parallel as P
# from torch.nn.parallel import DataParallel, DistributedDataParallel
# import models.networks as networks
# import models.lr_scheduler as lr_scheduler
from .generators.builder import build_generator
from .base_model import BaseModel
from .losses import GANLoss
from .builder import MODELS
# logger = logging.getLogger('base')
@MODELS.register()
class SRGANModel(BaseModel):
def __init__(self, cfg):
super(SRGANModel, self).__init__(cfg)
# if opt['dist']:
# self.rank = torch.distributed.get_rank()
# else:
# self.rank = -1 # non dist training
# train_opt = opt['train']
# define networks and load pretrained models
self.model_names = ['G']
self.netG = build_generator(cfg.model.generator)
self.visual_names = ['LQ', 'GT', 'fake_H']
# self.netG = networks.define_G(opt).to(self.device)
# if opt['dist']:
# self.netG = DistributedDataParallel(self.netG, device_ids=[torch.cuda.current_device()])
# else:
# self.netG = DataParallel(self.netG)
if False:  # self.is_train:
self.netD = build_discriminator(cfg.model.discriminator)
# if opt['dist']:
# self.netD = DistributedDataParallel(self.netD,
# device_ids=[torch.cuda.current_device()])
# else:
# self.netD = DataParallel(self.netD)
self.netG.train()
self.netD.train()
# define losses, optimizer and scheduler
# if self.is_train:
# pass
# G pixel loss
# if train_opt['pixel_weight'] > 0:
# l_pix_type = train_opt['pixel_criterion']
# if l_pix_type == 'l1':
# self.cri_pix = nn.L1Loss().to(self.device)
# elif l_pix_type == 'l2':
# self.cri_pix = nn.MSELoss().to(self.device)
# else:
# raise NotImplementedError('Loss type [{:s}] not recognized.'.format(l_pix_type))
# self.l_pix_w = train_opt['pixel_weight']
# else:
# # logger.info('Remove pixel loss.')
# self.cri_pix = None
# # G feature loss
# if train_opt['feature_weight'] > 0:
# l_fea_type = train_opt['feature_criterion']
# if l_fea_type == 'l1':
# self.cri_fea = nn.L1Loss().to(self.device)
# elif l_fea_type == 'l2':
# self.cri_fea = nn.MSELoss().to(self.device)
# else:
# raise NotImplementedError('Loss type [{:s}] not recognized.'.format(l_fea_type))
# self.l_fea_w = train_opt['feature_weight']
# else:
# logger.info('Remove feature loss.')
# self.cri_fea = None
# if self.cri_fea: # load VGG perceptual loss
# self.netF = networks.define_F(opt, use_bn=False).to(self.device)
# if opt['dist']:
# self.netF = DistributedDataParallel(self.netF,
# device_ids=[torch.cuda.current_device()])
# else:
# self.netF = DataParallel(self.netF)
# # GD gan loss
# self.cri_gan = GANLoss(train_opt['gan_type'], 1.0, 0.0).to(self.device)
# self.l_gan_w = train_opt['gan_weight']
# # D_update_ratio and D_init_iters
# self.D_update_ratio = train_opt['D_update_ratio'] if train_opt['D_update_ratio'] else 1
# self.D_init_iters = train_opt['D_init_iters'] if train_opt['D_init_iters'] else 0
# # optimizers
# # G
# wd_G = train_opt['weight_decay_G'] if train_opt['weight_decay_G'] else 0
# optim_params = []
# for k, v in self.netG.named_parameters(): # can optimize for a part of the model
# if v.requires_grad:
# optim_params.append(v)
# else:
# if self.rank <= 0:
# logger.warning('Params [{:s}] will not optimize.'.format(k))
# self.optimizer_G = torch.optim.Adam(optim_params, lr=train_opt['lr_G'],
# weight_decay=wd_G,
# betas=(train_opt['beta1_G'], train_opt['beta2_G']))
# self.optimizers.append(self.optimizer_G)
# # D
# wd_D = train_opt['weight_decay_D'] if train_opt['weight_decay_D'] else 0
# self.optimizer_D = torch.optim.Adam(self.netD.parameters(), lr=train_opt['lr_D'],
# weight_decay=wd_D,
# betas=(train_opt['beta1_D'], train_opt['beta2_D']))
# self.optimizers.append(self.optimizer_D)
# # schedulers
# if train_opt['lr_scheme'] == 'MultiStepLR':
# for optimizer in self.optimizers:
# self.schedulers.append(
# lr_scheduler.MultiStepLR_Restart(optimizer, train_opt['lr_steps'],
# restarts=train_opt['restarts'],
# weights=train_opt['restart_weights'],
# gamma=train_opt['lr_gamma'],
# clear_state=train_opt['clear_state']))
# elif train_opt['lr_scheme'] == 'CosineAnnealingLR_Restart':
# for optimizer in self.optimizers:
# self.schedulers.append(
# lr_scheduler.CosineAnnealingLR_Restart(
# optimizer, train_opt['T_period'], eta_min=train_opt['eta_min'],
# restarts=train_opt['restarts'], weights=train_opt['restart_weights']))
# else:
# raise NotImplementedError('MultiStepLR learning rate scheme is enough.')
# self.log_dict = OrderedDict()
# self.print_network() # print network
# self.load() # load G and D if needed
def set_input(self, input):
"""Unpack input data from the dataloader and perform necessary pre-processing steps.
Parameters:
input (dict): include the data itself and its metadata information.
The option 'direction' can be used to swap images in domain A and domain B.
"""
# AtoB = self.opt.dataset.train.direction == 'AtoB'
if 'A' in input:
self.LQ = paddle.to_tensor(input['A'])
if 'B' in input:
self.GT = paddle.to_tensor(input['B'])
if 'A_paths' in input:
self.image_paths = input['A_paths']
# def feed_data(self, data, need_GT=True):
# self.var_L = data['LQ'].to(self.device) # LQ
# if need_GT:
# self.var_H = data['GT'].to(self.device) # GT
# input_ref = data['ref'] if 'ref' in data else data['GT']
# self.var_ref = input_ref.to(self.device)
def forward(self):
self.fake_H = self.netG(self.LQ)
def optimize_parameters(self, step):
pass
# # G
# for p in self.netD.parameters():
# p.requires_grad = False
# self.optimizer_G.zero_grad()
# self.fake_H = self.netG(self.var_L.detach())
# l_g_total = 0
# if step % self.D_update_ratio == 0 and step > self.D_init_iters:
# if self.cri_pix: # pixel loss
# l_g_pix = self.l_pix_w * self.cri_pix(self.fake_H, self.var_H)
# l_g_total += l_g_pix
# if self.cri_fea: # feature loss
# real_fea = self.netF(self.var_H).detach()
# fake_fea = self.netF(self.fake_H)
# l_g_fea = self.l_fea_w * self.cri_fea(fake_fea, real_fea)
# l_g_total += l_g_fea
# pred_g_fake = self.netD(self.fake_H)
# if self.opt['train']['gan_type'] == 'gan':
# l_g_gan = self.l_gan_w * self.cri_gan(pred_g_fake, True)
# elif self.opt['train']['gan_type'] == 'ragan':
# pred_d_real = self.netD(self.var_ref).detach()
# l_g_gan = self.l_gan_w * (
# self.cri_gan(pred_d_real - torch.mean(pred_g_fake), False) +
# self.cri_gan(pred_g_fake - torch.mean(pred_d_real), True)) / 2
# l_g_total += l_g_gan
# l_g_total.backward()
# self.optimizer_G.step()
# # D
# for p in self.netD.parameters():
# p.requires_grad = True
# self.optimizer_D.zero_grad()
# l_d_total = 0
# pred_d_real = self.netD(self.var_ref)
# pred_d_fake = self.netD(self.fake_H.detach()) # detach to avoid BP to G
# if self.opt['train']['gan_type'] == 'gan':
# l_d_real = self.cri_gan(pred_d_real, True)
# l_d_fake = self.cri_gan(pred_d_fake, False)
# l_d_total = l_d_real + l_d_fake
# elif self.opt['train']['gan_type'] == 'ragan':
# l_d_real = self.cri_gan(pred_d_real - torch.mean(pred_d_fake), True)
# l_d_fake = self.cri_gan(pred_d_fake - torch.mean(pred_d_real), False)
# l_d_total = (l_d_real + l_d_fake) / 2
# l_d_total.backward()
# self.optimizer_D.step()
# # set log
# if step % self.D_update_ratio == 0 and step > self.D_init_iters:
# if self.cri_pix:
# self.log_dict['l_g_pix'] = l_g_pix.item()
# # self.log_dict['l_g_mean_color'] = l_g_mean_color.item()
# if self.cri_fea:
# self.log_dict['l_g_fea'] = l_g_fea.item()
# self.log_dict['l_g_gan'] = l_g_gan.item()
# self.log_dict['l_d_real'] = l_d_real.item()
# self.log_dict['l_d_fake'] = l_d_fake.item()
# self.log_dict['D_real'] = torch.mean(pred_d_real.detach())
# self.log_dict['D_fake'] = torch.mean(pred_d_fake.detach())
# def test(self):
# self.netG.eval()
# with torch.no_grad():
# self.fake_H = self.netG(self.var_L)
# self.netG.train()
# def back_projection(self):
# lr_error = self.var_L - torch.nn.functional.interpolate(self.fake_H,
# scale_factor=1/self.opt['scale'],
# mode='bicubic',
# align_corners=False)
# us_error = torch.nn.functional.interpolate(lr_error,
# scale_factor=self.opt['scale'],
# mode='bicubic',
# align_corners=False)
# self.fake_H += self.opt['back_projection_lamda'] * us_error
# torch.clamp(self.fake_H, 0, 1)
# def test_chop(self):
# self.netG.eval()
# with torch.no_grad():
# self.fake_H = self.forward_chop(self.var_L)
# self.netG.train()
# def forward_chop(self, *args, shave=10, min_size=160000):
# # scale = 1 if self.input_large else self.scale[self.idx_scale]
# scale = self.opt['scale']
# n_GPUs = min(torch.cuda.device_count(), 4)
# args = [a.squeeze().unsqueeze(0) for a in args]
# # height, width
# h, w = args[0].size()[-2:]
# # print('len(args)', len(args))
# # print('args[0].size()', args[0].size())
# top = slice(0, h//2 + shave)
# bottom = slice(h - h//2 - shave, h)
# left = slice(0, w//2 + shave)
# right = slice(w - w//2 - shave, w)
# x_chops = [torch.cat([
# a[..., top, left],
# a[..., top, right],
# a[..., bottom, left],
# a[..., bottom, right]
# ]) for a in args]
# # print('len(x_chops)', len(x_chops))
# # print('x_chops[0].size()', x_chops[0].size())
# y_chops = []
# if h * w < 4 * min_size:
# for i in range(0, 4, n_GPUs):
# x = [x_chop[i:(i + n_GPUs)] for x_chop in x_chops]
# # print(len(x))
# # print(x[0].size())
# y = P.data_parallel(self.netG, *x, range(n_GPUs))
# if not isinstance(y, list): y = [y]
# if not y_chops:
# y_chops = [[c for c in _y.chunk(n_GPUs, dim=0)] for _y in y]
# else:
# for y_chop, _y in zip(y_chops, y):
# y_chop.extend(_y.chunk(n_GPUs, dim=0))
# else:
# # print(x_chops[0].size())
# for p in zip(*x_chops):
# # print('len(p)', len(p))
# # print('p[0].size()', p[0].size())
# y = self.forward_chop(*p, shave=shave, min_size=min_size)
# if not isinstance(y, list): y = [y]
# if not y_chops:
# y_chops = [[_y] for _y in y]
# else:
# for y_chop, _y in zip(y_chops, y): y_chop.append(_y)
# h *= scale
# w *= scale
# top = slice(0, h//2)
# bottom = slice(h - h//2, h)
# bottom_r = slice(h//2 - h, None)
# left = slice(0, w//2)
# right = slice(w - w//2, w)
# right_r = slice(w//2 - w, None)
# # batch size, number of color channels
# b, c = y_chops[0][0].size()[:-2]
# y = [y_chop[0].new(b, c, h, w) for y_chop in y_chops]
# for y_chop, _y in zip(y_chops, y):
# _y[..., top, left] = y_chop[0][..., top, left]
# _y[..., top, right] = y_chop[1][..., top, right_r]
# _y[..., bottom, left] = y_chop[2][..., bottom_r, left]
# _y[..., bottom, right] = y_chop[3][..., bottom_r, right_r]
# if len(y) == 1:
# y = y[0]
# return y
# def get_current_log(self):
# return self.log_dict
# def get_current_visuals(self, need_GT=True):
# out_dict = OrderedDict()
# out_dict['LQ'] = self.var_L.detach()[0].float().cpu()
# out_dict['SR'] = self.fake_H.detach()[0].float().cpu()
# if need_GT:
# out_dict['GT'] = self.var_H.detach()[0].float().cpu()
# return out_dict
# def print_network(self):
# # Generator
# s, n = self.get_network_description(self.netG)
# if isinstance(self.netG, nn.DataParallel) or isinstance(self.netG, DistributedDataParallel):
# net_struc_str = '{} - {}'.format(self.netG.__class__.__name__,
# self.netG.module.__class__.__name__)
# else:
# net_struc_str = '{}'.format(self.netG.__class__.__name__)
# if self.rank <= 0:
# logger.info('Network G structure: {}, with parameters: {:,d}'.format(net_struc_str, n))
# logger.info(s)
# if self.is_train:
# # Discriminator
# s, n = self.get_network_description(self.netD)
# if isinstance(self.netD, nn.DataParallel) or isinstance(self.netD,
# DistributedDataParallel):
# net_struc_str = '{} - {}'.format(self.netD.__class__.__name__,
# self.netD.module.__class__.__name__)
# else:
# net_struc_str = '{}'.format(self.netD.__class__.__name__)
# if self.rank <= 0:
# logger.info('Network D structure: {}, with parameters: {:,d}'.format(
# net_struc_str, n))
# logger.info(s)
# if self.cri_fea: # F, Perceptual Network
# s, n = self.get_network_description(self.netF)
# if isinstance(self.netF, nn.DataParallel) or isinstance(
# self.netF, DistributedDataParallel):
# net_struc_str = '{} - {}'.format(self.netF.__class__.__name__,
# self.netF.module.__class__.__name__)
# else:
# net_struc_str = '{}'.format(self.netF.__class__.__name__)
# if self.rank <= 0:
# logger.info('Network F structure: {}, with parameters: {:,d}'.format(
# net_struc_str, n))
# logger.info(s)
# def load(self):
# load_path_G = self.opt['path']['pretrain_model_G']
# if load_path_G is not None:
# logger.info('Loading model for G [{:s}] ...'.format(load_path_G))
# self.load_network(load_path_G, self.netG, self.opt['path']['strict_load'])
# load_path_D = self.opt['path']['pretrain_model_D']
# if self.opt['is_train'] and load_path_D is not None:
# logger.info('Loading model for D [{:s}] ...'.format(load_path_D))
# self.load_network(load_path_D, self.netD, self.opt['path']['strict_load'])
# def save(self, iter_step):
# self.save_network(self.netG, 'G', iter_step)
# self.save_network(self.netD, 'D', iter_step)
......@@ -69,21 +69,59 @@ class BCEWithLogitsLoss():
return out
# class BCEWithLogitsLoss(fluid.dygraph.Layer):
# def __init__(self, weight=None, reduction='mean'):
# if reduction not in ['sum', 'mean', 'none']:
# raise ValueError(
# "The value of 'reduction' in bce_loss should be 'sum', 'mean' or 'none', but "
# "received %s, which is not allowed." % reduction)
# super(BCEWithLogitsLoss, self).__init__()
# # self.weight = weight
# # self.reduction = reduction
# self.bce_loss = paddle.nn.BCELoss(weight, reduction)
# def forward(self, input, label):
# input = paddle.nn.functional.sigmoid(input, True)
# return self.bce_loss(input, label)
class _SpectralNorm(paddle.nn.SpectralNorm):
def __init__(self,
weight_shape,
dim=0,
power_iters=1,
eps=1e-12,
dtype='float32'):
super(_SpectralNorm, self).__init__(weight_shape, dim, power_iters, eps, dtype)
def forward(self, weight):
paddle.fluid.data_feeder.check_variable_and_dtype(weight, "weight", ['float32', 'float64'],
'SpectralNorm')
inputs = {'Weight': weight, 'U': self.weight_u, 'V': self.weight_v}
out = self._helper.create_variable_for_type_inference(self._dtype)
_power_iters = self._power_iters if self.training else 0
self._helper.append_op(
type="spectral_norm",
inputs=inputs,
outputs={"Out": out, },
attrs={
"dim": self._dim,
"power_iters": _power_iters,
"eps": self._eps,
})
return out
class Spectralnorm(paddle.nn.Layer):
def __init__(self,
layer,
dim=0,
power_iters=1,
eps=1e-12,
dtype='float32'):
super(Spectralnorm, self).__init__()
self.spectral_norm = _SpectralNorm(layer.weight.shape, dim, power_iters, eps, dtype)
self.dim = dim
self.power_iters = power_iters
self.eps = eps
self.layer = layer
weight = layer._parameters['weight']
del layer._parameters['weight']
self.weight_orig = self.create_parameter(weight.shape, dtype=weight.dtype)
self.weight_orig.set_value(weight)
def forward(self, x):
weight = self.spectral_norm(self.weight_orig)
self.layer.weight = weight
out = self.layer(x)
return out
def initial_type(
......
import os
import sys
def video2frames(video_path, outpath, **kargs):
def _dict2str(kargs):
cmd_str = ''
for k, v in kargs.items():
cmd_str += (' ' + str(k) + ' ' + str(v))
return cmd_str
ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
vid_name = video_path.split('/')[-1].split('.')[0]
out_full_path = os.path.join(outpath, vid_name)
if not os.path.exists(out_full_path):
os.makedirs(out_full_path)
# video file name
outformat = out_full_path + '/%08d.png'
cmd = ffmpeg
cmd = ffmpeg + [' -i ', video_path, ' -start_number ', ' 0 ', outformat]
cmd = ''.join(cmd) + _dict2str(kargs)
if os.system(cmd) != 0:
raise RuntimeError('ffmpeg process video: {} error'.format(vid_name))
sys.stdout.flush()
return out_full_path
def frames2video(frame_path, video_path, r):
ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
cmd = ffmpeg + [
' -r ', r, ' -f ', ' image2 ', ' -i ', frame_path, ' -vcodec ',
' libx264 ', ' -pix_fmt ', ' yuv420p ', ' -crf ', ' 16 ', video_path
]
cmd = ''.join(cmd)
if os.system(cmd) != 0:
raise RuntimeError('ffmpeg process video: {} error'.format(video_path))
sys.stdout.flush()
\ No newline at end of file
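An end-to-end sketch of the two new helpers above (paths are illustrative; extra keyword arguments to video2frames are appended to the ffmpeg command via _dict2str):

frames_dir = video2frames('input.mp4', 'output/')        # -> 'output/input'
# ... run a model over output/input/%08d.png here ...
frames2video('output/input/%08d.png', 'output/result.mp4', r='25')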
......@@ -2,7 +2,7 @@ import numpy as np
from PIL import Image
def tensor2img(input_image, imtype=np.uint8):
def tensor2img(input_image, min_max=(-1., 1.), imtype=np.uint8):
""""Converts a Tensor array into a numpy image array.
Parameters:
......@@ -15,8 +15,8 @@ def tensor2img(input_image, imtype=np.uint8):
image_numpy = image_numpy[0]
if image_numpy.shape[0] == 1: # grayscale to RGB
image_numpy = np.tile(image_numpy, (3, 1, 1))
# image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0 # post-processing: tranpose and scaling
image_numpy = image_numpy.clip(0, 1)
image_numpy = image_numpy.clip(min_max[0], min_max[1])
image_numpy = (image_numpy - min_max[0]) / (min_max[1] - min_max[0])
image_numpy = (np.transpose(image_numpy, (1, 2, 0))) * 255.0  # post-processing: transpose and scaling
else: # if it is a numpy array, do nothing
image_numpy = input_image
......
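Usage sketch for the new min_max argument (a batched CHW tensor is assumed, as the indexing above implies): pass the range the network actually produces.

import paddle

t = paddle.rand([1, 3, 8, 8])            # outputs already in [0, 1], e.g. the SR models
img = tensor2img(t, min_max=(0., 1.))    # HWC uint8 image
# CycleGAN/Pix2Pix-style [-1, 1] outputs can rely on the default min_max.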
tqdm
\ No newline at end of file
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from setuptools import setup
from io import open
with open('requirments.txt', encoding="utf-8-sig") as f:
requirements = f.readlines()
def readme():
with open('doc/doc_en/whl_en.md', encoding="utf-8-sig") as f:
README = f.read()
return README
setup(
name='ppgan',
packages=['ppgan'],
include_package_data=True,
entry_points={"console_scripts": ["paddlegan= paddlegan.paddlegan:main"]},
version='0.1.0',
install_requires=requirements,
license='Apache License 2.0',
description='Awesome GAN toolkits based on PaddlePaddle',
url='https://github.com/PaddlePaddle/PaddleGAN',
download_url='https://github.com/PaddlePaddle/PaddleGAN.git',
keywords=[
'gan paddlegan'
],
classifiers=[
'Intended Audience :: Developers', 'Operating System :: OS Independent',
'Natural Language :: Chinese (Simplified)',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7', 'Topic :: Utilities'
], )
\ No newline at end of file