未验证 提交 c35db907 编写于 作者: L LielinJiang 提交者: GitHub

Merge pull request #10 from LielinJiang/fix-old-video

Add DeOldify
......@@ -91,4 +91,3 @@ parser.add_argument('--use_cuda',
help='use cuda or not')
parser.add_argument('--use_cudnn', default=1, type=int, help='use cudnn or not')
args = parser.parse_args()
import os, sys
import math
import random
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import time
import glob
import shutil
import numpy as np
from imageio import imread, imsave
import cv2
import paddle.fluid as fluid
from paddle.incubate.hapi.download import get_path_from_url
import networks
from util import *
from my_args import args
from my_args import parser
DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar'
def infer_engine(model_dir,
run_mode='fluid',
......@@ -76,18 +80,17 @@ class VideoFrameInterp(object):
key_frame_thread=0.,
output_path='output'):
self.video_path = video_path
self.output_path = output_path
self.output_path = os.path.join(output_path, 'DAIN')
if model_path is None:
model_path = get_path_from_url(DAIN_WEIGHT_URL, cur_path)
self.model_path = model_path
self.time_step = time_step
self.key_frame_thread = key_frame_thread
self.exe, self.program, self.fetch_targets = executor(model_path,
use_gpu=use_gpu)
# self.predictor = load_predictor(
# model_dir,
# run_mode=run_mode,
# min_subgraph_size=3,
# use_gpu=use_gpu)
def run(self):
frame_path_input = os.path.join(self.output_path, 'frames-input')
......@@ -269,9 +272,12 @@ class VideoFrameInterp(object):
os.remove(video_pattern_output)
frames_to_video_ffmpeg(frame_pattern_combined, video_pattern_output,
r2)
return frame_pattern_combined, video_pattern_output
if __name__ == '__main__':
args = parser.parse_args()
predictor = VideoFrameInterp(args.time_step, args.saved_model,
args.video_path, args.output_path)
predictor.run()
......@@ -7,14 +7,9 @@ VID_PATH=/paddle/work/github/DAIN/data/CBA.mp4
OUT_PATH=output
MODEL_PATH=DAIN_paddle_weight
#CUDA_VISIBLE_DEVICES=1 python demo.py \
# --time_step 0.125 \
# --video_path=$VID_PATH \
# --output_path=$OUT_PATH \
# --saved_model=$MODEL_PATH
CUDA_VISIBLE_DEVICES=2 python predict.py \
--time_step 0.125 \
--video_path=$VID_PATH \
--output_path=$OUT_PATH \
--saved_model=$MODEL_PATH
--saved_model=$MODEL_PATH
\ No newline at end of file
......@@ -95,8 +95,12 @@ def combine_frames(input, interpolated, combined, num_frames):
dst = os.path.join(combined, '{:08d}.png'.format(i * (num_frames + 1)))
shutil.copy2(src, dst)
if i < num1 - 1:
for k in range(num_frames):
src = frames2[i * num_frames + k]
dst = os.path.join(
combined, '{:08d}.png'.format(i * (num_frames + 1) + k + 1))
shutil.copy2(src, dst)
try:
for k in range(num_frames):
src = frames2[i * num_frames + k]
dst = os.path.join(
combined, '{:08d}.png'.format(i * (num_frames + 1) + k + 1))
shutil.copy2(src, dst)
except Exception as e:
print(e)
print(len(frames2), num_frames, i, k, i * num_frames + k)
import numpy as np
import paddle
import paddle.nn as nn
def is_listy(x):
return isinstance(x, (tuple,list))
class Hook():
"Create a hook on `m` with `hook_func`."
def __init__(self, m, hook_func, is_forward=True, detach=True):
self.hook_func,self.detach,self.stored = hook_func,detach,None
f = m.register_forward_post_hook if is_forward else m.register_backward_hook
self.hook = f(self.hook_fn)
self.removed = False
def hook_fn(self, module, input, output):
"Applies `hook_func` to `module`, `input`, `output`."
if self.detach:
input = (o.detach() for o in input ) if is_listy(input ) else input.detach()
output = (o.detach() for o in output) if is_listy(output) else output.detach()
self.stored = self.hook_func(module, input, output)
def remove(self):
"Remove the hook from the model."
if not self.removed:
self.hook.remove()
self.removed=True
def __enter__(self, *args): return self
def __exit__(self, *args): self.remove()
class Hooks():
"Create several hooks on the modules in `ms` with `hook_func`."
def __init__(self, ms, hook_func, is_forward=True, detach=True):
self.hooks = []
try:
for m in ms:
self.hooks.append(Hook(m, hook_func, is_forward, detach))
except Exception as e:
print(e)
def __getitem__(self,i:int)->Hook: return self.hooks[i]
def __len__(self)->int: return len(self.hooks)
def __iter__(self): return iter(self.hooks)
@property
def stored(self): return [o.stored for o in self]
def remove(self):
"Remove the hooks from the model."
for h in self.hooks: h.remove()
def __enter__(self, *args): return self
def __exit__ (self, *args): self.remove()
def _hook_inner(m,i,o): return o if isinstance(o, paddle.framework.Variable) else o if is_listy(o) else list(o)
def hook_output (module, detach=True, grad=False):
"Return a `Hook` that stores activations of `module` in `self.stored`"
return Hook(module, _hook_inner, detach=detach, is_forward=not grad)
def hook_outputs(modules, detach=True, grad=False):
"Return `Hooks` that store activations of all `modules` in `self.stored`"
return Hooks(modules, _hook_inner, detach=detach, is_forward=not grad)
def model_sizes(m, size=(64,64)):
"Pass a dummy input through the model `m` to get the various sizes of activations."
with hook_outputs(m) as hooks:
x = dummy_eval(m, size)
return [o.stored.shape for o in hooks]
def dummy_eval(m, size=(64,64)):
"Pass a `dummy_batch` in evaluation mode in `m` with `size`."
m.eval()
return m(dummy_batch(size))
def dummy_batch(size=(64,64), ch_in=3):
"Create a dummy batch to go through `m` with `size`."
arr = np.random.rand(1, ch_in, *size).astype('float32') * 2 - 1
return paddle.to_tensor(arr)
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from resnet_backbone import resnet34, resnet101
from hook import hook_outputs, model_sizes, dummy_eval
# from weight_norm import weight_norm
from spectral_norm import Spectralnorm
from conv import Conv1D
from paddle import fluid
class SequentialEx(nn.Layer):
"Like `nn.Sequential`, but with ModuleList semantics, and can access module input"
def __init__(self, *layers):
super().__init__()
self.layers = nn.LayerList(layers)
def forward(self, x):
res = x
for l in self.layers:
if isinstance(l, MergeLayer):
l.orig = x
nres = l(res)
# We have to remove res.orig to avoid hanging refs and therefore memory leaks
# l.orig = None
res = nres
return res
def __getitem__(self,i): return self.layers[i]
def append(self,l): return self.layers.append(l)
def extend(self,l): return self.layers.extend(l)
def insert(self,i,l): return self.layers.insert(i,l)
class Deoldify(SequentialEx):
def __init__(self, encoder, n_classes, blur=False, blur_final=True, self_attention=False, y_range=None, last_cross=True, bottle=False, norm_type='Batch', nf_factor=1, **kwargs):
imsize = (256, 256)
sfs_szs = model_sizes(encoder, size=imsize)
sfs_idxs = list(reversed(_get_sfs_idxs(sfs_szs)))
self.sfs = hook_outputs([encoder[i] for i in sfs_idxs], detach=False)
x = dummy_eval(encoder, imsize).detach()
nf = 512 * nf_factor
extra_bn = norm_type == 'Spectral'
ni = sfs_szs[-1][1]
middle_conv = nn.Sequential(
custom_conv_layer(
ni, ni * 2, norm_type=norm_type, extra_bn=extra_bn
),
custom_conv_layer(
ni * 2, ni, norm_type=norm_type, extra_bn=extra_bn
),
)
layers = [encoder, nn.BatchNorm(ni), nn.ReLU(), middle_conv]
for i, idx in enumerate(sfs_idxs):
not_final = i != len(sfs_idxs) - 1
up_in_c, x_in_c = int(x.shape[1]), int(sfs_szs[idx][1])
do_blur = blur and (not_final or blur_final)
sa = self_attention and (i == len(sfs_idxs) - 3)
n_out = nf if not_final else nf // 2
unet_block = UnetBlockWide(
up_in_c,
x_in_c,
n_out,
self.sfs[i],
final_div=not_final,
blur=blur,
self_attention=sa,
norm_type=norm_type,
extra_bn=extra_bn,
**kwargs
)
unet_block.eval()
layers.append(unet_block)
x = unet_block(x)
ni = x.shape[1]
if imsize != sfs_szs[0][-2:]:
layers.append(PixelShuffle_ICNR(ni, **kwargs))
if last_cross:
layers.append(MergeLayer(dense=True))
ni += 3
layers.append(res_block(ni, bottle=bottle, norm_type=norm_type, **kwargs))
layers += [
custom_conv_layer(ni, n_classes, ks=1, use_activ=False, norm_type=norm_type)
]
if y_range is not None:
layers.append(SigmoidRange(*y_range))
super().__init__(*layers)
def custom_conv_layer(
ni: int,
nf: int,
ks: int = 3,
stride: int = 1,
padding: int = None,
bias: bool = None,
is_1d: bool = False,
norm_type='Batch',
use_activ: bool = True,
leaky: float = None,
transpose: bool = False,
self_attention: bool = False,
extra_bn: bool = False,
**kwargs
):
"Create a sequence of convolutional (`ni` to `nf`), ReLU (if `use_activ`) and batchnorm (if `bn`) layers."
if padding is None:
padding = (ks - 1) // 2 if not transpose else 0
bn = norm_type in ('Batch', 'Batchzero') or extra_bn == True
if bias is None:
bias = not bn
conv_func = nn.ConvTranspose2d if transpose else nn.Conv1d if is_1d else nn.Conv2d
conv = conv_func(ni, nf, kernel_size=ks, bias_attr=bias, stride=stride, padding=padding)
if norm_type == 'Weight':
print('use weight norm')
conv = nn.utils.weight_norm(conv)
elif norm_type == 'Spectral':
# pass
conv = Spectralnorm(conv)
layers = [conv]
if use_activ:
layers.append(relu(True, leaky=leaky))
if bn:
layers.append((nn.BatchNorm if is_1d else nn.BatchNorm)(nf))
if self_attention:
layers.append(SelfAttention(nf))
return nn.Sequential(*layers)
def relu(inplace:bool=False, leaky:float=None):
"Return a relu activation, maybe `leaky` and `inplace`."
return nn.LeakyReLU(leaky) if leaky is not None else nn.ReLU()
class UnetBlockWide(nn.Layer):
"A quasi-UNet block, using `PixelShuffle_ICNR upsampling`."
def __init__(
self,
up_in_c: int,
x_in_c: int,
n_out: int,
hook,
final_div: bool = True,
blur: bool = False,
leaky: float = None,
self_attention: bool = False,
**kwargs
):
super().__init__()
self.hook = hook
up_out = x_out = n_out // 2
self.shuf = CustomPixelShuffle_ICNR(
up_in_c, up_out, blur=blur, leaky=leaky, **kwargs
)
self.bn = nn.BatchNorm(x_in_c)
ni = up_out + x_in_c
self.conv = custom_conv_layer(
ni, x_out, leaky=leaky, self_attention=self_attention, **kwargs
)
self.relu = relu(leaky=leaky)
def forward(self, up_in):
s = self.hook.stored
up_out = self.shuf(up_in)
ssh = s.shape[-2:]
if ssh != up_out.shape[-2:]:
up_out = F.interpolate(up_out, s.shape[-2:], mode='nearest')
cat_x = self.relu(paddle.concat([up_out, self.bn(s)], axis=1))
return self.conv(cat_x)
class UnetBlockDeep(paddle.fluid.Layer):
"A quasi-UNet block, using `PixelShuffle_ICNR upsampling`."
def __init__(
self,
up_in_c: int,
x_in_c: int,
# hook: Hook,
final_div: bool = True,
blur: bool = False,
leaky: float = None,
self_attention: bool = False,
nf_factor: float = 1.0,
**kwargs
):
super().__init__()
self.shuf = CustomPixelShuffle_ICNR(
up_in_c, up_in_c // 2, blur=blur, leaky=leaky, **kwargs
)
self.bn = nn.BatchNorm(x_in_c)
ni = up_in_c // 2 + x_in_c
nf = int((ni if final_div else ni // 2) * nf_factor)
self.conv1 = custom_conv_layer(ni, nf, leaky=leaky, **kwargs)
self.conv2 = custom_conv_layer(
nf, nf, leaky=leaky, self_attention=self_attention, **kwargs
)
self.relu = relu(leaky=leaky)
def forward(self, up_in):
s = self.hook.stored
up_out = self.shuf(up_in)
ssh = s.shape[-2:]
if ssh != up_out.shape[-2:]:
up_out = F.interpolate(up_out, s.shape[-2:], mode='nearest')
cat_x = self.relu(paddle.concat([up_out, self.bn(s)], axis=1))
return self.conv2(self.conv1(cat_x))
def ifnone(a, b):
"`a` if `a` is not None, otherwise `b`."
return b if a is None else a
class PixelShuffle_ICNR(nn.Layer):
"Upsample by `scale` from `ni` filters to `nf` (default `ni`), using `nn.PixelShuffle`, `icnr` init, and `weight_norm`."
def __init__(self, ni:int, nf:int=None, scale:int=2, blur:bool=False, norm_type='Weight', leaky:float=None):
super().__init__()
nf = ifnone(nf, ni)
self.conv = conv_layer(ni, nf*(scale**2), ks=1, norm_type=norm_type, use_activ=False)
self.shuf = PixelShuffle(scale)
self.pad = ReplicationPad2d((1,0,1,0))
self.blur = nn.Pool2D(2, pool_stride=1, pool_type='avg')
self.relu = relu(True, leaky=leaky)
def forward(self,x):
x = self.shuf(self.relu(self.conv(x)))
return self.blur(self.pad(x)) if self.blur else x
def conv_layer(ni:int, nf:int, ks:int=3, stride:int=1, padding:int=None, bias:bool=None, is_1d:bool=False,
norm_type='Batch', use_activ:bool=True, leaky:float=None,
transpose:bool=False, init=None, self_attention:bool=False):
"Create a sequence of convolutional (`ni` to `nf`), ReLU (if `use_activ`) and batchnorm (if `bn`) layers."
if padding is None: padding = (ks-1)//2 if not transpose else 0
bn = norm_type in ('Batch', 'BatchZero')
if bias is None: bias = not bn
conv_func = nn.ConvTranspose2d if transpose else nn.Conv1d if is_1d else nn.Conv2d
conv = conv_func(ni, nf, kernel_size=ks, bias_attr=bias, stride=stride, padding=padding)
if norm_type=='Weight':
conv = nn.utils.weight_norm(conv)
elif norm_type=='Spectral':
conv = Spectralnorm(conv)
layers = [conv]
if use_activ: layers.append(relu(True, leaky=leaky))
if bn: layers.append((nn.BatchNorm if is_1d else nn.BatchNorm)(nf))
if self_attention: layers.append(SelfAttention(nf))
return nn.Sequential(*layers)
class CustomPixelShuffle_ICNR(paddle.fluid.Layer):
"Upsample by `scale` from `ni` filters to `nf` (default `ni`), using `nn.PixelShuffle`, `icnr` init, and `weight_norm`."
def __init__(
self,
ni: int,
nf: int = None,
scale: int = 2,
blur: bool = False,
leaky: float = None,
**kwargs
):
super().__init__()
nf = ifnone(nf, ni)
self.conv = custom_conv_layer(
ni, nf * (scale ** 2), ks=1, use_activ=False, **kwargs
)
self.shuf = PixelShuffle(scale)
self.pad = ReplicationPad2d((1, 0, 1, 0))
self.blur = nn.Pool2D(2, pool_stride=1, pool_type='avg')
self.relu = nn.LeakyReLU(leaky) if leaky is not None else nn.ReLU()#relu(True, leaky=leaky)
def forward(self, x):
x = self.shuf(self.relu(self.conv(x)))
return self.blur(self.pad(x)) if self.blur else x
class MergeLayer(paddle.fluid.Layer):
"Merge a shortcut with the result of the module by adding them or concatenating thme if `dense=True`."
def __init__(self, dense:bool=False):
super().__init__()
self.dense=dense
self.orig = None
def forward(self, x):
out = paddle.concat([x,self.orig], axis=1) if self.dense else (x+self.orig)
self.orig = None
return out
def res_block(nf, dense:bool=False, norm_type='Batch', bottle:bool=False, **conv_kwargs):
"Resnet block of `nf` features. `conv_kwargs` are passed to `conv_layer`."
norm2 = norm_type
if not dense and (norm_type=='Batch'): norm2 = 'BatchZero'
nf_inner = nf//2 if bottle else nf
return SequentialEx(conv_layer(nf, nf_inner, norm_type=norm_type, **conv_kwargs),
conv_layer(nf_inner, nf, norm_type=norm2, **conv_kwargs),
MergeLayer(dense))
class SigmoidRange(paddle.fluid.Layer):
"Sigmoid module with range `(low,x_max)`"
def __init__(self, low, high):
super().__init__()
self.low,self.high = low,high
def forward(self, x): return sigmoid_range(x, self.low, self.high)
def sigmoid_range(x, low, high):
"Sigmoid function with range `(low, high)`"
return F.sigmoid(x) * (high - low) + low
class PixelShuffle(paddle.fluid.Layer):
def __init__(self, upscale_factor):
super(PixelShuffle, self).__init__()
self.upscale_factor = upscale_factor
def forward(self, x):
return paddle.fluid.layers.pixel_shuffle(x, self.upscale_factor)
class ReplicationPad2d(nn.Layer):
def __init__(self, size):
super(ReplicationPad2d, self).__init__()
self.size = size
def forward(self, x):
return paddle.fluid.layers.pad2d(x, self.size, mode="edge")
def conv1d(ni:int, no:int, ks:int=1, stride:int=1, padding:int=0, bias:bool=False):
"Create and initialize a `nn.Conv1d` layer with spectral normalization."
conv = nn.Conv1d(ni, no, ks, stride=stride, padding=padding, bias_attr=bias)
return Spectralnorm(conv)
class SelfAttention(nn.Layer):
"Self attention layer for nd."
def __init__(self, n_channels):
super().__init__()
self.query = conv1d(n_channels, n_channels//8)
self.key = conv1d(n_channels, n_channels//8)
self.value = conv1d(n_channels, n_channels)
self.gamma = self.create_parameter(shape=[1],
default_initializer=paddle.fluid.initializer.Constant(0.0))#nn.Parameter(tensor([0.]))
def forward(self, x):
#Notation from https://arxiv.org/pdf/1805.08318.pdf
size = x.shape
x = paddle.reshape(x, list(size[:2]) + [-1])
f,g,h = self.query(x),self.key(x),self.value(x)
beta = paddle.nn.functional.softmax(paddle.bmm(paddle.transpose(f, [0, 2, 1]), g), axis=1)
o = self.gamma * paddle.bmm(h, beta) + x
return paddle.reshape(o, size)
def _get_sfs_idxs(sizes):
"Get the indexes of the layers where the size of the activation changes."
feature_szs = [size[-1] for size in sizes]
sfs_idxs = list(
np.where(np.array(feature_szs[:-1]) != np.array(feature_szs[1:]))[0]
)
if feature_szs[0] != feature_szs[1]:
sfs_idxs = [0] + sfs_idxs
return sfs_idxs
def build_model():
backbone = resnet101()
cut = -2
encoder = nn.Sequential(*list(backbone.children())[:cut])
model = Deoldify(encoder, 3, blur=True, y_range=(-3, 3), norm_type='Spectral', self_attention=True, nf_factor=2)
return model
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import cv2
import glob
import argparse
import numpy as np
import paddle
import pickle
from PIL import Image
from tqdm import tqdm
from paddle import fluid
from model import build_model
from paddle.incubate.hapi.download import get_path_from_url
parser = argparse.ArgumentParser(description='DeOldify')
parser.add_argument('--input', type=str, default='none', help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--weight_path', type=str, default='none', help='Path to the reference image directory')
DeOldify_weight_url = 'https://paddlegan.bj.bcebos.com/applications/DeOldify_stable.pdparams'
def frames_to_video_ffmpeg(framepath, videopath, r):
ffmpeg = ['ffmpeg ', ' -loglevel ', ' error ']
cmd = ffmpeg + [
' -r ', r, ' -f ', ' image2 ', ' -i ', framepath, ' -vcodec ',
' libx264 ', ' -pix_fmt ', ' yuv420p ', ' -crf ', ' 16 ', videopath
]
cmd = ''.join(cmd)
print(cmd)
if os.system(cmd) == 0:
print('Video: {} done'.format(videopath))
else:
print('Video: {} error'.format(videopath))
print('')
sys.stdout.flush()
class DeOldifyPredictor():
def __init__(self, input, output, batch_size=1, weight_path=None):
self.input = input
self.output = os.path.join(output, 'DeOldify')
self.model = build_model()
if weight_path is None:
weight_path = get_path_from_url(DeOldify_weight_url, cur_path)
state_dict, _ = paddle.load(weight_path)
self.model.load_dict(state_dict)
self.model.eval()
def norm(self, img, render_factor=32, render_base=16):
target_size = render_factor * render_base
img = img.resize((target_size, target_size), resample=Image.BILINEAR)
img = np.array(img).transpose([2, 0, 1]).astype('float32') / 255.0
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
img -= img_mean
img /= img_std
return img.astype('float32')
def denorm(self, img):
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
img *= img_std
img += img_mean
img = img.transpose((1, 2, 0))
return (img * 255).astype('uint8')
def post_process(self, raw_color, orig):
color_np = np.asarray(raw_color)
orig_np = np.asarray(orig)
color_yuv = cv2.cvtColor(color_np, cv2.COLOR_BGR2YUV)
orig_yuv = cv2.cvtColor(orig_np, cv2.COLOR_BGR2YUV)
hires = np.copy(orig_yuv)
hires[:, :, 1:3] = color_yuv[:, :, 1:3]
final = cv2.cvtColor(hires, cv2.COLOR_YUV2BGR)
final = Image.fromarray(final)
return final
def run_single(self, img_path):
ori_img = Image.open(img_path).convert('LA').convert('RGB')
img = self.norm(ori_img)
x = paddle.to_tensor(img[np.newaxis,...])
out = self.model(x)
pred_img = self.denorm(out.numpy()[0])
pred_img = Image.fromarray(pred_img)
pred_img = pred_img.resize(ori_img.size, resample=Image.BILINEAR)
pred_img = self.post_process(pred_img, ori_img)
return pred_img
def run(self):
vid = self.input
base_name = os.path.basename(vid).split('.')[0]
output_path = os.path.join(self.output, base_name)
pred_frame_path = os.path.join(output_path, 'frames_pred')
if not os.path.exists(output_path):
os.makedirs(output_path)
if not os.path.exists(pred_frame_path):
os.makedirs(pred_frame_path)
cap = cv2.VideoCapture(vid)
fps = cap.get(cv2.CAP_PROP_FPS)
out_path = dump_frames_ffmpeg(vid, output_path)
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
for frame in tqdm(frames):
pred_img = self.run_single(frame)
frame_name = os.path.basename(frame)
pred_img.save(os.path.join(pred_frame_path, frame_name))
frame_pattern_combined = os.path.join(pred_frame_path, '%08d.png')
vid_out_path = os.path.join(output_path, '{}_deoldify_out.mp4'.format(base_name))
frames_to_video_ffmpeg(frame_pattern_combined, vid_out_path, str(int(fps)))
return frame_pattern_combined, vid_out_path
def dump_frames_ffmpeg(vid_path, outpath, r=None, ss=None, t=None):
ffmpeg = ['ffmpeg ', ' -loglevel ', ' error ']
vid_name = vid_path.split('/')[-1].split('.')[0]
out_full_path = os.path.join(outpath, 'frames_input')
if not os.path.exists(out_full_path):
os.makedirs(out_full_path)
# video file name
outformat = out_full_path + '/%08d.png'
if ss is not None and t is not None and r is not None:
cmd = ffmpeg + [
' -ss ',
ss,
' -t ',
t,
' -i ',
vid_path,
' -r ',
r,
' -qscale:v ',
' 0.1 ',
' -start_number ',
' 0 ',
outformat
]
else:
cmd = ffmpeg + [' -i ', vid_path, ' -start_number ', ' 0 ', outformat]
cmd = ''.join(cmd)
print(cmd)
if os.system(cmd) == 0:
print('Video: {} done'.format(vid_name))
else:
print('Video: {} error'.format(vid_name))
print('')
sys.stdout.flush()
return out_full_path
if __name__=='__main__':
paddle.enable_imperative()
args = parser.parse_args()
predictor = DeOldifyPredictor(args.input, args.output, weight_path=args.weight_path)
frames_path, temp_video_path = predictor.run()
print('output video path:', temp_video_path)
\ No newline at end of file
import paddle
import paddle.nn as nn
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152']
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias_attr=False)
class BasicBlock(paddle.fluid.Layer):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm(planes)
self.relu = nn.ReLU()
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(paddle.fluid.Layer):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias_attr=False)
self.bn1 = nn.BatchNorm(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias_attr=False)
self.bn2 = nn.BatchNorm(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias_attr=False)
self.bn3 = nn.BatchNorm(planes * 4)
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNet(paddle.fluid.Layer):
def __init__(self, block, layers, num_classes=1000):
self.inplanes = 64
super(ResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias_attr=False)
self.bn1 = nn.BatchNorm(64)
self.relu = nn.ReLU()
self.maxpool = nn.Pool2D(pool_size=3, pool_stride=2, pool_padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
self.avgpool = nn.Pool2D(7, pool_stride=1, pool_type='avg')
self.fc = nn.Linear(512 * block.expansion, num_classes)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias_attr=False),
nn.BatchNorm(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = paddle.reshape(x, (x.shape[0], -1))
x = self.fc(x)
return x
def resnet18(pretrained=False, **kwargs):
"""Constructs a ResNet-18 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
return model
def resnet34(pretrained=False, **kwargs):
"""Constructs a ResNet-34 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
return model
def resnet50(pretrained=False, **kwargs):
"""Constructs a ResNet-50 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
return model
def resnet101(pretrained=False, **kwargs):
"""Constructs a ResNet-101 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
return model
def resnet152(pretrained=False, **kwargs):
"""Constructs a ResNet-152 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
return model
import numpy as np
from paddle import fluid
from paddle.fluid import dygraph
from paddle.fluid import layers as F
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.data_feeder import check_variable_and_dtype
import paddle
import paddle.nn as nn
class _SpectralNorm(nn.SpectralNorm):
def __init__(self,
weight_shape,
dim=0,
power_iters=1,
eps=1e-12,
dtype='float32'):
super(_SpectralNorm, self).__init__(weight_shape, dim, power_iters, eps, dtype)
def forward(self, weight):
check_variable_and_dtype(weight, "weight", ['float32', 'float64'],
'SpectralNorm')
inputs = {'Weight': weight, 'U': self.weight_u, 'V': self.weight_v}
out = self._helper.create_variable_for_type_inference(self._dtype)
_power_iters = self._power_iters if self.training else 0
self._helper.append_op(
type="spectral_norm",
inputs=inputs,
outputs={"Out": out, },
attrs={
"dim": self._dim,
"power_iters": _power_iters, #self._power_iters,
"eps": self._eps,
})
return out
class Spectralnorm(nn.Layer):
def __init__(self,
layer,
dim=0,
power_iters=1,
eps=1e-12,
dtype='float32'):
super(Spectralnorm, self).__init__()
self.spectral_norm = _SpectralNorm(layer.weight.shape, dim, power_iters, eps, dtype)
self.dim = dim
self.power_iters = power_iters
self.eps = eps
self.layer = layer
weight = layer._parameters['weight']
del layer._parameters['weight']
self.weight_orig = self.create_parameter(weight.shape, dtype=weight.dtype)
self.weight_orig.set_value(weight)
def forward(self, x):
weight = self.spectral_norm(self.weight_orig)
self.layer.weight = weight
out = self.layer(x)
return out
MODEL:
name: "EDVR"
format: "png"
num_frames: 5
center: 2
num_filters: 128 #64
deform_conv_groups: 8
front_RBs: 5
back_RBs: 40 #10
predeblur: False
HR_in: False
w_TSA: True #False
INFER:
scale: 4
crop_size: 256
interval_list: [1]
random_reverse: False
number_frames: 5
batch_size: 1
file_root: "/workspace/color/input_frames"
inference_model: "/workspace/PaddleGAN/applications/EDVR/data/inference_model"
use_flip: False
use_rot: False
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
......@@ -14,83 +14,41 @@
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import time
import logging
import argparse
import ast
import glob
import numpy as np
try:
import cPickle as pickle
except:
import pickle
import paddle.fluid as fluid
import cv2
from utils.config_utils import *
#import models
from reader import get_reader
#from metrics import get_metrics
from utils.utility import check_cuda
from utils.utility import check_version
logging.root.handlers = []
FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
logging.basicConfig(level=logging.DEBUG, format=FORMAT, stream=sys.stdout)
logger = logging.getLogger(__name__)
from data import EDVRDataset
from paddle.incubate.hapi.download import get_path_from_url
EDVR_weight_url = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar'
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
'--model_name',
type=str,
default='AttentionCluster',
help='name of model to train.')
parser.add_argument(
'--inference_model',
type=str,
default='./data/inference_model',
help='path of inference_model.')
parser.add_argument(
'--config',
type=str,
default='configs/attention_cluster.txt',
help='path to config file of model')
parser.add_argument(
'--use_gpu',
type=ast.literal_eval,
default=True,
help='default use gpu.')
parser.add_argument(
'--batch_size',
type=int,
default=1,
help='sample number in a batch for inference.')
parser.add_argument(
'--filelist',
'--input',
type=str,
default=None,
help='path to inferenece data file lists file.')
parser.add_argument(
'--log_interval',
type=int,
default=1,
help='mini-batch interval to log.')
help='input video path')
parser.add_argument(
'--infer_topk',
type=int,
default=20,
help='topk predictions to restore.')
parser.add_argument(
'--save_dir',
'--output',
type=str,
default=os.path.join('data', 'predict_results'),
help='directory to store results')
default='output',
help='output path')
parser.add_argument(
'--video_path',
'--weight_path',
type=str,
default=None,
help='directory to store results')
help='weight path')
args = parser.parse_args()
return args
......@@ -106,69 +64,143 @@ def get_img(pred):
return pred
def save_img(img, framename):
dirname = './demo/resultpng'
filename = os.path.join(dirname, framename+'.png')
cv2.imwrite(filename, img)
def infer(args):
# parse config
config = parse_config(args.config)
infer_config = merge_configs(config, 'infer', vars(args))
print_configs(infer_config, "Infer")
inference_model = args.inference_model
model_filename = 'EDVR_model.pdmodel'
params_filename = 'EDVR_params.pdparams'
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
[inference_program, feed_list, fetch_list] = fluid.io.load_inference_model(dirname=inference_model, model_filename=model_filename, params_filename=params_filename, executor=exe)
infer_reader = get_reader(args.model_name.upper(), 'infer', infer_config)
#infer_metrics = get_metrics(args.model_name.upper(), 'infer', infer_config)
#infer_metrics.reset()
periods = []
cur_time = time.time()
for infer_iter, data in enumerate(infer_reader()):
if args.model_name == 'EDVR':
data_feed_in = [items[0] for items in data]
video_info = [items[1:] for items in data]
infer_outs = exe.run(inference_program,
fetch_list=fetch_list,
feed={feed_list[0]:np.array(data_feed_in)})
infer_result_list = [item for item in infer_outs]
videonames = [item[0] for item in video_info]
framenames = [item[1] for item in video_info]
for i in range(len(infer_result_list)):
img_i = get_img(infer_result_list[i])
save_img(img_i, 'img' + videonames[i] + framenames[i])
prev_time = cur_time
dirname = os.path.dirname(framename)
if not os.path.exists(dirname):
os.makedirs(dirname)
cv2.imwrite(framename, img)
def dump_frames_ffmpeg(vid_path, outpath, r=None, ss=None, t=None):
ffmpeg = ['ffmpeg ', ' -loglevel ', ' error ']
vid_name = vid_path.split('/')[-1].split('.')[0]
out_full_path = os.path.join(outpath, 'frames_input')
if not os.path.exists(out_full_path):
os.makedirs(out_full_path)
# video file name
outformat = out_full_path + '/%08d.png'
if ss is not None and t is not None and r is not None:
cmd = ffmpeg + [
' -ss ',
ss,
' -t ',
t,
' -i ',
vid_path,
' -r ',
r,
' -qscale:v ',
' 0.1 ',
' -start_number ',
' 0 ',
outformat
]
else:
cmd = ffmpeg + [' -i ', vid_path, ' -start_number ', ' 0 ', outformat]
cmd = ''.join(cmd)
print(cmd)
if os.system(cmd) == 0:
print('Video: {} done'.format(vid_name))
else:
print('Video: {} error'.format(vid_name))
print('')
sys.stdout.flush()
return out_full_path
def frames_to_video_ffmpeg(framepath, videopath, r):
ffmpeg = ['ffmpeg ', ' -loglevel ', ' error ']
cmd = ffmpeg + [
' -r ', r, ' -f ', ' image2 ', ' -i ', framepath, ' -vcodec ',
' libx264 ', ' -pix_fmt ', ' yuv420p ', ' -crf ', ' 16 ', videopath
]
cmd = ''.join(cmd)
print(cmd)
if os.system(cmd) == 0:
print('Video: {} done'.format(videopath))
else:
print('Video: {} error'.format(videopath))
print('')
sys.stdout.flush()
class EDVRPredictor:
def __init__(self, input, output, weight_path=None):
self.input = input
self.output = os.path.join(output, 'EDVR')
place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
self.exe = fluid.Executor(place)
if weight_path is None:
weight_path = get_path_from_url(EDVR_weight_url, cur_path)
print(weight_path)
model_filename = 'EDVR_model.pdmodel'
params_filename = 'EDVR_params.pdparams'
out = fluid.io.load_inference_model(dirname=weight_path,
model_filename=model_filename,
params_filename=params_filename,
executor=self.exe)
self.infer_prog, self.feed_list, self.fetch_list = out
def run(self):
vid = self.input
base_name = os.path.basename(vid).split('.')[0]
output_path = os.path.join(self.output, base_name)
pred_frame_path = os.path.join(output_path, 'frames_pred')
if not os.path.exists(output_path):
os.makedirs(output_path)
if not os.path.exists(pred_frame_path):
os.makedirs(pred_frame_path)
cap = cv2.VideoCapture(vid)
fps = cap.get(cv2.CAP_PROP_FPS)
out_path = dump_frames_ffmpeg(vid, output_path)
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
dataset = EDVRDataset(frames)
periods = []
cur_time = time.time()
period = cur_time - prev_time
periods.append(period)
#infer_metrics.accumulate(infer_result_list)
for infer_iter, data in enumerate(dataset):
data_feed_in = [data[0]]
infer_outs = self.exe.run(self.infer_prog,
fetch_list=self.fetch_list,
feed={self.feed_list[0]:np.array(data_feed_in)})
infer_result_list = [item for item in infer_outs]
if args.log_interval > 0 and infer_iter % args.log_interval == 0:
logger.info('Processed {} samples'.format(infer_iter + 1))
frame_path = data[1]
img_i = get_img(infer_result_list[0])
save_img(img_i, os.path.join(pred_frame_path, os.path.basename(frame_path)))
logger.info('[INFER] infer finished. average time: {}'.format(np.mean(periods)))
prev_time = cur_time
cur_time = time.time()
period = cur_time - prev_time
periods.append(period)
if not os.path.isdir(args.save_dir):
os.makedirs(args.save_dir)
print('Processed {} samples'.format(infer_iter + 1))
frame_pattern_combined = os.path.join(pred_frame_path, '%08d.png')
vid_out_path = os.path.join(self.output, '{}_edvr_out.mp4'.format(base_name))
frames_to_video_ffmpeg(frame_pattern_combined, vid_out_path, str(int(fps)))
#infer_metrics.finalize_and_log_out(savedir=args.save_dir)
return frame_pattern_combined, vid_out_path
if __name__ == "__main__":
args = parse_args()
# check whether the installed paddle is compiled with GPU
check_cuda(args.use_gpu)
check_version()
logger.info(args)
predictor = EDVRPredictor(args.input, args.output, args.weight_path)
predictor.run()
infer(args)
from .reader_utils import regist_reader, get_reader
from .edvr_reader import EDVRReader
regist_reader("EDVR", EDVRReader)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import sys
import cv2
import math
import random
import multiprocessing
import functools
import numpy as np
import paddle
import cv2
import logging
from .reader_utils import DataReader
logger = logging.getLogger(__name__)
python_ver = sys.version_info
random.seed(0)
np.random.seed(0)
class EDVRReader(DataReader):
"""
Data reader for video super resolution task fit for EDVR model.
This is specified for REDS dataset.
"""
def __init__(self, name, mode, cfg):
super(EDVRReader, self).__init__(name, mode, cfg)
self.format = cfg.MODEL.format
self.crop_size = self.get_config_from_sec(mode, 'crop_size')
self.interval_list = self.get_config_from_sec(mode, 'interval_list')
self.random_reverse = self.get_config_from_sec(mode, 'random_reverse')
self.number_frames = self.get_config_from_sec(mode, 'number_frames')
# set batch size and file list
self.batch_size = cfg[mode.upper()]['batch_size']
self.fileroot = cfg[mode.upper()]['file_root']
self.use_flip = self.get_config_from_sec(mode, 'use_flip', False)
self.use_rot = self.get_config_from_sec(mode, 'use_rot', False)
self.num_reader_threads = self.get_config_from_sec(mode, 'num_reader_threads', 1)
self.buf_size = self.get_config_from_sec(mode, 'buf_size', 1024)
self.fix_random_seed = self.get_config_from_sec(mode, 'fix_random_seed', False)
if self.mode != 'infer':
self.gtroot = self.get_config_from_sec(mode, 'gt_root')
self.scale = self.get_config_from_sec(mode, 'scale', 1)
self.LR_input = (self.scale > 1)
if self.fix_random_seed:
random.seed(0)
np.random.seed(0)
self.num_reader_threads = 1
def create_reader(self):
logger.info('initialize reader ... ')
self.filelist = []
for video_name in os.listdir(self.fileroot):
if (self.mode == 'train') and (video_name in ['000', '011', '015', '020']):
continue
for frame_name in os.listdir(os.path.join(self.fileroot, video_name)):
frame_idx = frame_name.split('.')[0]
video_frame_idx = video_name + '_' + frame_idx
# for each item in self.filelist is like '010_00000015', '260_00000090'
self.filelist.append(video_frame_idx)
if self.mode == 'test' or self.mode == 'infer':
self.filelist.sort()
if self.num_reader_threads == 1:
reader_func = make_reader
else:
reader_func = make_multi_reader
if self.mode != 'infer':
return reader_func(filelist = self.filelist,
num_threads = self.num_reader_threads,
batch_size = self.batch_size,
is_training = (self.mode == 'train'),
number_frames = self.number_frames,
interval_list = self.interval_list,
random_reverse = self.random_reverse,
fileroot = self.fileroot,
crop_size = self.crop_size,
use_flip = self.use_flip,
use_rot = self.use_rot,
gtroot = self.gtroot,
LR_input = self.LR_input,
scale = self.scale,
mode = self.mode)
else:
return reader_func(filelist = self.filelist,
num_threads = self.num_reader_threads,
batch_size = self.batch_size,
is_training = (self.mode == 'train'),
number_frames = self.number_frames,
interval_list = self.interval_list,
random_reverse = self.random_reverse,
fileroot = self.fileroot,
crop_size = self.crop_size,
use_flip = self.use_flip,
use_rot = self.use_rot,
gtroot = '',
LR_input = True,
scale = 4,
mode = self.mode)
def get_sample_data(item, number_frames, interval_list, random_reverse, fileroot,
crop_size, use_flip, use_rot, gtroot, LR_input, scale, mode='train'):
video_name = item.split('_')[0]
frame_name = item.split('_')[1]
if (mode == 'train') or (mode == 'valid'):
ngb_frames, name_b = get_neighbor_frames(frame_name, \
number_frames = number_frames, \
interval_list = interval_list, \
random_reverse = random_reverse)
elif (mode == 'test') or (mode == 'infer'):
ngb_frames, name_b = get_test_neighbor_frames(int(frame_name), number_frames)
else:
raise NotImplementedError('mode {} not implemented'.format(mode))
frame_name = name_b
print('key2', ngb_frames, name_b)
if mode != 'infer':
img_GT = read_img(os.path.join(gtroot, video_name, frame_name + '.png'), is_gt=True)
#print('gt_mean', np.mean(img_GT))
frame_list = []
for ngb_frm in ngb_frames:
ngb_name = "%04d"%ngb_frm
#img = read_img(os.path.join(fileroot, video_name, frame_name + '.png'))
img = read_img(os.path.join(fileroot, video_name, ngb_name + '.png'))
frame_list.append(img)
#print('img_mean', np.mean(img))
H, W, C = frame_list[0].shape
# add random crop
if (mode == 'train') or (mode == 'valid'):
if LR_input:
LQ_size = crop_size // scale
rnd_h = random.randint(0, max(0, H - LQ_size))
rnd_w = random.randint(0, max(0, W - LQ_size))
#print('rnd_h {}, rnd_w {}', rnd_h, rnd_w)
frame_list = [v[rnd_h:rnd_h + LQ_size, rnd_w:rnd_w + LQ_size, :] for v in frame_list]
rnd_h_HR, rnd_w_HR = int(rnd_h * scale), int(rnd_w * scale)
img_GT = img_GT[rnd_h_HR:rnd_h_HR + crop_size, rnd_w_HR:rnd_w_HR + crop_size, :]
else:
rnd_h = random.randint(0, max(0, H - crop_size))
rnd_w = random.randint(0, max(0, W - crop_size))
frame_list = [v[rnd_h:rnd_h + crop_size, rnd_w:rnd_w + crop_size, :] for v in frame_list]
img_GT = img_GT[rnd_h:rnd_h + crop_size, rnd_w:rnd_w + crop_size, :]
# add random flip and rotation
if mode != 'infer':
frame_list.append(img_GT)
if (mode == 'train') or (mode == 'valid'):
rlt = img_augment(frame_list, use_flip, use_rot)
else:
rlt = frame_list
if mode != 'infer':
frame_list = rlt[0:-1]
img_GT = rlt[-1]
else:
frame_list = rlt
# stack LQ images to NHWC, N is the frame number
img_LQs = np.stack(frame_list, axis=0)
# BGR to RGB, HWC to CHW, numpy to tensor
img_LQs = img_LQs[:, :, :, [2, 1, 0]]
img_LQs = np.transpose(img_LQs, (0, 3, 1, 2)).astype('float32')
if mode != 'infer':
img_GT = img_GT[:, :, [2, 1, 0]]
img_GT = np.transpose(img_GT, (2, 0, 1)).astype('float32')
return img_LQs, img_GT
else:
return img_LQs
def get_test_neighbor_frames(crt_i, N, max_n=100, padding='new_info'):
"""Generate an index list for reading N frames from a sequence of images
Args:
crt_i (int): current center index
max_n (int): max number of the sequence of images (calculated from 1)
N (int): reading N frames
padding (str): padding mode, one of replicate | reflection | new_info | circle
Example: crt_i = 0, N = 5
replicate: [0, 0, 0, 1, 2]
reflection: [2, 1, 0, 1, 2]
new_info: [4, 3, 0, 1, 2]
circle: [3, 4, 0, 1, 2]
Returns:
return_l (list [int]): a list of indexes
"""
max_n = max_n - 1
n_pad = N // 2
return_l = []
for i in range(crt_i - n_pad, crt_i + n_pad + 1):
if i < 0:
if padding == 'replicate':
add_idx = 0
elif padding == 'reflection':
add_idx = -i
elif padding == 'new_info':
add_idx = (crt_i + n_pad) + (-i)
elif padding == 'circle':
add_idx = N + i
else:
raise ValueError('Wrong padding mode')
elif i > max_n:
if padding == 'replicate':
add_idx = max_n
elif padding == 'reflection':
add_idx = max_n * 2 - i
elif padding == 'new_info':
add_idx = (crt_i - n_pad) - (i - max_n)
elif padding == 'circle':
add_idx = i - N
else:
raise ValueError('Wrong padding mode')
else:
add_idx = i
return_l.append(add_idx)
name_b = '{:08d}'.format(crt_i)
return return_l, name_b
def get_neighbor_frames(frame_name, number_frames, interval_list, random_reverse, max_frame=99, bordermode=False):
center_frame_idx = int(frame_name)
half_N_frames = number_frames // 2
#### determine the neighbor frames
interval = random.choice(interval_list)
if bordermode:
direction = 1 # 1: forward; 0: backward
if random_reverse and random.random() < 0.5:
direction = random.choice([0, 1])
if center_frame_idx + interval * (number_frames - 1) > max_frame:
direction = 0
elif center_frame_idx - interval * (number_frames - 1) < 0:
direction = 1
# get the neighbor list
if direction == 1:
neighbor_list = list(
range(center_frame_idx, center_frame_idx + interval * number_frames, interval))
else:
neighbor_list = list(
range(center_frame_idx, center_frame_idx - interval * number_frames, -interval))
name_b = '{:08d}'.format(neighbor_list[0])
else:
# ensure not exceeding the borders
while (center_frame_idx + half_N_frames * interval >
max_frame) or (center_frame_idx - half_N_frames * interval < 0):
center_frame_idx = random.randint(0, max_frame)
# get the neighbor list
neighbor_list = list(
range(center_frame_idx - half_N_frames * interval,
center_frame_idx + half_N_frames * interval + 1, interval))
if random_reverse and random.random() < 0.5:
neighbor_list.reverse()
name_b = '{:08d}'.format(neighbor_list[half_N_frames])
assert len(neighbor_list) == number_frames, \
"frames slected have length({}), but it should be ({})".format(len(neighbor_list), number_frames)
return neighbor_list, name_b
def read_img(path, size=None, is_gt=False):
"""read image by cv2
return: Numpy float32, HWC, BGR, [0,1]"""
img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
#if not is_gt:
# #print(path)
# img = cv2.resize(img, (0, 0), fx=0.25, fy=0.25)
#print("path: ", path)
img = img.astype(np.float32) / 255.
if img.ndim == 2:
img = np.expand_dims(img, axis=2)
# some images have 4 channels
if img.shape[2] > 3:
img = img[:, :, :3]
return img
def img_augment(img_list, hflip=True, rot=True):
"""horizontal flip OR rotate (0, 90, 180, 270 degrees)"""
hflip = hflip and random.random() < 0.5
vflip = rot and random.random() < 0.5
rot90 = rot and random.random() < 0.5
def _augment(img):
if hflip:
img = img[:, ::-1, :]
if vflip:
img = img[::-1, :, :]
if rot90:
img = img.transpose(1, 0, 2)
return img
return [_augment(img) for img in img_list]
def make_reader(filelist,
num_threads,
batch_size,
is_training,
number_frames,
interval_list,
random_reverse,
fileroot,
crop_size,
use_flip,
use_rot,
gtroot,
LR_input,
scale,
mode='train'):
fl = filelist
def reader_():
if is_training:
random.shuffle(fl)
batch_out = []
for item in fl:
if mode != 'infer':
img_LQs, img_GT = get_sample_data(item,
number_frames, interval_list, random_reverse, fileroot,
crop_size,use_flip, use_rot, gtroot, LR_input, scale, mode)
else:
img_LQs = get_sample_data(item,
number_frames, interval_list, random_reverse, fileroot,
crop_size,use_flip, use_rot, gtroot, LR_input, scale, mode)
videoname = item.split('_')[0]
framename = item.split('_')[1]
if (mode == 'train') or (mode == 'valid'):
batch_out.append((img_LQs, img_GT))
elif mode == 'test':
batch_out.append((img_LQs, img_GT, videoname, framename))
elif mode == 'infer':
batch_out.append((img_LQs, videoname, framename))
else:
raise NotImplementedError("mode {} not implemented".format(mode))
if len(batch_out) == batch_size:
yield batch_out
batch_out = []
return reader_
def make_multi_reader(filelist,
num_threads,
batch_size,
is_training,
number_frames,
interval_list,
random_reverse,
fileroot,
crop_size,
use_flip,
use_rot,
gtroot,
LR_input,
scale,
mode='train'):
def read_into_queue(flq, queue):
batch_out = []
for item in flq:
if mode != 'infer':
img_LQs, img_GT = get_sample_data(item,
number_frames, interval_list, random_reverse, fileroot,
crop_size,use_flip, use_rot, gtroot, LR_input, scale, mode)
else:
img_LQs = get_sample_data(item,
number_frames, interval_list, random_reverse, fileroot,
crop_size,use_flip, use_rot, gtroot, LR_input, scale, mode)
videoname = item.split('_')[0]
framename = item.split('_')[1]
if (mode == 'train') or (mode == 'valid'):
batch_out.append((img_LQs, img_GT))
elif mode == 'test':
batch_out.append((img_LQs, img_GT, videoname, framename))
elif mode == 'infer':
batch_out.append((img_LQs, videoname, framename))
else:
raise NotImplementedError("mode {} not implemented".format(mode))
if len(batch_out) == batch_size:
queue.put(batch_out)
batch_out = []
queue.put(None)
def queue_reader():
fl = filelist
if is_training:
random.shuffle(fl)
n = num_threads
queue_size = 20
reader_lists = [None] * n
file_num = int(len(fl) // n)
for i in range(n):
if i < len(reader_lists) - 1:
tmp_list = fl[i * file_num:(i + 1) * file_num]
else:
tmp_list = fl[i * file_num:]
reader_lists[i] = tmp_list
queue = multiprocessing.Queue(queue_size)
p_list = [None] * len(reader_lists)
# for reader_list in reader_lists:
for i in range(len(reader_lists)):
reader_list = reader_lists[i]
p_list[i] = multiprocessing.Process(
target=read_into_queue, args=(reader_list, queue))
p_list[i].start()
reader_num = len(reader_lists)
finish_num = 0
while finish_num < reader_num:
sample = queue.get()
if sample is None:
finish_num += 1
else:
yield sample
for i in range(len(p_list)):
if p_list[i].is_alive():
p_list[i].join()
return queue_reader
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import pickle
import cv2
import numpy as np
import random
class ReaderNotFoundError(Exception):
"Error: reader not found"
def __init__(self, reader_name, avail_readers):
super(ReaderNotFoundError, self).__init__()
self.reader_name = reader_name
self.avail_readers = avail_readers
def __str__(self):
msg = "Reader {} Not Found.\nAvailiable readers:\n".format(
self.reader_name)
for reader in self.avail_readers:
msg += " {}\n".format(reader)
return msg
class DataReader(object):
"""data reader for video input"""
def __init__(self, model_name, mode, cfg):
self.name = model_name
self.mode = mode
self.cfg = cfg
def create_reader(self):
"""Not implemented"""
pass
def get_config_from_sec(self, sec, item, default=None):
if sec.upper() not in self.cfg:
return default
return self.cfg[sec.upper()].get(item, default)
class ReaderZoo(object):
def __init__(self):
self.reader_zoo = {}
def regist(self, name, reader):
assert reader.__base__ == DataReader, "Unknow model type {}".format(
type(reader))
self.reader_zoo[name] = reader
def get(self, name, mode, cfg):
for k, v in self.reader_zoo.items():
if k == name:
return v(name, mode, cfg)
raise ReaderNotFoundError(name, self.reader_zoo.keys())
# singleton reader_zoo
reader_zoo = ReaderZoo()
def regist_reader(name, reader):
reader_zoo.regist(name, reader)
def get_reader(name, mode, cfg):
reader_model = reader_zoo.get(name, mode, cfg)
return reader_model.create_reader()
......@@ -17,10 +17,10 @@ valid_interval=1
weights="./weights/paddle_state_dict_L.npz"
export CUDA_VISIBLE_DEVICES=4,5,6,7 #0,1,5,6 fast, 2,3,4,7 slow
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
export CUDA_VISIBLE_DEVICES=6 #0,1,5,6 fast, 2,3,4,7 slow
# export FLAGS_fast_eager_deletion_mode=1
# export FLAGS_eager_delete_tensor_gb=0.0
# export FLAGS_fraction_of_gpu_memory_to_use=0.98
if [ "$mode"x == "predict"x ]; then
echo $mode $name $configs $weights
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import yaml
from .utility import AttrDict
import logging
logger = logging.getLogger(__name__)
CONFIG_SECS = [
'train',
'valid',
'test',
'infer',
]
def parse_config(cfg_file):
"""Load a config file into AttrDict"""
import yaml
with open(cfg_file, 'r') as fopen:
yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.Loader))
create_attr_dict(yaml_config)
return yaml_config
def create_attr_dict(yaml_config):
from ast import literal_eval
for key, value in yaml_config.items():
if type(value) is dict:
yaml_config[key] = value = AttrDict(value)
if isinstance(value, str):
try:
value = literal_eval(value)
except BaseException:
pass
if isinstance(value, AttrDict):
create_attr_dict(yaml_config[key])
else:
yaml_config[key] = value
return
def merge_configs(cfg, sec, args_dict):
assert sec in CONFIG_SECS, "invalid config section {}".format(sec)
sec_dict = getattr(cfg, sec.upper())
for k, v in args_dict.items():
if v is None:
continue
try:
if hasattr(sec_dict, k):
setattr(sec_dict, k, v)
except:
pass
return cfg
def print_configs(cfg, mode):
logger.info("---------------- {:>5} Arguments ----------------".format(
mode))
for sec, sec_items in cfg.items():
logger.info("{}:".format(sec))
for k, v in sec_items.items():
logger.info(" {}:{}".format(k, v))
logger.info("-------------------------------------------------")
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import sys
import signal
import logging
import paddle
import paddle.fluid as fluid
__all__ = ['AttrDict']
logger = logging.getLogger(__name__)
def _term(sig_num, addition):
print('current pid is %s, group id is %s' % (os.getpid(), os.getpgrp()))
os.killpg(os.getpgid(os.getpid()), signal.SIGKILL)
signal.signal(signal.SIGTERM, _term)
signal.signal(signal.SIGINT, _term)
class AttrDict(dict):
def __getattr__(self, key):
return self[key]
def __setattr__(self, key, value):
if key in self.__dict__:
self.__dict__[key] = value
else:
self[key] = value
def check_cuda(use_cuda, err = \
"\nYou can not set use_gpu = True in the model because you are using paddlepaddle-cpu.\n \
Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_gpu = False to run models on CPU.\n"
):
try:
if use_cuda == True and fluid.is_compiled_with_cuda() == False:
print(err)
sys.exit(1)
except Exception as e:
pass
def check_version():
"""
Log error and exit when the installed version of paddlepaddle is
not satisfied.
"""
err = "PaddlePaddle version 1.6 or higher is required, " \
"or a suitable develop version is satisfied as well. \n" \
"Please make sure the version is good with your code." \
try:
fluid.require_version('1.6.0')
except Exception as e:
logger.error(err)
sys.exit(1)
cd DAIN/pwcnet/correlation_op
# 第一次需要执行
# bash make.shap
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python -c 'import paddle; print(paddle.sysconfig.get_lib())'`
export PYTHONPATH=$PYTHONPATH:`pwd`
cd -
# input 输入视频的路径
# output 输出视频保存的路径
# proccess_order 使用模型的顺序
python tools/main.py \
--input input.mp4 --output output --proccess_order DAIN DeOldify EDVR
import sys
sys.path.append('.')
import argparse
import paddle
from DAIN.predict import VideoFrameInterp
from DeOldify.predict import DeOldifyPredictor
from EDVR.predict import EDVRPredictor
parser = argparse.ArgumentParser(description='Fix video')
parser.add_argument('--input', type=str, default=None, help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--DAIN_weight', type=str, default=None, help='Path to the reference image directory')
parser.add_argument('--DeOldify_weight', type=str, default=None, help='Path to the reference image directory')
parser.add_argument('--EDVR_weight', type=str, default=None, help='Path to the reference image directory')
# DAIN args
parser.add_argument('--time_step', type=float, default=0.5, help='choose the time steps')
parser.add_argument('--proccess_order', type=str, default='none', nargs='+', help='Process order')
if __name__ == "__main__":
args = parser.parse_args()
print('args...', args)
orders = args.proccess_order
temp_video_path = None
for order in orders:
if order == 'DAIN':
predictor = VideoFrameInterp(args.time_step, args.DAIN_weight,
args.input, output_path=args.output)
frames_path, temp_video_path = predictor.run()
elif order == 'DeOldify':
print('frames:', frames_path)
print('video_path:', temp_video_path)
paddle.disable_static()
predictor = DeOldifyPredictor(temp_video_path, args.output, weight_path=args.DeOldify_weight)
frames_path, temp_video_path = predictor.run()
print('frames:', frames_path)
print('video_path:', temp_video_path)
paddle.enable_static()
elif order == 'EDVR':
predictor = EDVRPredictor(temp_video_path, args.output, weight_path=args.EDVR_weight)
frames_path, temp_video_path = predictor.run()
print('frames:', frames_path)
print('video_path:', temp_video_path)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
新手
引导
客服 返回
顶部