提交 ffc46c0d 编写于 作者: L LielinJiang

add fix old video

上级 031e15f1
...@@ -91,4 +91,4 @@ parser.add_argument('--use_cuda', ...@@ -91,4 +91,4 @@ parser.add_argument('--use_cuda',
help='use cuda or not') help='use cuda or not')
parser.add_argument('--use_cudnn', default=1, type=int, help='use cudnn or not') parser.add_argument('--use_cudnn', default=1, type=int, help='use cudnn or not')
args = parser.parse_args() # args = parser.parse_args()
import os, sys import os
import math import sys
import random
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import time import time
import glob import glob
import shutil
import numpy as np import numpy as np
from imageio import imread, imsave from imageio import imread, imsave
import cv2 import cv2
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.incubate.hapi.download import get_path_from_url
import networks import networks
from util import * from util import *
from my_args import args from my_args import parser
DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar'
def infer_engine(model_dir, def infer_engine(model_dir,
run_mode='fluid', run_mode='fluid',
...@@ -76,18 +80,17 @@ class VideoFrameInterp(object): ...@@ -76,18 +80,17 @@ class VideoFrameInterp(object):
key_frame_thread=0., key_frame_thread=0.,
output_path='output'): output_path='output'):
self.video_path = video_path self.video_path = video_path
self.output_path = output_path self.output_path = os.path.join(output_path, 'DAIN')
if model_path is None:
model_path = get_path_from_url(DAIN_WEIGHT_URL, cur_path)
self.model_path = model_path self.model_path = model_path
self.time_step = time_step self.time_step = time_step
self.key_frame_thread = key_frame_thread self.key_frame_thread = key_frame_thread
self.exe, self.program, self.fetch_targets = executor(model_path, self.exe, self.program, self.fetch_targets = executor(model_path,
use_gpu=use_gpu) use_gpu=use_gpu)
# self.predictor = load_predictor(
# model_dir,
# run_mode=run_mode,
# min_subgraph_size=3,
# use_gpu=use_gpu)
def run(self): def run(self):
frame_path_input = os.path.join(self.output_path, 'frames-input') frame_path_input = os.path.join(self.output_path, 'frames-input')
...@@ -269,9 +272,12 @@ class VideoFrameInterp(object): ...@@ -269,9 +272,12 @@ class VideoFrameInterp(object):
os.remove(video_pattern_output) os.remove(video_pattern_output)
frames_to_video_ffmpeg(frame_pattern_combined, video_pattern_output, frames_to_video_ffmpeg(frame_pattern_combined, video_pattern_output,
r2) r2)
return frame_pattern_combined, video_pattern_output
if __name__ == '__main__': if __name__ == '__main__':
args = parser.parse_args()
predictor = VideoFrameInterp(args.time_step, args.saved_model, predictor = VideoFrameInterp(args.time_step, args.saved_model,
args.video_path, args.output_path) args.video_path, args.output_path)
predictor.run() predictor.run()
...@@ -3,7 +3,8 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python -c 'import paddle; print(paddle. ...@@ -3,7 +3,8 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python -c 'import paddle; print(paddle.
export PYTHONPATH=$PYTHONPATH:`pwd` export PYTHONPATH=$PYTHONPATH:`pwd`
cd ../../ cd ../../
VID_PATH=/paddle/work/github/DAIN/data/CBA.mp4 # VID_PATH=/workspace/codes/colorization_paddle_net_weights/video/Peking_input360p.mp4
VID_PATH=/workspace/codes/colorization_paddle_net_weights/video/Peking_input360p_clip_5_15.mp4
OUT_PATH=output OUT_PATH=output
MODEL_PATH=DAIN_paddle_weight MODEL_PATH=DAIN_paddle_weight
...@@ -13,8 +14,8 @@ MODEL_PATH=DAIN_paddle_weight ...@@ -13,8 +14,8 @@ MODEL_PATH=DAIN_paddle_weight
# --output_path=$OUT_PATH \ # --output_path=$OUT_PATH \
# --saved_model=$MODEL_PATH # --saved_model=$MODEL_PATH
CUDA_VISIBLE_DEVICES=2 python predict.py \ CUDA_VISIBLE_DEVICES=5 python predict.py \
--time_step 0.125 \ --time_step 0.5 \
--video_path=$VID_PATH \ --video_path=$VID_PATH \
--output_path=$OUT_PATH \ --output_path=$OUT_PATH \
--saved_model=$MODEL_PATH --saved_model=$MODEL_PATH
...@@ -95,8 +95,12 @@ def combine_frames(input, interpolated, combined, num_frames): ...@@ -95,8 +95,12 @@ def combine_frames(input, interpolated, combined, num_frames):
dst = os.path.join(combined, '{:08d}.png'.format(i * (num_frames + 1))) dst = os.path.join(combined, '{:08d}.png'.format(i * (num_frames + 1)))
shutil.copy2(src, dst) shutil.copy2(src, dst)
if i < num1 - 1: if i < num1 - 1:
for k in range(num_frames): try:
src = frames2[i * num_frames + k] for k in range(num_frames):
dst = os.path.join( src = frames2[i * num_frames + k]
combined, '{:08d}.png'.format(i * (num_frames + 1) + k + 1)) dst = os.path.join(
shutil.copy2(src, dst) combined, '{:08d}.png'.format(i * (num_frames + 1) + k + 1))
shutil.copy2(src, dst)
except Exception as e:
print(e)
print(len(frames2), num_frames, i, k, i * num_frames + k)
import numpy as np
import paddle
import paddle.nn as nn
def is_listy(x):
    """Return True when `x` is a tuple or a list (subclasses included)."""
    if isinstance(x, tuple):
        return True
    return isinstance(x, list)
class Hook():
    """Register `hook_func` on module `m` and keep its latest result.

    The value returned by `hook_func(module, input, output)` is saved in
    `self.stored` every time the hook fires.
    """

    def __init__(self, m, hook_func, is_forward=True, detach=True):
        self.hook_func = hook_func
        self.detach = detach
        self.stored = None
        # Only the registration method that is actually needed is looked up.
        if is_forward:
            register = m.register_forward_post_hook
        else:
            register = m.register_backward_hook
        self.hook = register(self.hook_fn)
        self.removed = False

    def hook_fn(self, module, input, output):
        """Call `hook_func` on `module`, `input`, `output` and store the result."""
        if self.detach:
            # List-like values become lazy generators of detached items.
            if is_listy(input):
                input = (o.detach() for o in input)
            else:
                input = input.detach()
            if is_listy(output):
                output = (o.detach() for o in output)
            else:
                output = output.detach()
        self.stored = self.hook_func(module, input, output)

    def remove(self):
        """Unregister the hook; calling this more than once is a no-op."""
        if self.removed:
            return
        self.hook.remove()
        self.removed = True

    def __enter__(self, *args):
        return self

    def __exit__(self, *args):
        self.remove()
class Hooks():
    """Hold one `Hook` per module in `ms`, all using the same `hook_func`."""

    def __init__(self, ms, hook_func, is_forward=True, detach=True):
        self.hooks = []
        # Best effort: a failure is printed and the hooks created so far are kept.
        try:
            for module in ms:
                hook = Hook(module, hook_func, is_forward, detach)
                self.hooks.append(hook)
        except Exception as e:
            print(e)

    def __getitem__(self, i: int) -> Hook:
        return self.hooks[i]

    def __len__(self) -> int:
        return len(self.hooks)

    def __iter__(self):
        return iter(self.hooks)

    @property
    def stored(self):
        """List of the `stored` value of every hook, in registration order."""
        return [hook.stored for hook in self]

    def remove(self):
        """Unregister every hook from its module."""
        for hook in self.hooks:
            hook.remove()

    def __enter__(self, *args):
        return self

    def __exit__(self, *args):
        self.remove()
def _hook_inner(m, i, o):
    """Return `o` unchanged if it is a Variable, tuple or list; otherwise `list(o)`."""
    if isinstance(o, paddle.framework.Variable):
        return o
    if is_listy(o):
        return o
    return list(o)
def hook_output(module, detach=True, grad=False):
    """Build a `Hook` on `module` that keeps its activations in `stored`.

    A forward hook is used unless `grad` is true, in which case a backward
    hook is registered.
    """
    use_forward = not grad
    return Hook(module, _hook_inner, detach=detach, is_forward=use_forward)
def hook_outputs(modules, detach=True, grad=False):
    """Build `Hooks` that keep the activations of every module in `modules`.

    Forward hooks are used unless `grad` is true, in which case backward
    hooks are registered.
    """
    use_forward = not grad
    return Hooks(modules, _hook_inner, detach=detach, is_forward=use_forward)
def model_sizes(m, size=(64,64)):
    """Run a dummy input through `m` and return the shape stored by each hook.

    Hooks are created by iterating `m` and are removed when the function exits.
    """
    with hook_outputs(m) as hooks:
        # The forward pass is only needed for its side effect on the hooks.
        dummy_eval(m, size)
        shapes = [hook.stored.shape for hook in hooks]
    return shapes
def dummy_eval(m, size=(64,64)):
    """Put `m` in evaluation mode and return its output on a dummy batch of `size`."""
    m.eval()
    batch = dummy_batch(size)
    return m(batch)
def dummy_batch(size=(64,64), ch_in=3):
    """Return a random float32 tensor of shape (1, ch_in, *size) with values in [-1, 1)."""
    uniform = np.random.rand(1, ch_in, *size)
    scaled = uniform.astype('float32') * 2 - 1
    return paddle.to_tensor(scaled)
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from resnet_backbone import resnet34, resnet101
from hook import hook_outputs, model_sizes, dummy_eval
# from weight_norm import weight_norm
from spectral_norm import Spectralnorm
from conv import Conv1D
from paddle import fluid
class SequentialEx(nn.Layer):
    """Sequential container whose `MergeLayer` members can see the container input.

    Layers are stored in an `nn.LayerList` and applied in order.
    """

    def __init__(self, *layers):
        super().__init__()
        self.layers = nn.LayerList(layers)

    def forward(self, x):
        out = x
        for layer in self.layers:
            # A MergeLayer is handed the original input of this container
            # right before it runs.
            if isinstance(layer, MergeLayer):
                layer.orig = x
            out = layer(out)
        return out

    def __getitem__(self, i):
        return self.layers[i]

    def append(self, l):
        return self.layers.append(l)

    def extend(self, l):
        return self.layers.extend(l)

    def insert(self, i, l):
        return self.layers.insert(i, l)
class Deoldify(SequentialEx):
    """U-Net style model assembled around `encoder` for DeOldify colorization.

    The constructor probes `encoder` with dummy inputs to find where the
    activation size changes, hooks those layers, and stacks one
    `UnetBlockWide` per hooked layer on top of the encoder.
    """
    def __init__(self, encoder, n_classes, blur=False, blur_final=True, self_attention=False, y_range=None, last_cross=True, bottle=False, norm_type='Batch', nf_factor=1, **kwargs):
        # Fixed spatial size of the dummy input used to probe the encoder.
        imsize = (256, 256)
        # Output shape of every child of `encoder` for that dummy input.
        sfs_szs = model_sizes(encoder, size=imsize)
        # Indexes where the activation size changes, deepest first.
        sfs_idxs = list(reversed(_get_sfs_idxs(sfs_szs)))
        # Hooks stay registered; each UnetBlockWide reads its skip input from one.
        # NOTE(review): this attribute is assigned before super().__init__() runs.
        self.sfs = hook_outputs([encoder[i] for i in sfs_idxs], detach=False)
        # Dummy encoder output, used to track channel counts while building.
        x = dummy_eval(encoder, imsize).detach()

        nf = 512 * nf_factor
        extra_bn = norm_type == 'Spectral'
        # Channel count of the last encoder activation.
        ni = sfs_szs[-1][1]
        middle_conv = nn.Sequential(
            custom_conv_layer(
                ni, ni * 2, norm_type=norm_type, extra_bn=extra_bn
            ),
            custom_conv_layer(
                ni * 2, ni, norm_type=norm_type, extra_bn=extra_bn
            ),
        )

        layers = [encoder, nn.BatchNorm(ni), nn.ReLU(), middle_conv]

        for i, idx in enumerate(sfs_idxs):
            not_final = i != len(sfs_idxs) - 1
            up_in_c, x_in_c = int(x.shape[1]), int(sfs_szs[idx][1])
            # NOTE(review): `do_blur` is computed but never used; `blur` is
            # what gets passed to the block below.
            do_blur = blur and (not_final or blur_final)
            # Self attention is enabled only on the third-from-last block.
            sa = self_attention and (i == len(sfs_idxs) - 3)

            n_out = nf if not_final else nf // 2

            unet_block = UnetBlockWide(
                up_in_c,
                x_in_c,
                n_out,
                self.sfs[i],
                final_div=not_final,
                blur=blur,
                self_attention=sa,
                norm_type=norm_type,
                extra_bn=extra_bn,
                **kwargs
            )
            unet_block.eval()
            layers.append(unet_block)
            # Run the block on the dummy tensor so the next block sees its output shape.
            x = unet_block(x)

        ni = x.shape[1]
        # NOTE(review): compares a tuple with a slice of a recorded shape; if
        # shapes are lists this is always unequal - confirm.
        if imsize != sfs_szs[0][-2:]:
            layers.append(PixelShuffle_ICNR(ni, **kwargs))
        if last_cross:
            # Dense merge concatenates the model input; the 3 added to `ni`
            # presumably accounts for its channels - TODO confirm.
            layers.append(MergeLayer(dense=True))
            ni += 3
            layers.append(res_block(ni, bottle=bottle, norm_type=norm_type, **kwargs))
        layers += [
            custom_conv_layer(ni, n_classes, ks=1, use_activ=False, norm_type=norm_type)
        ]
        if y_range is not None:
            layers.append(SigmoidRange(*y_range))
        super().__init__(*layers)
def custom_conv_layer(
    ni: int,
    nf: int,
    ks: int = 3,
    stride: int = 1,
    padding: int = None,
    bias: bool = None,
    is_1d: bool = False,
    norm_type='Batch',
    use_activ: bool = True,
    leaky: float = None,
    transpose: bool = False,
    self_attention: bool = False,
    extra_bn: bool = False,
    **kwargs
):
    """Build conv (`ni` -> `nf`) + optional activation + optional batchnorm + optional attention.

    Args:
        ni: number of input channels.
        nf: number of output channels.
        ks: kernel size.
        stride: convolution stride.
        padding: explicit padding; defaults to `(ks - 1) // 2`, or 0 for
            transposed convolutions.
        bias: whether the conv has a bias; defaults to "no bias when a
            batchnorm follows".
        is_1d: use a 1-D convolution instead of a 2-D one.
        norm_type: 'Batch', 'BatchZero', 'Weight' or 'Spectral'.
        use_activ: append the activation returned by `relu`.
        leaky: negative slope forwarded to `relu`.
        transpose: use a transposed 2-D convolution.
        self_attention: append a `SelfAttention` layer.
        extra_bn: force a batchnorm regardless of `norm_type`.
        **kwargs: accepted and ignored so callers can forward shared options.

    Returns:
        An `nn.Sequential` holding the layers in the order listed above.
    """
    if padding is None:
        padding = 0 if transpose else (ks - 1) // 2
    # 'BatchZero' is the spelling used by `conv_layer`/`res_block`; the
    # previously accepted 'Batchzero' typo is still recognised for callers
    # that relied on it.
    bn = norm_type in ('Batch', 'BatchZero', 'Batchzero') or extra_bn == True  # noqa: E712
    if bias is None:
        bias = not bn

    # Only the selected convolution class is looked up on `nn`.
    if transpose:
        conv_func = nn.ConvTranspose2d
    elif is_1d:
        conv_func = nn.Conv1d
    else:
        conv_func = nn.Conv2d
    conv = conv_func(ni, nf, kernel_size=ks, bias_attr=bias, stride=stride, padding=padding)

    if norm_type == 'Weight':
        print('use weight norm')
        conv = nn.utils.weight_norm(conv)
    elif norm_type == 'Spectral':
        conv = Spectralnorm(conv)

    layers = [conv]
    if use_activ:
        layers.append(relu(True, leaky=leaky))
    if bn:
        # The same batchnorm class is used for the 1-D and 2-D cases.
        layers.append(nn.BatchNorm(nf))
    if self_attention:
        layers.append(SelfAttention(nf))
    return nn.Sequential(*layers)
def relu(inplace:bool=False, leaky:float=None):
    """Return `nn.LeakyReLU(leaky)` when `leaky` is given, else `nn.ReLU()`.

    `inplace` is accepted but not used.
    """
    if leaky is None:
        return nn.ReLU()
    return nn.LeakyReLU(leaky)
class UnetBlockWide(nn.Layer):
    """Decoder block: upsample, merge with the hooked skip activation, convolve.

    The skip activation is read from `hook.stored` on every forward pass.
    `final_div` is accepted but not used.
    """

    def __init__(
        self,
        up_in_c: int,
        x_in_c: int,
        n_out: int,
        hook,
        final_div: bool = True,
        blur: bool = False,
        leaky: float = None,
        self_attention: bool = False,
        **kwargs
    ):
        super().__init__()
        self.hook = hook
        half = n_out // 2
        up_out = half
        x_out = half
        self.shuf = CustomPixelShuffle_ICNR(
            up_in_c, up_out, blur=blur, leaky=leaky, **kwargs
        )
        self.bn = nn.BatchNorm(x_in_c)
        merged_channels = up_out + x_in_c
        self.conv = custom_conv_layer(
            merged_channels, x_out, leaky=leaky, self_attention=self_attention, **kwargs
        )
        self.relu = relu(leaky=leaky)

    def forward(self, up_in):
        skip = self.hook.stored
        upsampled = self.shuf(up_in)
        skip_hw = skip.shape[-2:]
        # Resize the upsampled tensor when its last two dims differ from the skip's.
        if skip_hw != upsampled.shape[-2:]:
            upsampled = F.interpolate(upsampled, skip.shape[-2:], mode='nearest')
        merged = paddle.concat([upsampled, self.bn(skip)], axis=1)
        return self.conv(self.relu(merged))
class UnetBlockDeep(paddle.fluid.Layer):
    """Decoder block with two convolutions after the skip merge.

    The skip activation is read from `hook.stored` on every forward pass.
    The hook can be passed as the `hook` keyword or assigned to
    `self.hook` after construction.

    Args:
        up_in_c: channels of the tensor coming from the previous decoder block.
        x_in_c: channels of the skip activation.
        final_div: when false, the block's width is halved.
        blur: forwarded to `CustomPixelShuffle_ICNR`.
        leaky: negative slope forwarded to the activations.
        self_attention: forwarded to the second convolution.
        nf_factor: multiplier applied to the block's width.
        hook: object whose `stored` attribute holds the skip activation.
        **kwargs: forwarded to the sub-layers.
    """

    def __init__(
        self,
        up_in_c: int,
        x_in_c: int,
        final_div: bool = True,
        blur: bool = False,
        leaky: float = None,
        self_attention: bool = False,
        nf_factor: float = 1.0,
        hook=None,
        **kwargs
    ):
        super().__init__()
        # Previously `hook` was never stored, so `forward` always failed on
        # `self.hook`; it is now an optional keyword argument.
        self.hook = hook
        self.shuf = CustomPixelShuffle_ICNR(
            up_in_c, up_in_c // 2, blur=blur, leaky=leaky, **kwargs
        )
        self.bn = nn.BatchNorm(x_in_c)
        ni = up_in_c // 2 + x_in_c
        nf = int((ni if final_div else ni // 2) * nf_factor)
        self.conv1 = custom_conv_layer(ni, nf, leaky=leaky, **kwargs)
        self.conv2 = custom_conv_layer(
            nf, nf, leaky=leaky, self_attention=self_attention, **kwargs
        )
        self.relu = relu(leaky=leaky)

    def forward(self, up_in):
        if self.hook is None:
            raise RuntimeError(
                'UnetBlockDeep needs a hook providing the skip activation; '
                'pass `hook=` to the constructor or set `self.hook`.'
            )
        s = self.hook.stored
        up_out = self.shuf(up_in)
        ssh = s.shape[-2:]
        # Resize the upsampled tensor when its last two dims differ from the skip's.
        if ssh != up_out.shape[-2:]:
            up_out = F.interpolate(up_out, s.shape[-2:], mode='nearest')
        cat_x = self.relu(paddle.concat([up_out, self.bn(s)], axis=1))
        return self.conv2(self.conv1(cat_x))
def ifnone(a, b):
    """Return `a` unless it is None, in which case return `b`."""
    if a is not None:
        return a
    return b
class PixelShuffle_ICNR(nn.Layer):
    """Upsample by `scale` from `ni` filters to `nf` (default `ni`) with a pixel shuffle.

    A 1x1 `conv_layer` expands the channels to `nf * scale**2`, the result
    goes through the activation and `PixelShuffle`, then through a
    pad + 2x2 average pool.
    """
    def __init__(self, ni:int, nf:int=None, scale:int=2, blur:bool=False, norm_type='Weight', leaky:float=None):
        super().__init__()
        nf = ifnone(nf, ni)
        self.conv = conv_layer(ni, nf*(scale**2), ks=1, norm_type=norm_type, use_activ=False)
        self.shuf = PixelShuffle(scale)
        # Pads (1, 0, 1, 0) in "edge" mode ahead of the stride-1 2x2 average pool.
        self.pad = ReplicationPad2d((1,0,1,0))
        # NOTE(review): the `blur` argument is never read; `self.blur` is
        # always this pooling layer, so the `if self.blur` test in `forward`
        # depends on the truthiness of a layer object (presumably always
        # true). Changing this would alter outputs - confirm before touching.
        self.blur = nn.Pool2D(2, pool_stride=1, pool_type='avg')
        self.relu = relu(True, leaky=leaky)

    def forward(self,x):
        x = self.shuf(self.relu(self.conv(x)))
        return self.blur(self.pad(x)) if self.blur else x
def conv_layer(ni:int, nf:int, ks:int=3, stride:int=1, padding:int=None, bias:bool=None, is_1d:bool=False,
               norm_type='Batch', use_activ:bool=True, leaky:float=None,
               transpose:bool=False, init=None, self_attention:bool=False):
    """Build conv (`ni` -> `nf`) followed by optional activation, batchnorm and attention.

    Padding defaults to `(ks-1)//2` (0 for transposed convs) and the bias
    defaults to being disabled when a batchnorm follows. `init` is accepted
    but not used.
    """
    if padding is None:
        padding = 0 if transpose else (ks-1)//2
    has_bn = norm_type in ('Batch', 'BatchZero')
    if bias is None:
        bias = not has_bn

    # Only the selected convolution class is looked up on `nn`.
    if transpose:
        conv_cls = nn.ConvTranspose2d
    elif is_1d:
        conv_cls = nn.Conv1d
    else:
        conv_cls = nn.Conv2d
    conv = conv_cls(ni, nf, kernel_size=ks, bias_attr=bias, stride=stride, padding=padding)

    if norm_type == 'Weight':
        conv = nn.utils.weight_norm(conv)
    elif norm_type == 'Spectral':
        conv = Spectralnorm(conv)

    stack = [conv]
    if use_activ:
        stack.append(relu(True, leaky=leaky))
    if has_bn:
        stack.append((nn.BatchNorm if is_1d else nn.BatchNorm)(nf))
    if self_attention:
        stack.append(SelfAttention(nf))
    return nn.Sequential(*stack)
class CustomPixelShuffle_ICNR(paddle.fluid.Layer):
    """Pixel-shuffle upsampling from `ni` to `nf` (default `ni`) channels by `scale`.

    Uses `custom_conv_layer` for the 1x1 expansion; extra keyword arguments
    are forwarded to it. The `blur` argument is not read: `self.blur` always
    holds the average-pool layer and `forward` tests that attribute.
    """

    def __init__(
        self,
        ni: int,
        nf: int = None,
        scale: int = 2,
        blur: bool = False,
        leaky: float = None,
        **kwargs
    ):
        super().__init__()
        out_channels = ifnone(nf, ni)
        expanded = out_channels * (scale ** 2)
        self.conv = custom_conv_layer(ni, expanded, ks=1, use_activ=False, **kwargs)
        self.shuf = PixelShuffle(scale)
        self.pad = ReplicationPad2d((1, 0, 1, 0))
        self.blur = nn.Pool2D(2, pool_stride=1, pool_type='avg')
        if leaky is not None:
            self.relu = nn.LeakyReLU(leaky)
        else:
            self.relu = nn.ReLU()

    def forward(self, x):
        activated = self.relu(self.conv(x))
        shuffled = self.shuf(activated)
        if self.blur:
            return self.blur(self.pad(shuffled))
        return shuffled
class MergeLayer(paddle.fluid.Layer):
    """Combine the input with `self.orig`: concatenate on axis 1 if `dense`, else add.

    `self.orig` must be set before each call and is reset to None afterwards.
    """

    def __init__(self, dense:bool=False):
        super().__init__()
        self.dense = dense
        self.orig = None

    def forward(self, x):
        if self.dense:
            merged = paddle.concat([x, self.orig], axis=1)
        else:
            merged = x + self.orig
        # Drop the reference so the shortcut tensor is not kept alive.
        self.orig = None
        return merged
def res_block(nf, dense:bool=False, norm_type='Batch', bottle:bool=False, **conv_kwargs):
    """Build a residual block of `nf` features from two `conv_layer`s and a `MergeLayer`.

    The second conv uses 'BatchZero' when the block is not dense and
    `norm_type` is 'Batch'. With `bottle`, the inner width is `nf//2`.
    `conv_kwargs` are forwarded to both `conv_layer` calls.
    """
    second_norm = 'BatchZero' if (not dense and norm_type == 'Batch') else norm_type
    inner = nf//2 if bottle else nf
    first_conv = conv_layer(nf, inner, norm_type=norm_type, **conv_kwargs)
    second_conv = conv_layer(inner, nf, norm_type=second_norm, **conv_kwargs)
    merge = MergeLayer(dense)
    return SequentialEx(first_conv, second_conv, merge)
class SigmoidRange(paddle.fluid.Layer):
    """Layer that applies `sigmoid_range` with the stored `low` and `high`."""

    def __init__(self, low, high):
        super().__init__()
        self.low = low
        self.high = high

    def forward(self, x):
        return sigmoid_range(x, self.low, self.high)
def sigmoid_range(x, low, high):
    """Apply a sigmoid to `x` and rescale the result to the range `(low, high)`."""
    width = high - low
    squashed = F.sigmoid(x)
    return squashed * width + low
class PixelShuffle(paddle.fluid.Layer):
    """Layer wrapper around `paddle.fluid.layers.pixel_shuffle`."""

    def __init__(self, upscale_factor):
        super().__init__()
        self.upscale_factor = upscale_factor

    def forward(self, x):
        factor = self.upscale_factor
        return paddle.fluid.layers.pixel_shuffle(x, factor)
class ReplicationPad2d(nn.Layer):
    """Layer wrapper around `paddle.fluid.layers.pad2d` in "edge" mode."""

    def __init__(self, size):
        super().__init__()
        self.size = size

    def forward(self, x):
        paddings = self.size
        return paddle.fluid.layers.pad2d(x, paddings, mode="edge")
def conv1d(ni:int, no:int, ks:int=1, stride:int=1, padding:int=0, bias:bool=False):
    """Create an `nn.Conv1d` layer wrapped in `Spectralnorm`."""
    base = nn.Conv1d(ni, no, ks, stride=stride, padding=padding, bias_attr=bias)
    wrapped = Spectralnorm(base)
    return wrapped
class SelfAttention(nn.Layer):
    """Self attention layer; notation follows https://arxiv.org/pdf/1805.08318.pdf.

    The input is flattened over every dimension after the first two, attended
    over, scaled by the learnable `gamma` (initialised to 0), added back to
    the flattened input and reshaped to the input shape.
    """

    def __init__(self, n_channels):
        super().__init__()
        reduced = n_channels//8
        self.query = conv1d(n_channels, reduced)
        self.key = conv1d(n_channels, reduced)
        self.value = conv1d(n_channels, n_channels)
        self.gamma = self.create_parameter(
            shape=[1],
            default_initializer=paddle.fluid.initializer.Constant(0.0))

    def forward(self, x):
        in_shape = x.shape
        flat = paddle.reshape(x, list(in_shape[:2]) + [-1])
        f = self.query(flat)
        g = self.key(flat)
        h = self.value(flat)
        scores = paddle.bmm(paddle.transpose(f, [0, 2, 1]), g)
        beta = paddle.nn.functional.softmax(scores, axis=1)
        attended = self.gamma * paddle.bmm(h, beta) + flat
        return paddle.reshape(attended, in_shape)
def _get_sfs_idxs(sizes):
"Get the indexes of the layers where the size of the activation changes."
feature_szs = [size[-1] for size in sizes]
sfs_idxs = list(
np.where(np.array(feature_szs[:-1]) != np.array(feature_szs[1:]))[0]
)
if feature_szs[0] != feature_szs[1]:
sfs_idxs = [0] + sfs_idxs
return sfs_idxs
def build_model():
    """Build the `Deoldify` model on a ResNet-101 encoder without its last two children."""
    resnet = resnet101()
    encoder_layers = list(resnet.children())[:-2]
    encoder = nn.Sequential(*encoder_layers)
    return Deoldify(
        encoder,
        3,
        blur=True,
        y_range=(-3, 3),
        norm_type='Spectral',
        self_attention=True,
        nf_factor=2,
    )
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import cv2
import glob
import argparse
import numpy as np
import paddle
import pickle
from PIL import Image
from tqdm import tqdm
from paddle import fluid
from model import build_model
from paddle.incubate.hapi.download import get_path_from_url
# Command-line interface of the DeOldify predictor.
parser = argparse.ArgumentParser(description='DeOldify')
parser.add_argument('--input', type=str, default='none', help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
# The default is None (not the string 'none') so that DeOldifyPredictor's
# `weight_path is None` check triggers the automatic weight download.
parser.add_argument('--weight_path', type=str, default=None,
                    help='Path to the DeOldify weights; downloaded automatically when omitted')

# Location of the pretrained weights fetched when no weight path is given.
DeOldify_weight_url = 'https://paddlegan.bj.bcebos.com/applications/DeOldify_stable.pdparams'
def frames_to_video_ffmpeg(framepath, videopath, r):
    """Encode the frames matching `framepath` into `videopath` with ffmpeg.

    Args:
        framepath: ffmpeg image2 input pattern.
        videopath: path of the video file to write.
        r: frame rate passed to ffmpeg's `-r` option.

    The command is run without a shell, so paths containing spaces or shell
    metacharacters are passed to ffmpeg unchanged. Success or failure is
    reported on stdout; nothing is returned.
    """
    import shlex
    import subprocess

    cmd = [
        'ffmpeg', '-loglevel', 'error',
        '-r', str(r), '-f', 'image2', '-i', framepath,
        '-vcodec', 'libx264', '-pix_fmt', 'yuv420p', '-crf', '16', videopath,
    ]
    print(' '.join(shlex.quote(str(part)) for part in cmd))

    try:
        succeeded = subprocess.run(cmd).returncode == 0
    except OSError as e:
        # e.g. ffmpeg is not installed; report it like any other failure.
        print(e)
        succeeded = False

    if succeeded:
        print('Video: {} done'.format(videopath))
    else:
        print('Video: {} error'.format(videopath))

    print('')
    sys.stdout.flush()
class DeOldifyPredictor():
    """Colorize a video frame by frame with the DeOldify model.

    Results are written under `<output>/DeOldify/<video base name>/`.
    """

    def __init__(self, input, output, batch_size=1, weight_path=None):
        """Build the model and load its weights.

        Args:
            input: path of the video to process.
            output: root output directory.
            batch_size: accepted but not used.
            weight_path: path of the weights file; when None (or the legacy
                CLI sentinel 'none') the weights are downloaded from
                `DeOldify_weight_url`.
        """
        self.input = input
        self.output = os.path.join(output, 'DeOldify')
        self.model = build_model()
        # The command line used to default to the string 'none', which made
        # the load below fail instead of triggering the download.
        if weight_path is None or weight_path == 'none':
            weight_path = get_path_from_url(DeOldify_weight_url, cur_path)

        state_dict, _ = paddle.load(weight_path)
        self.model.load_dict(state_dict)
        self.model.eval()

    def norm(self, img, render_factor=32, render_base=16):
        """Resize `img` to a square of side `render_factor * render_base` and normalize it.

        Returns a float32 array with the channel axis moved first, scaled to
        [0, 1] and standardized with the mean/std constants below.
        """
        target_size = render_factor * render_base
        img = img.resize((target_size, target_size), resample=Image.BILINEAR)

        img = np.array(img).transpose([2, 0, 1]).astype('float32') / 255.0

        img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
        img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))

        img -= img_mean
        img /= img_std
        return img.astype('float32')

    def denorm(self, img):
        """Invert the standardization of `norm` and return a uint8 array with channels last.

        `img` is modified in place.
        """
        img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
        img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))

        img *= img_std
        img += img_mean
        img = img.transpose((1, 2, 0))

        return (img * 255).astype('uint8')

    def post_process(self, raw_color, orig):
        """Combine channel 0 (after YUV conversion) of `orig` with channels 1-2 of `raw_color`.

        Both images are converted with `cv2.COLOR_BGR2YUV`, merged, and
        converted back with `cv2.COLOR_YUV2BGR`. Returns a PIL image.
        """
        color_np = np.asarray(raw_color)
        orig_np = np.asarray(orig)
        color_yuv = cv2.cvtColor(color_np, cv2.COLOR_BGR2YUV)
        orig_yuv = cv2.cvtColor(orig_np, cv2.COLOR_BGR2YUV)
        hires = np.copy(orig_yuv)
        hires[:, :, 1:3] = color_yuv[:, :, 1:3]
        final = cv2.cvtColor(hires, cv2.COLOR_YUV2BGR)
        final = Image.fromarray(final)
        return final

    def run_single(self, img_path):
        """Colorize the image at `img_path` and return it as a PIL image of the original size."""
        # Any existing color is discarded before the model sees the frame.
        ori_img = Image.open(img_path).convert('LA').convert('RGB')
        img = self.norm(ori_img)

        x = paddle.to_tensor(img[np.newaxis, ...])
        out = self.model(x)

        pred_img = self.denorm(out.numpy()[0])
        pred_img = Image.fromarray(pred_img)
        pred_img = pred_img.resize(ori_img.size, resample=Image.BILINEAR)
        pred_img = self.post_process(pred_img, ori_img)
        return pred_img

    def run(self):
        """Process the whole input video.

        Returns:
            A tuple `(frame pattern of the predicted frames, output video path)`.
        """
        vid = self.input
        base_name = os.path.basename(vid).split('.')[0]
        output_path = os.path.join(self.output, base_name)
        pred_frame_path = os.path.join(output_path, 'frames_pred')

        if not os.path.exists(output_path):
            os.makedirs(output_path)

        if not os.path.exists(pred_frame_path):
            os.makedirs(pred_frame_path)

        # The capture is only needed to read the frame rate; release it right
        # away instead of leaking the handle.
        cap = cv2.VideoCapture(vid)
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
        finally:
            cap.release()

        out_path = dump_frames_ffmpeg(vid, output_path)

        frames = sorted(glob.glob(os.path.join(out_path, '*.png')))

        for frame in tqdm(frames):
            pred_img = self.run_single(frame)

            frame_name = os.path.basename(frame)
            pred_img.save(os.path.join(pred_frame_path, frame_name))

        frame_pattern_combined = os.path.join(pred_frame_path, '%08d.png')

        vid_out_path = os.path.join(output_path, '{}_deoldify_out.mp4'.format(base_name))
        frames_to_video_ffmpeg(frame_pattern_combined, vid_out_path, str(int(fps)))

        return frame_pattern_combined, vid_out_path
def dump_frames_ffmpeg(vid_path, outpath, r=None, ss=None, t=None):
    """Extract the frames of `vid_path` as PNG files with ffmpeg.

    Frames are written to `<outpath>/frames_input/%08d.png`, numbered from 0.

    Args:
        vid_path: path of the input video.
        outpath: directory under which `frames_input` is created.
        r: output frame rate (`-r`).
        ss: start position (`-ss`).
        t: duration (`-t`).
            `r`, `ss` and `t` are only used when all three are given.

    Returns:
        The directory holding the extracted frames.

    The command is run without a shell, so paths containing spaces or shell
    metacharacters are passed to ffmpeg unchanged. Success or failure is
    reported on stdout.
    """
    import shlex
    import subprocess

    vid_name = os.path.basename(vid_path).split('.')[0]
    out_full_path = os.path.join(outpath, 'frames_input')
    os.makedirs(out_full_path, exist_ok=True)

    # Output file name pattern.
    outformat = os.path.join(out_full_path, '%08d.png')

    cmd = ['ffmpeg', '-loglevel', 'error']
    if ss is not None and t is not None and r is not None:
        cmd += [
            '-ss', str(ss),
            '-t', str(t),
            '-i', vid_path,
            '-r', str(r),
            '-qscale:v', '0.1',
            '-start_number', '0',
            outformat,
        ]
    else:
        cmd += ['-i', vid_path, '-start_number', '0', outformat]

    print(' '.join(shlex.quote(str(part)) for part in cmd))

    try:
        succeeded = subprocess.run(cmd).returncode == 0
    except OSError as e:
        # e.g. ffmpeg is not installed; report it like any other failure.
        print(e)
        succeeded = False

    if succeeded:
        print('Video: {} done'.format(vid_name))
    else:
        print('Video: {} error'.format(vid_name))
    print('')
    sys.stdout.flush()
    return out_full_path
if __name__=='__main__':
    # Script entry point: colorize the video given on the command line.
    # Imperative mode is enabled before the predictor (and its model) is built.
    paddle.enable_imperative()
    args = parser.parse_args()

    predictor = DeOldifyPredictor(args.input, args.output, weight_path=args.weight_path)
    # run() returns the predicted-frame pattern and the path of the encoded video.
    frames_path, temp_video_path = predictor.run()

    print('output video path:', temp_video_path)
\ No newline at end of file
import paddle
import paddle.nn as nn
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152']
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 `nn.Conv2d` with padding 1."""
    conv = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias_attr=False,
    )
    return conv
class BasicBlock(paddle.fluid.Layer):
    """Residual block made of two 3x3 convolutions.

    `downsample`, when given, is applied to the input before it is added
    to the output of the convolution path.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm(planes)
        self.relu = nn.ReLU()
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Convolution path.
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Shortcut path.
        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out += shortcut
        return self.relu(out)
class Bottleneck(paddle.fluid.Layer):
    """Residual block made of 1x1, 3x3 and 1x1 convolutions.

    The last convolution outputs `planes * 4` channels. `downsample`, when
    given, is applied to the input before it is added to the output of the
    convolution path.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias_attr=False)
        self.bn1 = nn.BatchNorm(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias_attr=False)
        self.bn2 = nn.BatchNorm(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias_attr=False)
        self.bn3 = nn.BatchNorm(planes * 4)
        self.relu = nn.ReLU()
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Convolution path.
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Shortcut path.
        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out += shortcut
        return self.relu(out)
class ResNet(paddle.fluid.Layer):
    """ResNet built from `block` with `layers[i]` blocks in each of four stages.

    Args:
        block: block class; must expose an `expansion` attribute.
        layers: sequence of four block counts, one per stage.
        num_classes: output size of the final linear layer.
    """

    def __init__(self, block, layers, num_classes=1000):
        # Running input channel count; updated by `_make_layer`.
        # NOTE(review): assigned before super().__init__() - keep this order.
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias_attr=False)
        self.bn1 = nn.BatchNorm(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.Pool2D(pool_size=3, pool_stride=2, pool_padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.Pool2D(7, pool_stride=1, pool_type='avg')
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of `blocks` blocks; only the first uses `stride`."""
        downsample = None
        # A 1x1 conv + batchnorm shortcut is needed when the first block
        # changes the stride or the channel count.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias_attr=False),
                nn.BatchNorm(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        # Flatten everything after the first dimension before the linear layer.
        x = paddle.reshape(x, (x.shape[0], -1))
        x = self.fc(x)

        return x
def resnet18(pretrained=False, **kwargs):
    """Build a ResNet-18 (`BasicBlock`, stages of 2/2/2/2 blocks).

    Args:
        pretrained (bool): accepted but not used; no weights are loaded.
        **kwargs: forwarded to `ResNet`.
    """
    stage_depths = [2, 2, 2, 2]
    return ResNet(BasicBlock, stage_depths, **kwargs)
def resnet34(pretrained=False, **kwargs):
    """Build a ResNet-34 (`BasicBlock`, stages of 3/4/6/3 blocks).

    Args:
        pretrained (bool): accepted but not used; no weights are loaded.
        **kwargs: forwarded to `ResNet`.
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths, **kwargs)
def resnet50(pretrained=False, **kwargs):
    """Build a ResNet-50 (`Bottleneck`, stages of 3/4/6/3 blocks).

    Args:
        pretrained (bool): accepted but not used; no weights are loaded.
        **kwargs: forwarded to `ResNet`.
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths, **kwargs)
def resnet101(pretrained=False, **kwargs):
    """Build a ResNet-101 (`Bottleneck`, stages of 3/4/23/3 blocks).

    Args:
        pretrained (bool): accepted but not used; no weights are loaded.
        **kwargs: forwarded to `ResNet`.
    """
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths, **kwargs)
def resnet152(pretrained=False, **kwargs):
    """Build a ResNet-152 (`Bottleneck`, stages of 3/8/36/3 blocks).

    Args:
        pretrained (bool): accepted but not used; no weights are loaded.
        **kwargs: forwarded to `ResNet`.
    """
    stage_depths = [3, 8, 36, 3]
    return ResNet(Bottleneck, stage_depths, **kwargs)
import numpy as np
from paddle import fluid
from paddle.fluid import dygraph
from paddle.fluid import layers as F
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.data_feeder import check_variable_and_dtype
import paddle
import paddle.nn as nn
class _SpectralNorm(nn.SpectralNorm):
    """`nn.SpectralNorm` variant that runs no power iterations outside training."""

    def __init__(self,
                 weight_shape,
                 dim=0,
                 power_iters=1,
                 eps=1e-12,
                 dtype='float32'):
        super(_SpectralNorm, self).__init__(weight_shape, dim, power_iters, eps, dtype)

    def forward(self, weight):
        """Append a `spectral_norm` op for `weight` and return its output variable."""
        check_variable_and_dtype(weight, "weight", ['float32', 'float64'],
                                 'SpectralNorm')
        inputs = {'Weight': weight, 'U': self.weight_u, 'V': self.weight_v}
        out = self._helper.create_variable_for_type_inference(self._dtype)
        # When the layer is not in training mode, 0 power iterations are requested.
        _power_iters = self._power_iters if self.training else 0
        self._helper.append_op(
            type="spectral_norm",
            inputs=inputs,
            outputs={"Out": out, },
            attrs={
                "dim": self._dim,
                "power_iters": _power_iters, #self._power_iters,
                "eps": self._eps,
            })

        return out
class Spectralnorm(nn.Layer):
    """Wrap `layer` so its weight is passed through `_SpectralNorm` on every forward call.

    The wrapped layer's `weight` parameter is removed and re-created on this
    wrapper as `weight_orig`.
    """

    def __init__(self,
                 layer,
                 dim=0,
                 power_iters=1,
                 eps=1e-12,
                 dtype='float32'):
        super(Spectralnorm, self).__init__()
        self.spectral_norm = _SpectralNorm(layer.weight.shape, dim, power_iters, eps, dtype)
        self.dim = dim
        self.power_iters = power_iters
        self.eps = eps
        self.layer = layer
        # Move the weight out of the wrapped layer: drop it from the layer's
        # parameters and keep a copy of its value here as `weight_orig`.
        weight = layer._parameters['weight']
        del layer._parameters['weight']
        self.weight_orig = self.create_parameter(weight.shape, dtype=weight.dtype)
        self.weight_orig.set_value(weight)

    def forward(self, x):
        # Normalize the stored weight and hand it to the wrapped layer before calling it.
        weight = self.spectral_norm(self.weight_orig)
        self.layer.weight = weight
        out = self.layer(x)
        return out
MODEL:
name: "EDVR"
format: "png"
num_frames: 5
center: 2
num_filters: 128 #64
deform_conv_groups: 8
front_RBs: 5
back_RBs: 40 #10
predeblur: False
HR_in: False
w_TSA: True #False
INFER:
scale: 4
crop_size: 256
interval_list: [1]
random_reverse: False
number_frames: 5
batch_size: 1
file_root: "/workspace/color/input_frames"
inference_model: "/workspace/PaddleGAN/applications/EDVR/data/inference_model"
use_flip: False
use_rot: False
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); #Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. #you may not use this file except in compliance with the License.
...@@ -14,83 +14,41 @@ ...@@ -14,83 +14,41 @@
import os import os
import sys import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import time import time
import logging
import argparse import argparse
import ast import ast
import glob
import numpy as np import numpy as np
try:
import cPickle as pickle
except:
import pickle
import paddle.fluid as fluid import paddle.fluid as fluid
import cv2 import cv2
from utils.config_utils import * from data import EDVRDataset
#import models from paddle.incubate.hapi.download import get_path_from_url
from reader import get_reader
#from metrics import get_metrics
from utils.utility import check_cuda
from utils.utility import check_version
logging.root.handlers = []
FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
logging.basicConfig(level=logging.DEBUG, format=FORMAT, stream=sys.stdout)
logger = logging.getLogger(__name__)
EDVR_weight_url = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar'
def parse_args(): def parse_args():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument( parser.add_argument(
'--model_name', '--input',
type=str,
default='AttentionCluster',
help='name of model to train.')
parser.add_argument(
'--inference_model',
type=str,
default='./data/inference_model',
help='path of inference_model.')
parser.add_argument(
'--config',
type=str,
default='configs/attention_cluster.txt',
help='path to config file of model')
parser.add_argument(
'--use_gpu',
type=ast.literal_eval,
default=True,
help='default use gpu.')
parser.add_argument(
'--batch_size',
type=int,
default=1,
help='sample number in a batch for inference.')
parser.add_argument(
'--filelist',
type=str, type=str,
default=None, default=None,
help='path to inferenece data file lists file.') help='input video path')
parser.add_argument(
'--log_interval',
type=int,
default=1,
help='mini-batch interval to log.')
parser.add_argument( parser.add_argument(
'--infer_topk', '--output',
type=int,
default=20,
help='topk predictions to restore.')
parser.add_argument(
'--save_dir',
type=str, type=str,
default=os.path.join('data', 'predict_results'), default='output',
help='directory to store results') help='output path')
parser.add_argument( parser.add_argument(
'--video_path', '--weight_path',
type=str, type=str,
default=None, default=None,
help='directory to store results') help='weight path')
args = parser.parse_args() args = parser.parse_args()
return args return args
...@@ -106,69 +64,143 @@ def get_img(pred): ...@@ -106,69 +64,143 @@ def get_img(pred):
return pred return pred
def save_img(img, framename): def save_img(img, framename):
dirname = './demo/resultpng' dirname = os.path.dirname(framename)
filename = os.path.join(dirname, framename+'.png') if not os.path.exists(dirname):
cv2.imwrite(filename, img) os.makedirs(dirname)
cv2.imwrite(framename, img)
def infer(args):
# parse config
config = parse_config(args.config) def dump_frames_ffmpeg(vid_path, outpath, r=None, ss=None, t=None):
infer_config = merge_configs(config, 'infer', vars(args)) ffmpeg = ['ffmpeg ', ' -loglevel ', ' error ']
print_configs(infer_config, "Infer") vid_name = vid_path.split('/')[-1].split('.')[0]
inference_model = args.inference_model out_full_path = os.path.join(outpath, 'frames_input')
model_filename = 'EDVR_model.pdmodel'
params_filename = 'EDVR_params.pdparams' if not os.path.exists(out_full_path):
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() os.makedirs(out_full_path)
exe = fluid.Executor(place)
# video file name
[inference_program, feed_list, fetch_list] = fluid.io.load_inference_model(dirname=inference_model, model_filename=model_filename, params_filename=params_filename, executor=exe) outformat = out_full_path + '/%08d.png'
infer_reader = get_reader(args.model_name.upper(), 'infer', infer_config) if ss is not None and t is not None and r is not None:
#infer_metrics = get_metrics(args.model_name.upper(), 'infer', infer_config) cmd = ffmpeg + [
#infer_metrics.reset() ' -ss ',
ss,
periods = [] ' -t ',
cur_time = time.time() t,
for infer_iter, data in enumerate(infer_reader()): ' -i ',
if args.model_name == 'EDVR': vid_path,
data_feed_in = [items[0] for items in data] ' -r ',
video_info = [items[1:] for items in data] r,
infer_outs = exe.run(inference_program, ' -qscale:v ',
fetch_list=fetch_list, ' 0.1 ',
feed={feed_list[0]:np.array(data_feed_in)}) ' -start_number ',
infer_result_list = [item for item in infer_outs] ' 0 ',
videonames = [item[0] for item in video_info] outformat
framenames = [item[1] for item in video_info] ]
for i in range(len(infer_result_list)): else:
img_i = get_img(infer_result_list[i]) cmd = ffmpeg + [' -i ', vid_path, ' -start_number ', ' 0 ', outformat]
save_img(img_i, 'img' + videonames[i] + framenames[i])
cmd = ''.join(cmd)
print(cmd)
if os.system(cmd) == 0:
prev_time = cur_time print('Video: {} done'.format(vid_name))
else:
print('Video: {} error'.format(vid_name))
print('')
sys.stdout.flush()
return out_full_path
def frames_to_video_ffmpeg(framepath, videopath, r):
    """Encode a numbered frame sequence into an H.264 video with ffmpeg.

    Args:
        framepath: ffmpeg input pattern for the frames (e.g. a path ending
            in ``%08d.png``).
        videopath: path of the video file to write.
        r: frame rate passed to ffmpeg's ``-r`` option (converted with
            ``str``).

    The outcome is reported on stdout; nothing is returned and a failing
    ffmpeg run does not raise.
    """
    import subprocess

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and cannot be used to inject commands.
    cmd = [
        'ffmpeg', '-loglevel', 'error',
        '-r', str(r), '-f', 'image2', '-i', framepath,
        '-vcodec', 'libx264', '-pix_fmt', 'yuv420p', '-crf', '16',
        videopath,
    ]
    print(' '.join(cmd))

    try:
        succeeded = subprocess.call(cmd) == 0
    except OSError:
        # ffmpeg missing or not executable: report it like any other failure.
        succeeded = False

    if succeeded:
        print('Video: {} done'.format(videopath))
    else:
        print('Video: {} error'.format(videopath))

    print('')
    sys.stdout.flush()
class EDVRPredictor:
def __init__(self, input, output, weight_path=None):
    """Create the executor and load the EDVR inference model.

    Args:
        input: path of the video to process.
        output: base output directory; results go to ``<output>/EDVR``.
        weight_path: directory holding the inference model files. When
            None, it is obtained via ``get_path_from_url`` from
            ``EDVR_weight_url``.
    """
    self.input = input
    self.output = os.path.join(output, 'EDVR')

    # Use the first GPU when paddle was built with CUDA, otherwise the CPU.
    if fluid.is_compiled_with_cuda():
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    self.exe = fluid.Executor(place)

    if weight_path is None:
        weight_path = get_path_from_url(EDVR_weight_url, cur_path)
    print(weight_path)

    loaded = fluid.io.load_inference_model(
        dirname=weight_path,
        model_filename='EDVR_model.pdmodel',
        params_filename='EDVR_params.pdparams',
        executor=self.exe)
    self.infer_prog, self.feed_list, self.fetch_list = loaded
def run(self):
vid = self.input
base_name = os.path.basename(vid).split('.')[0]
output_path = os.path.join(self.output, base_name)
pred_frame_path = os.path.join(output_path, 'frames_pred')
if not os.path.exists(output_path):
os.makedirs(output_path)
if not os.path.exists(pred_frame_path):
os.makedirs(pred_frame_path)
cap = cv2.VideoCapture(vid)
fps = cap.get(cv2.CAP_PROP_FPS)
out_path = dump_frames_ffmpeg(vid, output_path)
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
dataset = EDVRDataset(frames)
periods = []
cur_time = time.time() cur_time = time.time()
period = cur_time - prev_time for infer_iter, data in enumerate(dataset):
periods.append(period) data_feed_in = [data[0]]
#infer_metrics.accumulate(infer_result_list) infer_outs = self.exe.run(self.infer_prog,
fetch_list=self.fetch_list,
feed={self.feed_list[0]:np.array(data_feed_in)})
infer_result_list = [item for item in infer_outs]
if args.log_interval > 0 and infer_iter % args.log_interval == 0: frame_path = data[1]
logger.info('Processed {} samples'.format(infer_iter + 1))
img_i = get_img(infer_result_list[0])
save_img(img_i, os.path.join(pred_frame_path, os.path.basename(frame_path)))
logger.info('[INFER] infer finished. average time: {}'.format(np.mean(periods))) prev_time = cur_time
cur_time = time.time()
period = cur_time - prev_time
periods.append(period)
if not os.path.isdir(args.save_dir): print('Processed {} samples'.format(infer_iter + 1))
os.makedirs(args.save_dir) frame_pattern_combined = os.path.join(pred_frame_path, '%08d.png')
vid_out_path = os.path.join(self.output, '{}_edvr_out.mp4'.format(base_name))
frames_to_video_ffmpeg(frame_pattern_combined, vid_out_path, str(int(fps)))
#infer_metrics.finalize_and_log_out(savedir=args.save_dir) return frame_pattern_combined, vid_out_path
if __name__ == "__main__": if __name__ == "__main__":
args = parse_args() predictor = EDVRPredictor(args.input, args.output, args.weight_path)
# check whether the installed paddle is compiled with GPU predictor.run()
check_cuda(args.use_gpu)
check_version()
logger.info(args)
infer(args)
from .reader_utils import regist_reader, get_reader
from .edvr_reader import EDVRReader
regist_reader("EDVR", EDVRReader)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import sys
import cv2
import math
import random
import multiprocessing
import functools
import numpy as np
import paddle
import cv2
import logging
from .reader_utils import DataReader
logger = logging.getLogger(__name__)
python_ver = sys.version_info
random.seed(0)
np.random.seed(0)
class EDVRReader(DataReader):
    """
    Data reader for video super resolution task fit for EDVR model.
    This is specified for REDS dataset.
    """

    def __init__(self, name, mode, cfg):
        """Read the reader settings for ``mode`` out of ``cfg``."""
        super(EDVRReader, self).__init__(name, mode, cfg)
        from_section = self.get_config_from_sec

        self.format = cfg.MODEL.format
        self.crop_size = from_section(mode, 'crop_size')
        self.interval_list = from_section(mode, 'interval_list')
        self.random_reverse = from_section(mode, 'random_reverse')
        self.number_frames = from_section(mode, 'number_frames')

        # batch size and file root are mandatory: read them without default
        section = cfg[mode.upper()]
        self.batch_size = section['batch_size']
        self.fileroot = section['file_root']

        self.use_flip = from_section(mode, 'use_flip', False)
        self.use_rot = from_section(mode, 'use_rot', False)
        self.num_reader_threads = from_section(mode, 'num_reader_threads', 1)
        self.buf_size = from_section(mode, 'buf_size', 1024)
        self.fix_random_seed = from_section(mode, 'fix_random_seed', False)

        if self.mode != 'infer':
            self.gtroot = from_section(mode, 'gt_root')
            self.scale = from_section(mode, 'scale', 1)
            self.LR_input = (self.scale > 1)

        if self.fix_random_seed:
            random.seed(0)
            np.random.seed(0)
            self.num_reader_threads = 1

    def create_reader(self):
        """Scan ``fileroot`` and return a batch reader callable.

        Every entry of ``self.filelist`` is '<video dir>_<frame stem>',
        e.g. '010_00000015'.
        """
        logger.info('initialize reader ... ')
        skipped_for_train = ['000', '011', '015', '020']
        self.filelist = []
        for video_name in os.listdir(self.fileroot):
            if self.mode == 'train' and video_name in skipped_for_train:
                continue
            video_dir = os.path.join(self.fileroot, video_name)
            for frame_name in os.listdir(video_dir):
                frame_idx = frame_name.split('.')[0]
                self.filelist.append(video_name + '_' + frame_idx)

        if self.mode in ('test', 'infer'):
            self.filelist.sort()

        if self.num_reader_threads == 1:
            reader_func = make_reader
        else:
            reader_func = make_multi_reader

        # Inference has no ground truth and always uses LR input at scale 4.
        is_infer = self.mode == 'infer'
        return reader_func(filelist=self.filelist,
                           num_threads=self.num_reader_threads,
                           batch_size=self.batch_size,
                           is_training=(self.mode == 'train'),
                           number_frames=self.number_frames,
                           interval_list=self.interval_list,
                           random_reverse=self.random_reverse,
                           fileroot=self.fileroot,
                           crop_size=self.crop_size,
                           use_flip=self.use_flip,
                           use_rot=self.use_rot,
                           gtroot='' if is_infer else self.gtroot,
                           LR_input=True if is_infer else self.LR_input,
                           scale=4 if is_infer else self.scale,
                           mode=self.mode)
def get_sample_data(item, number_frames, interval_list, random_reverse, fileroot,
                    crop_size, use_flip, use_rot, gtroot, LR_input, scale, mode='train'):
    """Load the neighbouring frames (and ground truth) for one sample key.

    ``item`` is a '<video>_<frame>' key. In 'train'/'valid' mode the frames
    are randomly cropped and augmented; in 'test'/'infer' mode they are used
    as read.

    Returns:
        ``(img_LQs, img_GT)`` as float32 arrays, or only ``img_LQs`` when
        ``mode`` is 'infer'. ``img_LQs`` stacks the frames on axis 0 with
        channels reversed and moved in front of the spatial axes.

    Raises:
        NotImplementedError: if ``mode`` is not one of the four known modes.
    """
    parts = item.split('_')
    video_name = parts[0]
    frame_name = parts[1]

    training_like = mode in ('train', 'valid')
    with_gt = mode != 'infer'

    if training_like:
        ngb_frames, name_b = get_neighbor_frames(frame_name,
                                                 number_frames=number_frames,
                                                 interval_list=interval_list,
                                                 random_reverse=random_reverse)
    elif mode in ('test', 'infer'):
        ngb_frames, name_b = get_test_neighbor_frames(int(frame_name), number_frames)
    else:
        raise NotImplementedError('mode {} not implemented'.format(mode))
    frame_name = name_b
    print('key2', ngb_frames, name_b)

    if with_gt:
        gt_path = os.path.join(gtroot, video_name, frame_name + '.png')
        img_GT = read_img(gt_path, is_gt=True)

    frame_list = []
    for ngb_frm in ngb_frames:
        ngb_path = os.path.join(fileroot, video_name, ("%04d" % ngb_frm) + '.png')
        frame_list.append(read_img(ngb_path))

    H, W, C = frame_list[0].shape

    # random crop (training / validation only)
    if training_like:
        # the low-quality patch is `scale` times smaller when the input is LR
        patch = crop_size // scale if LR_input else crop_size
        rnd_h = random.randint(0, max(0, H - patch))
        rnd_w = random.randint(0, max(0, W - patch))
        frame_list = [v[rnd_h:rnd_h + patch, rnd_w:rnd_w + patch, :] for v in frame_list]
        if LR_input:
            rnd_h_HR, rnd_w_HR = int(rnd_h * scale), int(rnd_w * scale)
            img_GT = img_GT[rnd_h_HR:rnd_h_HR + crop_size, rnd_w_HR:rnd_w_HR + crop_size, :]
        else:
            img_GT = img_GT[rnd_h:rnd_h + crop_size, rnd_w:rnd_w + crop_size, :]

    # random flip and rotation; the ground truth rides along as last element
    if with_gt:
        frame_list.append(img_GT)
    rlt = img_augment(frame_list, use_flip, use_rot) if training_like else frame_list
    if with_gt:
        frame_list = rlt[0:-1]
        img_GT = rlt[-1]
    else:
        frame_list = rlt

    # stack frames on a new leading axis, BGR to RGB, HWC to CHW
    img_LQs = np.stack(frame_list, axis=0)[:, :, :, [2, 1, 0]]
    img_LQs = np.transpose(img_LQs, (0, 3, 1, 2)).astype('float32')
    if not with_gt:
        return img_LQs

    img_GT = img_GT[:, :, [2, 1, 0]]
    img_GT = np.transpose(img_GT, (2, 0, 1)).astype('float32')
    return img_LQs, img_GT
def get_test_neighbor_frames(crt_i, N, max_n=100, padding='new_info'):
    """Build the list of N frame indexes centred on ``crt_i``.

    Indexes that fall outside ``[0, max_n - 1]`` are replaced according to
    ``padding``.

    Args:
        crt_i (int): current center index
        N (int): number of frames to read
        max_n (int): length of the image sequence (counted from 1)
        padding (str): one of replicate | reflection | new_info | circle

    Example: crt_i = 0, N = 5
        replicate:  [0, 0, 0, 1, 2]
        reflection: [2, 1, 0, 1, 2]
        new_info:   [4, 3, 0, 1, 2]
        circle:     [3, 4, 0, 1, 2]

    Returns:
        tuple: (list of int indexes, ``crt_i`` formatted as 8 digits)

    Raises:
        ValueError: if an index needs padding and ``padding`` is unknown.
    """
    last_idx = max_n - 1
    half = N // 2

    # replacement rules for indexes before the first frame
    before_start = {
        'replicate': lambda idx: 0,
        'reflection': lambda idx: -idx,
        'new_info': lambda idx: (crt_i + half) + (-idx),
        'circle': lambda idx: N + idx,
    }
    # replacement rules for indexes after the last frame
    after_end = {
        'replicate': lambda idx: last_idx,
        'reflection': lambda idx: last_idx * 2 - idx,
        'new_info': lambda idx: (crt_i - half) - (idx - last_idx),
        'circle': lambda idx: idx - N,
    }

    def _resolve(idx):
        if idx < 0:
            rules = before_start
        elif idx > last_idx:
            rules = after_end
        else:
            return idx
        # the padding mode is only validated when padding is actually needed
        if padding not in rules:
            raise ValueError('Wrong padding mode')
        return rules[padding](idx)

    indexes = [_resolve(idx) for idx in range(crt_i - half, crt_i + half + 1)]
    return indexes, '{:08d}'.format(crt_i)
def get_neighbor_frames(frame_name, number_frames, interval_list, random_reverse, max_frame=99, bordermode=False):
    """Pick ``number_frames`` frame indexes around (or starting at) a frame.

    The spacing between frames is drawn at random from ``interval_list``.
    With ``bordermode`` the list starts at the given frame and runs forward
    or backward; otherwise it is centred on the frame, which is re-drawn at
    random while the window would leave ``[0, max_frame]``.

    Returns:
        tuple: (list of int indexes, 8-digit name of the key frame)
    """
    center = int(frame_name)
    half = number_frames // 2
    step = random.choice(interval_list)

    if bordermode:
        forward = True
        if random_reverse and random.random() < 0.5:
            forward = random.choice([0, 1]) == 1
        # a border that is too close overrides the random direction
        reach = step * (number_frames - 1)
        if center + reach > max_frame:
            forward = False
        elif center - reach < 0:
            forward = True

        signed_step = step if forward else -step
        neighbors = list(
            range(center, center + signed_step * number_frames, signed_step))
        key_name = '{:08d}'.format(neighbors[0])
    else:
        # ensure the window does not exceed the borders
        while center + half * step > max_frame or center - half * step < 0:
            center = random.randint(0, max_frame)
        neighbors = list(
            range(center - half * step, center + half * step + 1, step))
        if random_reverse and random.random() < 0.5:
            neighbors.reverse()
        key_name = '{:08d}'.format(neighbors[half])

    assert len(neighbors) == number_frames, \
        "frames slected have length({}), but it should be ({})".format(len(neighbors), number_frames)
    return neighbors, key_name
def read_img(path, size=None, is_gt=False):
    """Read an image with cv2 and scale it by 1/255.

    Args:
        path: image file to read.
        size: unused; kept for interface compatibility.
        is_gt: unused; kept for interface compatibility.

    Returns:
        float32 array in HWC layout with at most 3 channels, in the channel
        order produced by ``cv2.imread``.

    Raises:
        IOError: if the image cannot be read.
    """
    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if img is None:
        # imread signals a missing/unreadable file by returning None; fail
        # with the offending path instead of an opaque AttributeError below.
        raise IOError('failed to read image: {}'.format(path))
    img = img.astype(np.float32) / 255.
    if img.ndim == 2:
        img = np.expand_dims(img, axis=2)
    # some images have 4 channels
    if img.shape[2] > 3:
        img = img[:, :, :3]
    return img
def img_augment(img_list, hflip=True, rot=True):
    """Apply one randomly drawn flip/transpose combination to every image.

    A horizontal flip is considered when ``hflip`` is set; a vertical flip
    and a swap of the first two axes are considered when ``rot`` is set.
    Each enabled operation is applied with probability 0.5, and the same
    decision is used for all images in ``img_list``.
    """
    do_hflip = hflip and random.random() < 0.5
    do_vflip = rot and random.random() < 0.5
    do_swap = rot and random.random() < 0.5

    augmented = []
    for img in img_list:
        if do_hflip:
            img = img[:, ::-1, :]
        if do_vflip:
            img = img[::-1, :, :]
        if do_swap:
            img = img.transpose(1, 0, 2)
        augmented.append(img)
    return augmented
def make_reader(filelist,
                num_threads,
                batch_size,
                is_training,
                number_frames,
                interval_list,
                random_reverse,
                fileroot,
                crop_size,
                use_flip,
                use_rot,
                gtroot,
                LR_input,
                scale,
                mode='train'):
    """Return a single-process generator function yielding sample batches.

    Each batch is a list of ``batch_size`` tuples whose layout depends on
    ``mode``; a trailing incomplete batch is not yielded. ``num_threads`` is
    not used by this reader.
    """
    fl = filelist

    def reader_():
        if is_training:
            random.shuffle(fl)
        batch = []
        for item in fl:
            sample = get_sample_data(item,
                                     number_frames, interval_list, random_reverse, fileroot,
                                     crop_size, use_flip, use_rot, gtroot, LR_input, scale, mode)
            # only non-infer modes come with a ground-truth image
            if mode != 'infer':
                img_LQs, img_GT = sample
            else:
                img_LQs = sample

            name_parts = item.split('_')
            videoname = name_parts[0]
            framename = name_parts[1]

            if mode in ('train', 'valid'):
                entry = (img_LQs, img_GT)
            elif mode == 'test':
                entry = (img_LQs, img_GT, videoname, framename)
            elif mode == 'infer':
                entry = (img_LQs, videoname, framename)
            else:
                raise NotImplementedError("mode {} not implemented".format(mode))
            batch.append(entry)

            if len(batch) == batch_size:
                yield batch
                batch = []

    return reader_
def make_multi_reader(filelist,
                      num_threads,
                      batch_size,
                      is_training,
                      number_frames,
                      interval_list,
                      random_reverse,
                      fileroot,
                      crop_size,
                      use_flip,
                      use_rot,
                      gtroot,
                      LR_input,
                      scale,
                      mode='train'):
    """Return a generator function that reads batches in worker processes.

    The file list is split into ``num_threads`` slices; one
    ``multiprocessing.Process`` per slice pushes complete batches into a
    shared queue and finally a ``None`` marker. The returned generator yields
    batches until every worker has sent its marker.
    """

    def read_into_queue(flq, queue):
        batch = []
        for item in flq:
            sample = get_sample_data(item,
                                     number_frames, interval_list, random_reverse, fileroot,
                                     crop_size, use_flip, use_rot, gtroot, LR_input, scale, mode)
            # only non-infer modes come with a ground-truth image
            if mode != 'infer':
                img_LQs, img_GT = sample
            else:
                img_LQs = sample

            name_parts = item.split('_')
            videoname = name_parts[0]
            framename = name_parts[1]

            if mode in ('train', 'valid'):
                entry = (img_LQs, img_GT)
            elif mode == 'test':
                entry = (img_LQs, img_GT, videoname, framename)
            elif mode == 'infer':
                entry = (img_LQs, videoname, framename)
            else:
                raise NotImplementedError("mode {} not implemented".format(mode))
            batch.append(entry)

            if len(batch) == batch_size:
                queue.put(batch)
                batch = []
        # end-of-stream marker for this worker
        queue.put(None)

    def queue_reader():
        fl = filelist
        if is_training:
            random.shuffle(fl)

        n = num_threads
        queue_size = 20
        file_num = int(len(fl) // n)
        # equal slices; the last worker also takes the remainder
        reader_lists = [
            fl[i * file_num:(i + 1) * file_num] if i < n - 1 else fl[i * file_num:]
            for i in range(n)
        ]

        queue = multiprocessing.Queue(queue_size)
        workers = []
        for chunk in reader_lists:
            worker = multiprocessing.Process(
                target=read_into_queue, args=(chunk, queue))
            worker.start()
            workers.append(worker)

        pending = len(reader_lists)
        while pending > 0:
            sample = queue.get()
            if sample is None:
                pending -= 1
            else:
                yield sample

        for worker in workers:
            if worker.is_alive():
                worker.join()

    return queue_reader
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import pickle
import cv2
import numpy as np
import random
class ReaderNotFoundError(Exception):
    """Raised when a requested reader name is not registered."""

    def __init__(self, reader_name, avail_readers):
        super(ReaderNotFoundError, self).__init__()
        self.reader_name = reader_name
        self.avail_readers = avail_readers

    def __str__(self):
        """Name the missing reader followed by one available reader per line."""
        header = "Reader {} Not Found.\nAvailiable readers:\n".format(
            self.reader_name)
        listing = "".join(" {}\n".format(reader) for reader in self.avail_readers)
        return header + listing
class DataReader(object):
    """Base class of the data readers for video input."""

    def __init__(self, model_name, mode, cfg):
        self.name, self.mode, self.cfg = model_name, mode, cfg

    def create_reader(self):
        """Placeholder for subclasses; the base implementation returns None."""
        return None

    def get_config_from_sec(self, sec, item, default=None):
        """Return ``cfg[SEC][item]``, or ``default`` if either is missing.

        The section name is upper-cased before the lookup.
        """
        section = sec.upper()
        if section in self.cfg:
            return self.cfg[section].get(item, default)
        return default
class ReaderZoo(object):
    """Registry mapping reader names to reader classes."""

    def __init__(self):
        self.reader_zoo = {}

    def regist(self, name, reader):
        """Register ``reader`` (a direct subclass of DataReader) as ``name``."""
        assert reader.__base__ == DataReader, "Unknow model type {}".format(
            type(reader))
        self.reader_zoo[name] = reader

    def get(self, name, mode, cfg):
        """Instantiate the reader registered under ``name``.

        Raises:
            ReaderNotFoundError: if no reader is registered under ``name``.
        """
        if name in self.reader_zoo:
            return self.reader_zoo[name](name, mode, cfg)
        raise ReaderNotFoundError(name, self.reader_zoo.keys())
# singleton reader_zoo
reader_zoo = ReaderZoo()
def regist_reader(name, reader):
    """Register ``reader`` under ``name`` in the module-level ``reader_zoo``."""
    reader_zoo.regist(name, reader)
def get_reader(name, mode, cfg):
    """Instantiate the reader registered as ``name`` and return its reader.

    The value returned is whatever that instance's ``create_reader()``
    produces.
    """
    return reader_zoo.get(name, mode, cfg).create_reader()
...@@ -17,10 +17,10 @@ valid_interval=1 ...@@ -17,10 +17,10 @@ valid_interval=1
weights="./weights/paddle_state_dict_L.npz" weights="./weights/paddle_state_dict_L.npz"
export CUDA_VISIBLE_DEVICES=4,5,6,7 #0,1,5,6 fast, 2,3,4,7 slow export CUDA_VISIBLE_DEVICES=6 #0,1,5,6 fast, 2,3,4,7 slow
export FLAGS_fast_eager_deletion_mode=1 # export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0 # export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98 # export FLAGS_fraction_of_gpu_memory_to_use=0.98
if [ "$mode"x == "predict"x ]; then if [ "$mode"x == "predict"x ]; then
echo $mode $name $configs $weights echo $mode $name $configs $weights
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import yaml
from .utility import AttrDict
import logging
logger = logging.getLogger(__name__)
CONFIG_SECS = [
'train',
'valid',
'test',
'infer',
]
def parse_config(cfg_file):
    """Load a YAML config file into a (nested) AttrDict."""
    import yaml
    with open(cfg_file, 'r') as fopen:
        # NOTE(review): yaml.Loader can construct arbitrary Python objects;
        # only use this on trusted config files (consider yaml.SafeLoader).
        raw_config = yaml.load(fopen, Loader=yaml.Loader)
        yaml_config = AttrDict(raw_config)
    create_attr_dict(yaml_config)
    return yaml_config
def create_attr_dict(yaml_config):
    """Convert a loaded config in place into nested AttrDicts.

    Plain ``dict`` values are wrapped in ``AttrDict`` and processed
    recursively. String values are passed through ``ast.literal_eval`` so
    that e.g. ``'[1, 2]'`` becomes a list; strings that are not valid Python
    literals are kept unchanged.
    """
    from ast import literal_eval
    for key, value in yaml_config.items():
        if type(value) is dict:
            yaml_config[key] = value = AttrDict(value)
        if isinstance(value, str):
            try:
                value = literal_eval(value)
            except Exception:
                # Not a Python literal: keep the raw string. Deliberately not
                # BaseException, so Ctrl-C / SystemExit still propagate.
                pass
        if isinstance(value, AttrDict):
            create_attr_dict(yaml_config[key])
        else:
            yaml_config[key] = value
    return
def merge_configs(cfg, sec, args_dict):
    """Override entries of one config section with non-None argument values.

    Args:
        cfg: config object whose upper-cased section ``sec`` is updated.
        sec: section name; must be one of ``CONFIG_SECS``.
        args_dict: mapping of option name to value; ``None`` values and
            names the section does not already have are ignored.

    Returns:
        ``cfg`` itself, modified in place.
    """
    assert sec in CONFIG_SECS, "invalid config section {}".format(sec)
    sec_dict = getattr(cfg, sec.upper())
    for k, v in args_dict.items():
        if v is None:
            continue
        try:
            if hasattr(sec_dict, k):
                setattr(sec_dict, k, v)
        except Exception:
            # Best effort: when the section's __getattr__ raises something
            # other than AttributeError for an unknown name (e.g. KeyError),
            # hasattr() lets it escape; such options are simply skipped.
            # Exception rather than a bare except, so Ctrl-C and SystemExit
            # are not swallowed.
            pass
    return cfg
def print_configs(cfg, mode):
    """Log every section of ``cfg`` and its key/value pairs at INFO level."""
    banner = "---------------- {:>5} Arguments ----------------".format(mode)
    logger.info(banner)
    for sec, sec_items in cfg.items():
        logger.info("{}:".format(sec))
        for k, v in sec_items.items():
            logger.info(" {}:{}".format(k, v))
    logger.info("-------------------------------------------------")
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import sys
import signal
import logging
import paddle
import paddle.fluid as fluid
__all__ = ['AttrDict']
logger = logging.getLogger(__name__)
def _term(sig_num, addition):
    """Signal handler: print pid and group id, then SIGKILL the process group.

    Both arguments are unused.
    """
    pid = os.getpid()
    print('current pid is %s, group id is %s' % (pid, os.getpgrp()))
    os.killpg(os.getpgid(pid), signal.SIGKILL)
signal.signal(signal.SIGTERM, _term)
signal.signal(signal.SIGINT, _term)
class AttrDict(dict):
    """dict whose items can also be read and written as attributes."""

    def __getattr__(self, key):
        # Only reached when normal attribute lookup fails, so a missing name
        # surfaces as KeyError rather than AttributeError.
        return self[key]

    def __setattr__(self, key, value):
        # Names already stored in the instance __dict__ stay real attributes;
        # every other name is stored as a dict item.
        target = self.__dict__ if key in self.__dict__ else self
        target[key] = value
def check_cuda(use_cuda, err = \
"\nYou can not set use_gpu = True in the model because you are using paddlepaddle-cpu.\n \
Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_gpu = False to run models on CPU.\n"
):
    """Exit with status 1 if GPU use is requested but paddle lacks CUDA.

    ``err`` is printed before exiting. Any ordinary exception raised by the
    check itself is ignored, so the function then returns normally.
    """
    try:
        if use_cuda == True:
            if fluid.is_compiled_with_cuda() == False:
                print(err)
                sys.exit(1)
    except Exception:
        pass
def check_version():
    """
    Log error and exit when the installed version of paddlepaddle is
    not satisfied.

    ``fluid.require_version('1.6.0')`` performs the check; if it raises, the
    error is logged and the process exits with status 1.
    """
    # No continuation backslash after the last fragment: a trailing one would
    # join the following `try:` line onto this statement.
    err = "PaddlePaddle version 1.6 or higher is required, " \
          "or a suitable develop version is satisfied as well. \n" \
          "Please make sure the version is good with your code."
    try:
        fluid.require_version('1.6.0')
    except Exception as e:
        logger.error(err)
        sys.exit(1)
cd DAIN/pwcnet/correlation_op
# Needs to be run the first time only
# NOTE(review): the script name below looks like a typo for make.sh - confirm
# bash make.shap
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python -c 'import paddle; print(paddle.sysconfig.get_lib())'`
export PYTHONPATH=$PYTHONPATH:`pwd`
cd -
# input: path of the input video
# output: path where the output video is saved
# proccess_order: order in which the models are applied
python tools/main.py \
--input input.mp4 --output output --proccess_order DAIN DeOldify EDVR
import sys
sys.path.append('.')
import argparse
import paddle
from DAIN.predict import VideoFrameInterp
from DeOldify.predict import DeOldifyPredictor
from EDVR.predict import EDVRPredictor
# Command-line options of the video-fixing pipeline.
parser = argparse.ArgumentParser(description='Fix video')
parser.add_argument('--input', type=str, default=None, help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
# Optional model weight locations, one per model.
parser.add_argument('--DAIN_weight', type=str, default=None, help='Path to the DAIN model weights')
parser.add_argument('--DeOldify_weight', type=str, default=None, help='Path to the DeOldify model weights')
parser.add_argument('--EDVR_weight', type=str, default=None, help='Path to the EDVR model weights')
# DAIN args
parser.add_argument('--time_step', type=float, default=0.5, help='choose the time steps')
# One or more of: DAIN, DeOldify, EDVR; applied in the order given.
parser.add_argument('--proccess_order', type=str, default='none', nargs='+', help='Process order')
if __name__ == "__main__":
    # Run the requested models one after another; every stage consumes the
    # video produced by the previous one.
    args = parser.parse_args()
    print('args...', args)
    orders = args.proccess_order

    # Start the chain from the user's input so that any model may come first
    # (previously only DAIN could, the others received None).
    temp_video_path = args.input
    frames_path = None

    for order in orders:
        if order == 'DAIN':
            predictor = VideoFrameInterp(args.time_step, args.DAIN_weight,
                                         temp_video_path, output_path=args.output)
            frames_path, temp_video_path = predictor.run()
        elif order == 'DeOldify':
            print('frames:', frames_path)
            print('video_path:', temp_video_path)

            # DeOldify is run with static mode disabled; it is re-enabled
            # afterwards for the other predictors.
            paddle.disable_static()
            predictor = DeOldifyPredictor(temp_video_path, args.output, weight_path=args.DeOldify_weight)
            frames_path, temp_video_path = predictor.run()
            print('frames:', frames_path)
            print('video_path:', temp_video_path)
            paddle.enable_static()
        elif order == 'EDVR':
            predictor = EDVRPredictor(temp_video_path, args.output, weight_path=args.EDVR_weight)
            frames_path, temp_video_path = predictor.run()
            print('frames:', frames_path)
            print('video_path:', temp_video_path)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册