diff --git a/applications/DAIN/my_args.py b/applications/DAIN/my_args.py index ee4e5f43c91934130dcfe916fd05b90d8215967f..e8ad1abc48e2ef743b7142a8a2d6758b5ae20579 100644 --- a/applications/DAIN/my_args.py +++ b/applications/DAIN/my_args.py @@ -91,4 +91,4 @@ parser.add_argument('--use_cuda', help='use cuda or not') parser.add_argument('--use_cudnn', default=1, type=int, help='use cudnn or not') -args = parser.parse_args() +# args = parser.parse_args() diff --git a/applications/DAIN/predict.py b/applications/DAIN/predict.py index 392390671184d47c42b329512b2065073094bb17..b92e907192384556cbb1c6b45700c938ce884e61 100644 --- a/applications/DAIN/predict.py +++ b/applications/DAIN/predict.py @@ -1,19 +1,23 @@ -import os, sys -import math -import random +import os +import sys + +cur_path = os.path.abspath(os.path.dirname(__file__)) +sys.path.append(cur_path) + import time import glob -import shutil import numpy as np from imageio import imread, imsave import cv2 import paddle.fluid as fluid +from paddle.incubate.hapi.download import get_path_from_url import networks from util import * -from my_args import args +from my_args import parser +DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar' def infer_engine(model_dir, run_mode='fluid', @@ -76,18 +80,17 @@ class VideoFrameInterp(object): key_frame_thread=0., output_path='output'): self.video_path = video_path - self.output_path = output_path + self.output_path = os.path.join(output_path, 'DAIN') + if model_path is None: + model_path = get_path_from_url(DAIN_WEIGHT_URL, cur_path) + self.model_path = model_path self.time_step = time_step self.key_frame_thread = key_frame_thread self.exe, self.program, self.fetch_targets = executor(model_path, use_gpu=use_gpu) - # self.predictor = load_predictor( - # model_dir, - # run_mode=run_mode, - # min_subgraph_size=3, - # use_gpu=use_gpu) + def run(self): frame_path_input = os.path.join(self.output_path, 'frames-input') @@ -269,9 +272,12 @@ class VideoFrameInterp(object): os.remove(video_pattern_output) frames_to_video_ffmpeg(frame_pattern_combined, video_pattern_output, r2) + + return frame_pattern_combined, video_pattern_output if __name__ == '__main__': + args = parser.parse_args() predictor = VideoFrameInterp(args.time_step, args.saved_model, args.video_path, args.output_path) predictor.run() diff --git a/applications/DAIN/run.sh b/applications/DAIN/run.sh index aaf96ecacec6e01e9898d2ab6b35b72ab95093ac..464db1ad6722587f0618aaa060feb7e36f843e11 100644 --- a/applications/DAIN/run.sh +++ b/applications/DAIN/run.sh @@ -3,7 +3,8 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python -c 'import paddle; print(paddle. 
export PYTHONPATH=$PYTHONPATH:`pwd` cd ../../ -VID_PATH=/paddle/work/github/DAIN/data/CBA.mp4 +# VID_PATH=/workspace/codes/colorization_paddle_net_weights/video/Peking_input360p.mp4 +VID_PATH=/workspace/codes/colorization_paddle_net_weights/video/Peking_input360p_clip_5_15.mp4 OUT_PATH=output MODEL_PATH=DAIN_paddle_weight @@ -13,8 +14,8 @@ MODEL_PATH=DAIN_paddle_weight # --output_path=$OUT_PATH \ # --saved_model=$MODEL_PATH -CUDA_VISIBLE_DEVICES=2 python predict.py \ - --time_step 0.125 \ +CUDA_VISIBLE_DEVICES=5 python predict.py \ + --time_step 0.5 \ --video_path=$VID_PATH \ --output_path=$OUT_PATH \ --saved_model=$MODEL_PATH diff --git a/applications/DAIN/util.py b/applications/DAIN/util.py index de6e18c6513ae7dc0b4393192ce406a357210e1b..3b83a1bb7aeff9a6a5dc04d32b57f9ccfa491ea5 100644 --- a/applications/DAIN/util.py +++ b/applications/DAIN/util.py @@ -95,8 +95,12 @@ def combine_frames(input, interpolated, combined, num_frames): dst = os.path.join(combined, '{:08d}.png'.format(i * (num_frames + 1))) shutil.copy2(src, dst) if i < num1 - 1: - for k in range(num_frames): - src = frames2[i * num_frames + k] - dst = os.path.join( - combined, '{:08d}.png'.format(i * (num_frames + 1) + k + 1)) - shutil.copy2(src, dst) + try: + for k in range(num_frames): + src = frames2[i * num_frames + k] + dst = os.path.join( + combined, '{:08d}.png'.format(i * (num_frames + 1) + k + 1)) + shutil.copy2(src, dst) + except Exception as e: + print(e) + print(len(frames2), num_frames, i, k, i * num_frames + k) diff --git a/applications/DeOldify/hook.py b/applications/DeOldify/hook.py new file mode 100644 index 0000000000000000000000000000000000000000..ebc75a6f59b187a379cb080344eac77608adb09a --- /dev/null +++ b/applications/DeOldify/hook.py @@ -0,0 +1,81 @@ +import numpy as np + +import paddle +import paddle.nn as nn + +def is_listy(x): + return isinstance(x, (tuple,list)) + + +class Hook(): + "Create a hook on `m` with `hook_func`." + def __init__(self, m, hook_func, is_forward=True, detach=True): + self.hook_func,self.detach,self.stored = hook_func,detach,None + f = m.register_forward_post_hook if is_forward else m.register_backward_hook + self.hook = f(self.hook_fn) + self.removed = False + + def hook_fn(self, module, input, output): + "Applies `hook_func` to `module`, `input`, `output`." + if self.detach: + input = (o.detach() for o in input ) if is_listy(input ) else input.detach() + output = (o.detach() for o in output) if is_listy(output) else output.detach() + self.stored = self.hook_func(module, input, output) + + def remove(self): + "Remove the hook from the model." + if not self.removed: + self.hook.remove() + self.removed=True + + def __enter__(self, *args): return self + def __exit__(self, *args): self.remove() + +class Hooks(): + "Create several hooks on the modules in `ms` with `hook_func`." + def __init__(self, ms, hook_func, is_forward=True, detach=True): + self.hooks = [] + try: + for m in ms: + self.hooks.append(Hook(m, hook_func, is_forward, detach)) + except Exception as e: + print(e) + + def __getitem__(self,i:int)->Hook: return self.hooks[i] + def __len__(self)->int: return len(self.hooks) + def __iter__(self): return iter(self.hooks) + @property + def stored(self): return [o.stored for o in self] + + def remove(self): + "Remove the hooks from the model." 
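+        # Each Hook remembers whether it was already removed (self.removed), so calling
+        # remove() repeatedly is safe.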
+ for h in self.hooks: h.remove() + + def __enter__(self, *args): return self + def __exit__ (self, *args): self.remove() + +def _hook_inner(m,i,o): return o if isinstance(o, paddle.framework.Variable) else o if is_listy(o) else list(o) + +def hook_output (module, detach=True, grad=False): + "Return a `Hook` that stores activations of `module` in `self.stored`" + return Hook(module, _hook_inner, detach=detach, is_forward=not grad) + +def hook_outputs(modules, detach=True, grad=False): + "Return `Hooks` that store activations of all `modules` in `self.stored`" + return Hooks(modules, _hook_inner, detach=detach, is_forward=not grad) + +def model_sizes(m, size=(64,64)): + "Pass a dummy input through the model `m` to get the various sizes of activations." + with hook_outputs(m) as hooks: + x = dummy_eval(m, size) + return [o.stored.shape for o in hooks] + +def dummy_eval(m, size=(64,64)): + "Pass a `dummy_batch` in evaluation mode in `m` with `size`." + m.eval() + return m(dummy_batch(size)) + +def dummy_batch(size=(64,64), ch_in=3): + "Create a dummy batch to go through `m` with `size`." + arr = np.random.rand(1, ch_in, *size).astype('float32') * 2 - 1 + return paddle.to_tensor(arr) diff --git a/applications/DeOldify/model.py b/applications/DeOldify/model.py new file mode 100644 index 0000000000000000000000000000000000000000..f763eebef8ecd4edadf9d9b47ca3a6e0332ec5f1 --- /dev/null +++ b/applications/DeOldify/model.py @@ -0,0 +1,395 @@ +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from resnet_backbone import resnet34, resnet101 +from hook import hook_outputs, model_sizes, dummy_eval +# from weight_norm import weight_norm +from spectral_norm import Spectralnorm +from conv import Conv1D +from paddle import fluid + + +class SequentialEx(nn.Layer): + "Like `nn.Sequential`, but with ModuleList semantics, and can access module input" + def __init__(self, *layers): + super().__init__() + self.layers = nn.LayerList(layers) + + def forward(self, x): + res = x + for l in self.layers: + if isinstance(l, MergeLayer): + l.orig = x + nres = l(res) + # We have to remove res.orig to avoid hanging refs and therefore memory leaks + # l.orig = None + res = nres + return res + + def __getitem__(self,i): return self.layers[i] + def append(self,l): return self.layers.append(l) + def extend(self,l): return self.layers.extend(l) + def insert(self,i,l): return self.layers.insert(i,l) + + +class Deoldify(SequentialEx): + def __init__(self, encoder, n_classes, blur=False, blur_final=True, self_attention=False, y_range=None, last_cross=True, bottle=False, norm_type='Batch', nf_factor=1, **kwargs): + + imsize = (256, 256) + sfs_szs = model_sizes(encoder, size=imsize) + sfs_idxs = list(reversed(_get_sfs_idxs(sfs_szs))) + self.sfs = hook_outputs([encoder[i] for i in sfs_idxs], detach=False) + x = dummy_eval(encoder, imsize).detach() + + nf = 512 * nf_factor + extra_bn = norm_type == 'Spectral' + ni = sfs_szs[-1][1] + middle_conv = nn.Sequential( + custom_conv_layer( + ni, ni * 2, norm_type=norm_type, extra_bn=extra_bn + ), + custom_conv_layer( + ni * 2, ni, norm_type=norm_type, extra_bn=extra_bn + ), + ) + + layers = [encoder, nn.BatchNorm(ni), nn.ReLU(), middle_conv] + + for i, idx in enumerate(sfs_idxs): + not_final = i != len(sfs_idxs) - 1 + up_in_c, x_in_c = int(x.shape[1]), int(sfs_szs[idx][1]) + do_blur = blur and (not_final or blur_final) + sa = self_attention and (i == len(sfs_idxs) - 3) + + n_out = nf if not_final else nf // 2 + + unet_block = UnetBlockWide( + 
up_in_c, + x_in_c, + n_out, + self.sfs[i], + final_div=not_final, + blur=blur, + self_attention=sa, + norm_type=norm_type, + extra_bn=extra_bn, + **kwargs + ) + unet_block.eval() + layers.append(unet_block) + x = unet_block(x) + + ni = x.shape[1] + if imsize != sfs_szs[0][-2:]: + layers.append(PixelShuffle_ICNR(ni, **kwargs)) + if last_cross: + layers.append(MergeLayer(dense=True)) + ni += 3 + layers.append(res_block(ni, bottle=bottle, norm_type=norm_type, **kwargs)) + layers += [ + custom_conv_layer(ni, n_classes, ks=1, use_activ=False, norm_type=norm_type) + ] + if y_range is not None: + layers.append(SigmoidRange(*y_range)) + super().__init__(*layers) + + + +def custom_conv_layer( + ni: int, + nf: int, + ks: int = 3, + stride: int = 1, + padding: int = None, + bias: bool = None, + is_1d: bool = False, + norm_type='Batch', + use_activ: bool = True, + leaky: float = None, + transpose: bool = False, + self_attention: bool = False, + extra_bn: bool = False, + **kwargs +): + "Create a sequence of convolutional (`ni` to `nf`), ReLU (if `use_activ`) and batchnorm (if `bn`) layers." + if padding is None: + padding = (ks - 1) // 2 if not transpose else 0 + bn = norm_type in ('Batch', 'Batchzero') or extra_bn == True + if bias is None: + bias = not bn + conv_func = nn.ConvTranspose2d if transpose else nn.Conv1d if is_1d else nn.Conv2d + + conv = conv_func(ni, nf, kernel_size=ks, bias_attr=bias, stride=stride, padding=padding) + if norm_type == 'Weight': + print('use weight norm') + conv = nn.utils.weight_norm(conv) + elif norm_type == 'Spectral': + # pass + conv = Spectralnorm(conv) + layers = [conv] + if use_activ: + layers.append(relu(True, leaky=leaky)) + if bn: + layers.append((nn.BatchNorm if is_1d else nn.BatchNorm)(nf)) + if self_attention: + layers.append(SelfAttention(nf)) + + return nn.Sequential(*layers) + + +def relu(inplace:bool=False, leaky:float=None): + "Return a relu activation, maybe `leaky` and `inplace`." + return nn.LeakyReLU(leaky) if leaky is not None else nn.ReLU() + + +class UnetBlockWide(nn.Layer): + "A quasi-UNet block, using `PixelShuffle_ICNR upsampling`." + + def __init__( + self, + up_in_c: int, + x_in_c: int, + n_out: int, + hook, + final_div: bool = True, + blur: bool = False, + leaky: float = None, + self_attention: bool = False, + **kwargs + ): + super().__init__() + self.hook = hook + up_out = x_out = n_out // 2 + self.shuf = CustomPixelShuffle_ICNR( + up_in_c, up_out, blur=blur, leaky=leaky, **kwargs + ) + self.bn = nn.BatchNorm(x_in_c) + ni = up_out + x_in_c + self.conv = custom_conv_layer( + ni, x_out, leaky=leaky, self_attention=self_attention, **kwargs + ) + self.relu = relu(leaky=leaky) + + def forward(self, up_in): + s = self.hook.stored + up_out = self.shuf(up_in) + ssh = s.shape[-2:] + if ssh != up_out.shape[-2:]: + up_out = F.interpolate(up_out, s.shape[-2:], mode='nearest') + cat_x = self.relu(paddle.concat([up_out, self.bn(s)], axis=1)) + return self.conv(cat_x) + + +class UnetBlockDeep(paddle.fluid.Layer): + "A quasi-UNet block, using `PixelShuffle_ICNR upsampling`." 
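+    # NOTE: forward() reads self.hook.stored, but the hook parameter is commented out and
+    # self.hook is never assigned in __init__; build_model() only instantiates UnetBlockWide,
+    # so this class appears unused as written.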
+ + def __init__( + self, + up_in_c: int, + x_in_c: int, + # hook: Hook, + final_div: bool = True, + blur: bool = False, + leaky: float = None, + self_attention: bool = False, + nf_factor: float = 1.0, + **kwargs + ): + super().__init__() + + self.shuf = CustomPixelShuffle_ICNR( + up_in_c, up_in_c // 2, blur=blur, leaky=leaky, **kwargs + ) + self.bn = nn.BatchNorm(x_in_c) + ni = up_in_c // 2 + x_in_c + nf = int((ni if final_div else ni // 2) * nf_factor) + self.conv1 = custom_conv_layer(ni, nf, leaky=leaky, **kwargs) + self.conv2 = custom_conv_layer( + nf, nf, leaky=leaky, self_attention=self_attention, **kwargs + ) + self.relu = relu(leaky=leaky) + + def forward(self, up_in): + s = self.hook.stored + up_out = self.shuf(up_in) + ssh = s.shape[-2:] + if ssh != up_out.shape[-2:]: + up_out = F.interpolate(up_out, s.shape[-2:], mode='nearest') + cat_x = self.relu(paddle.concat([up_out, self.bn(s)], axis=1)) + return self.conv2(self.conv1(cat_x)) + + +def ifnone(a, b): + "`a` if `a` is not None, otherwise `b`." + return b if a is None else a + + +class PixelShuffle_ICNR(nn.Layer): + "Upsample by `scale` from `ni` filters to `nf` (default `ni`), using `nn.PixelShuffle`, `icnr` init, and `weight_norm`." + def __init__(self, ni:int, nf:int=None, scale:int=2, blur:bool=False, norm_type='Weight', leaky:float=None): + super().__init__() + nf = ifnone(nf, ni) + self.conv = conv_layer(ni, nf*(scale**2), ks=1, norm_type=norm_type, use_activ=False) + + self.shuf = PixelShuffle(scale) + + self.pad = ReplicationPad2d((1,0,1,0)) + self.blur = nn.Pool2D(2, pool_stride=1, pool_type='avg') + self.relu = relu(True, leaky=leaky) + + def forward(self,x): + x = self.shuf(self.relu(self.conv(x))) + return self.blur(self.pad(x)) if self.blur else x + +def conv_layer(ni:int, nf:int, ks:int=3, stride:int=1, padding:int=None, bias:bool=None, is_1d:bool=False, + norm_type='Batch', use_activ:bool=True, leaky:float=None, + transpose:bool=False, init=None, self_attention:bool=False): + "Create a sequence of convolutional (`ni` to `nf`), ReLU (if `use_activ`) and batchnorm (if `bn`) layers." + if padding is None: padding = (ks-1)//2 if not transpose else 0 + bn = norm_type in ('Batch', 'BatchZero') + if bias is None: bias = not bn + conv_func = nn.ConvTranspose2d if transpose else nn.Conv1d if is_1d else nn.Conv2d + + conv = conv_func(ni, nf, kernel_size=ks, bias_attr=bias, stride=stride, padding=padding) + if norm_type=='Weight': + conv = nn.utils.weight_norm(conv) + elif norm_type=='Spectral': + conv = Spectralnorm(conv) + + layers = [conv] + if use_activ: layers.append(relu(True, leaky=leaky)) + if bn: layers.append((nn.BatchNorm if is_1d else nn.BatchNorm)(nf)) + if self_attention: layers.append(SelfAttention(nf)) + return nn.Sequential(*layers) + + +class CustomPixelShuffle_ICNR(paddle.fluid.Layer): + "Upsample by `scale` from `ni` filters to `nf` (default `ni`), using `nn.PixelShuffle`, `icnr` init, and `weight_norm`." 
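+    # The ReplicationPad2d + 2x2 average pool below implement the "blur" trick that suppresses
+    # pixel-shuffle checkerboard artifacts. Note that self.blur holds the Pool2D layer itself,
+    # so the `if self.blur` test in forward() appears to be always truthy, independent of the
+    # `blur` flag.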
+
+    def __init__(
+        self,
+        ni: int,
+        nf: int = None,
+        scale: int = 2,
+        blur: bool = False,
+        leaky: float = None,
+        **kwargs
+    ):
+        super().__init__()
+        nf = ifnone(nf, ni)
+        self.conv = custom_conv_layer(
+            ni, nf * (scale ** 2), ks=1, use_activ=False, **kwargs
+        )
+
+        self.shuf = PixelShuffle(scale)
+
+        self.pad = ReplicationPad2d((1, 0, 1, 0))
+        self.blur = nn.Pool2D(2, pool_stride=1, pool_type='avg')
+        self.relu = nn.LeakyReLU(leaky) if leaky is not None else nn.ReLU()  # relu(True, leaky=leaky)
+
+    def forward(self, x):
+        x = self.shuf(self.relu(self.conv(x)))
+        return self.blur(self.pad(x)) if self.blur else x
+
+
+class MergeLayer(paddle.fluid.Layer):
+    "Merge a shortcut with the result of the module by adding them or concatenating them if `dense=True`."
+    def __init__(self, dense:bool=False):
+        super().__init__()
+        self.dense=dense
+        self.orig = None
+
+    def forward(self, x):
+        out = paddle.concat([x,self.orig], axis=1) if self.dense else (x+self.orig)
+        self.orig = None
+        return out
+
+
+def res_block(nf, dense:bool=False, norm_type='Batch', bottle:bool=False, **conv_kwargs):
+    "Resnet block of `nf` features. `conv_kwargs` are passed to `conv_layer`."
+    norm2 = norm_type
+    if not dense and (norm_type=='Batch'): norm2 = 'BatchZero'
+    nf_inner = nf//2 if bottle else nf
+    return SequentialEx(conv_layer(nf, nf_inner, norm_type=norm_type, **conv_kwargs),
+                        conv_layer(nf_inner, nf, norm_type=norm2, **conv_kwargs),
+                        MergeLayer(dense))
+
+
+class SigmoidRange(paddle.fluid.Layer):
+    "Sigmoid module with range `(low, high)`"
+    def __init__(self, low, high):
+        super().__init__()
+        self.low,self.high = low,high
+
+    def forward(self, x): return sigmoid_range(x, self.low, self.high)
+
+
+def sigmoid_range(x, low, high):
+    "Sigmoid function with range `(low, high)`"
+    return F.sigmoid(x) * (high - low) + low
+
+
+class PixelShuffle(paddle.fluid.Layer):
+    def __init__(self, upscale_factor):
+        super(PixelShuffle, self).__init__()
+        self.upscale_factor = upscale_factor
+
+    def forward(self, x):
+        return paddle.fluid.layers.pixel_shuffle(x, self.upscale_factor)
+
+
+class ReplicationPad2d(nn.Layer):
+    def __init__(self, size):
+        super(ReplicationPad2d, self).__init__()
+        self.size = size
+
+    def forward(self, x):
+        return paddle.fluid.layers.pad2d(x, self.size, mode="edge")
+
+def conv1d(ni:int, no:int, ks:int=1, stride:int=1, padding:int=0, bias:bool=False):
+    "Create and initialize a `nn.Conv1d` layer with spectral normalization."
+    conv = nn.Conv1d(ni, no, ks, stride=stride, padding=padding, bias_attr=bias)
+    return Spectralnorm(conv)
+
+
+class SelfAttention(nn.Layer):
+    "Self attention layer for nd."
+    def __init__(self, n_channels):
+        super().__init__()
+        self.query = conv1d(n_channels, n_channels//8)
+        self.key = conv1d(n_channels, n_channels//8)
+        self.value = conv1d(n_channels, n_channels)
+        self.gamma = self.create_parameter(shape=[1],
+            default_initializer=paddle.fluid.initializer.Constant(0.0))  # nn.Parameter(tensor([0.]))
+
+    def forward(self, x):
+        # Notation from https://arxiv.org/pdf/1805.08318.pdf
+        size = x.shape
+        x = paddle.reshape(x, list(size[:2]) + [-1])
+        f,g,h = self.query(x),self.key(x),self.value(x)
+
+        beta = paddle.nn.functional.softmax(paddle.bmm(paddle.transpose(f, [0, 2, 1]), g), axis=1)
+        o = self.gamma * paddle.bmm(h, beta) + x
+        return paddle.reshape(o, size)
+
+def _get_sfs_idxs(sizes):
+    "Get the indexes of the layers where the size of the activation changes."
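+    # An activation's trailing dim is its spatial width; an index is recorded wherever
+    # consecutive activations differ in width, i.e. where the encoder downsamples.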
+ feature_szs = [size[-1] for size in sizes] + sfs_idxs = list( + np.where(np.array(feature_szs[:-1]) != np.array(feature_szs[1:]))[0] + ) + if feature_szs[0] != feature_szs[1]: + sfs_idxs = [0] + sfs_idxs + return sfs_idxs + + +def build_model(): + backbone = resnet101() + cut = -2 + encoder = nn.Sequential(*list(backbone.children())[:cut]) + + model = Deoldify(encoder, 3, blur=True, y_range=(-3, 3), norm_type='Spectral', self_attention=True, nf_factor=2) + return model diff --git a/applications/DeOldify/predict.py b/applications/DeOldify/predict.py new file mode 100644 index 0000000000000000000000000000000000000000..9ac857f7b506e0ca3464be1ea2942f42005d582f --- /dev/null +++ b/applications/DeOldify/predict.py @@ -0,0 +1,187 @@ +import os +import sys + +cur_path = os.path.abspath(os.path.dirname(__file__)) +sys.path.append(cur_path) + +import cv2 +import glob +import argparse +import numpy as np +import paddle +import pickle + +from PIL import Image +from tqdm import tqdm +from paddle import fluid +from model import build_model +from paddle.incubate.hapi.download import get_path_from_url + +parser = argparse.ArgumentParser(description='DeOldify') +parser.add_argument('--input', type=str, default='none', help='Input video') +parser.add_argument('--output', type=str, default='output', help='output dir') +parser.add_argument('--weight_path', type=str, default='none', help='Path to the reference image directory') + +DeOldify_weight_url = 'https://paddlegan.bj.bcebos.com/applications/DeOldify_stable.pdparams' + +def frames_to_video_ffmpeg(framepath, videopath, r): + ffmpeg = ['ffmpeg ', ' -loglevel ', ' error '] + cmd = ffmpeg + [ + ' -r ', r, ' -f ', ' image2 ', ' -i ', framepath, ' -vcodec ', + ' libx264 ', ' -pix_fmt ', ' yuv420p ', ' -crf ', ' 16 ', videopath + ] + cmd = ''.join(cmd) + print(cmd) + + if os.system(cmd) == 0: + print('Video: {} done'.format(videopath)) + else: + print('Video: {} error'.format(videopath)) + print('') + sys.stdout.flush() + + +class DeOldifyPredictor(): + def __init__(self, input, output, batch_size=1, weight_path=None): + self.input = input + self.output = os.path.join(output, 'DeOldify') + self.model = build_model() + if weight_path is None: + weight_path = get_path_from_url(DeOldify_weight_url, cur_path) + + state_dict, _ = paddle.load(weight_path) + self.model.load_dict(state_dict) + self.model.eval() + + def norm(self, img, render_factor=32, render_base=16): + target_size = render_factor * render_base + img = img.resize((target_size, target_size), resample=Image.BILINEAR) + + img = np.array(img).transpose([2, 0, 1]).astype('float32') / 255.0 + + img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) + img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + img -= img_mean + img /= img_std + return img.astype('float32') + + def denorm(self, img): + img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) + img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + img *= img_std + img += img_mean + img = img.transpose((1, 2, 0)) + + return (img * 255).astype('uint8') + + def post_process(self, raw_color, orig): + color_np = np.asarray(raw_color) + orig_np = np.asarray(orig) + color_yuv = cv2.cvtColor(color_np, cv2.COLOR_BGR2YUV) + orig_yuv = cv2.cvtColor(orig_np, cv2.COLOR_BGR2YUV) + hires = np.copy(orig_yuv) + hires[:, :, 1:3] = color_yuv[:, :, 1:3] + final = cv2.cvtColor(hires, cv2.COLOR_YUV2BGR) + final = Image.fromarray(final) + return final + + def run_single(self, img_path): + ori_img = 
Image.open(img_path).convert('LA').convert('RGB') + img = self.norm(ori_img) + x = paddle.to_tensor(img[np.newaxis,...]) + out = self.model(x) + + pred_img = self.denorm(out.numpy()[0]) + pred_img = Image.fromarray(pred_img) + pred_img = pred_img.resize(ori_img.size, resample=Image.BILINEAR) + pred_img = self.post_process(pred_img, ori_img) + return pred_img + + def run(self): + vid = self.input + base_name = os.path.basename(vid).split('.')[0] + output_path = os.path.join(self.output, base_name) + pred_frame_path = os.path.join(output_path, 'frames_pred') + + if not os.path.exists(output_path): + os.makedirs(output_path) + + if not os.path.exists(pred_frame_path): + os.makedirs(pred_frame_path) + + cap = cv2.VideoCapture(vid) + fps = cap.get(cv2.CAP_PROP_FPS) + + out_path = dump_frames_ffmpeg(vid, output_path) + + frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) + + + for frame in tqdm(frames): + pred_img = self.run_single(frame) + + frame_name = os.path.basename(frame) + pred_img.save(os.path.join(pred_frame_path, frame_name)) + + frame_pattern_combined = os.path.join(pred_frame_path, '%08d.png') + + vid_out_path = os.path.join(output_path, '{}_deoldify_out.mp4'.format(base_name)) + frames_to_video_ffmpeg(frame_pattern_combined, vid_out_path, str(int(fps))) + + return frame_pattern_combined, vid_out_path + + + +def dump_frames_ffmpeg(vid_path, outpath, r=None, ss=None, t=None): + ffmpeg = ['ffmpeg ', ' -loglevel ', ' error '] + vid_name = vid_path.split('/')[-1].split('.')[0] + out_full_path = os.path.join(outpath, 'frames_input') + + if not os.path.exists(out_full_path): + os.makedirs(out_full_path) + + # video file name + outformat = out_full_path + '/%08d.png' + + if ss is not None and t is not None and r is not None: + cmd = ffmpeg + [ + ' -ss ', + ss, + ' -t ', + t, + ' -i ', + vid_path, + ' -r ', + r, + + ' -qscale:v ', + ' 0.1 ', + ' -start_number ', + ' 0 ', + + outformat + ] + else: + cmd = ffmpeg + [' -i ', vid_path, ' -start_number ', ' 0 ', outformat] + + cmd = ''.join(cmd) + print(cmd) + if os.system(cmd) == 0: + print('Video: {} done'.format(vid_name)) + else: + print('Video: {} error'.format(vid_name)) + print('') + sys.stdout.flush() + return out_full_path + + +if __name__=='__main__': + paddle.enable_imperative() + args = parser.parse_args() + + predictor = DeOldifyPredictor(args.input, args.output, weight_path=args.weight_path) + frames_path, temp_video_path = predictor.run() + + print('output video path:', temp_video_path) \ No newline at end of file diff --git a/applications/DeOldify/resnet_backbone.py b/applications/DeOldify/resnet_backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..6c5195a0ea90961eba5c8939ab376af539d2e79d --- /dev/null +++ b/applications/DeOldify/resnet_backbone.py @@ -0,0 +1,183 @@ +import paddle +import paddle.nn as nn + + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152'] + + +def conv3x3(in_planes, out_planes, stride=1): + "3x3 convolution with padding" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias_attr=False) + + +class BasicBlock(paddle.fluid.Layer): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm(planes) + self.relu = nn.ReLU() + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + 
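+        # Two 3x3 conv+BN stages; the identity (1x1-downsampled when shapes differ) is
+        # added back before the final ReLU.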
residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(paddle.fluid.Layer): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias_attr=False) + self.bn1 = nn.BatchNorm(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias_attr=False) + self.bn3 = nn.BatchNorm(planes * 4) + self.relu = nn.ReLU() + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(paddle.fluid.Layer): + + def __init__(self, block, layers, num_classes=1000): + self.inplanes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias_attr=False) + self.bn1 = nn.BatchNorm(64) + self.relu = nn.ReLU() + self.maxpool = nn.Pool2D(pool_size=3, pool_stride=2, pool_padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.Pool2D(7, pool_stride=1, pool_type='avg') + self.fc = nn.Linear(512 * block.expansion, num_classes) + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias_attr=False), + nn.BatchNorm(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.avgpool(x) + x = paddle.reshape(x, (x.shape[0], -1)) + x = self.fc(x) + + return x + + +def resnet18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + return model + + +def resnet34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + return model + + +def resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + return model + + +def resnet101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + return model + + +def resnet152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + return model diff --git a/applications/DeOldify/spectral_norm.py b/applications/DeOldify/spectral_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..81500a51d48c46ad8f0628898209fad829f0c67e --- /dev/null +++ b/applications/DeOldify/spectral_norm.py @@ -0,0 +1,63 @@ +import numpy as np +from paddle import fluid +from paddle.fluid import dygraph +from paddle.fluid import layers as F +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.data_feeder import check_variable_and_dtype + +import paddle +import paddle.nn as nn + +class _SpectralNorm(nn.SpectralNorm): + def __init__(self, + weight_shape, + dim=0, + power_iters=1, + eps=1e-12, + dtype='float32'): + super(_SpectralNorm, self).__init__(weight_shape, dim, power_iters, eps, dtype) + + def forward(self, weight): + check_variable_and_dtype(weight, "weight", ['float32', 'float64'], + 'SpectralNorm') + inputs = {'Weight': weight, 'U': self.weight_u, 'V': self.weight_v} + out = self._helper.create_variable_for_type_inference(self._dtype) + _power_iters = self._power_iters if self.training else 0 + self._helper.append_op( + type="spectral_norm", + inputs=inputs, + outputs={"Out": out, }, + attrs={ + "dim": self._dim, + "power_iters": _power_iters, #self._power_iters, + "eps": self._eps, + }) + + return out + + +class Spectralnorm(nn.Layer): + + def __init__(self, + layer, + dim=0, + power_iters=1, + eps=1e-12, + dtype='float32'): + super(Spectralnorm, self).__init__() + self.spectral_norm = _SpectralNorm(layer.weight.shape, dim, power_iters, eps, dtype) + self.dim = dim + self.power_iters = power_iters + self.eps = eps + self.layer = layer + weight = layer._parameters['weight'] + del layer._parameters['weight'] + self.weight_orig = self.create_parameter(weight.shape, dtype=weight.dtype) + self.weight_orig.set_value(weight) + + + def forward(self, x): + weight = self.spectral_norm(self.weight_orig) + self.layer.weight = weight + out = self.layer(x) + return out diff --git a/applications/EDVR/configs/edvr_L.yaml b/applications/EDVR/configs/edvr_L.yaml deleted file mode 100644 index 91b05f945e5e8fcc751ce878fb67c513c752e5f4..0000000000000000000000000000000000000000 --- a/applications/EDVR/configs/edvr_L.yaml +++ /dev/null @@ -1,24 +0,0 @@ -MODEL: - name: "EDVR" - format: "png" - num_frames: 5 - center: 2 - num_filters: 128 #64 - deform_conv_groups: 8 - front_RBs: 5 - back_RBs: 40 #10 - predeblur: False - HR_in: False - w_TSA: True #False - -INFER: - scale: 4 - crop_size: 256 - interval_list: [1] - random_reverse: False - number_frames: 5 - batch_size: 1 - file_root: "/workspace/color/input_frames" - inference_model: "/workspace/PaddleGAN/applications/EDVR/data/inference_model" - use_flip: False - use_rot: False diff --git a/applications/EDVR/predict.py b/applications/EDVR/predict.py index c45904a2698d2e38702df2f91dc96b7ed1cf0ae8..a1cb1b98415f91f05d6333249f2468542fc2b6b2 100644 --- 
a/applications/EDVR/predict.py +++ b/applications/EDVR/predict.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # #Licensed under the Apache License, Version 2.0 (the "License"); #you may not use this file except in compliance with the License. @@ -14,83 +14,41 @@ import os import sys + +cur_path = os.path.abspath(os.path.dirname(__file__)) +sys.path.append(cur_path) + import time -import logging import argparse import ast +import glob import numpy as np -try: - import cPickle as pickle -except: - import pickle + import paddle.fluid as fluid import cv2 -from utils.config_utils import * -#import models -from reader import get_reader -#from metrics import get_metrics -from utils.utility import check_cuda -from utils.utility import check_version - -logging.root.handlers = [] -FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s' -logging.basicConfig(level=logging.DEBUG, format=FORMAT, stream=sys.stdout) -logger = logging.getLogger(__name__) +from data import EDVRDataset +from paddle.incubate.hapi.download import get_path_from_url +EDVR_weight_url = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar' def parse_args(): parser = argparse.ArgumentParser() parser.add_argument( - '--model_name', - type=str, - default='AttentionCluster', - help='name of model to train.') - parser.add_argument( - '--inference_model', - type=str, - default='./data/inference_model', - help='path of inference_model.') - parser.add_argument( - '--config', - type=str, - default='configs/attention_cluster.txt', - help='path to config file of model') - parser.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=True, - help='default use gpu.') - parser.add_argument( - '--batch_size', - type=int, - default=1, - help='sample number in a batch for inference.') - parser.add_argument( - '--filelist', + '--input', type=str, default=None, - help='path to inferenece data file lists file.') - parser.add_argument( - '--log_interval', - type=int, - default=1, - help='mini-batch interval to log.') + help='input video path') parser.add_argument( - '--infer_topk', - type=int, - default=20, - help='topk predictions to restore.') - parser.add_argument( - '--save_dir', + '--output', type=str, - default=os.path.join('data', 'predict_results'), - help='directory to store results') + default='output', + help='output path') parser.add_argument( - '--video_path', + '--weight_path', type=str, default=None, - help='directory to store results') + help='weight path') args = parser.parse_args() return args @@ -106,69 +64,143 @@ def get_img(pred): return pred def save_img(img, framename): - dirname = './demo/resultpng' - filename = os.path.join(dirname, framename+'.png') - cv2.imwrite(filename, img) - - -def infer(args): - # parse config - config = parse_config(args.config) - infer_config = merge_configs(config, 'infer', vars(args)) - print_configs(infer_config, "Infer") - inference_model = args.inference_model - model_filename = 'EDVR_model.pdmodel' - params_filename = 'EDVR_params.pdparams' - place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - [inference_program, feed_list, fetch_list] = fluid.io.load_inference_model(dirname=inference_model, model_filename=model_filename, params_filename=params_filename, executor=exe) - - infer_reader = get_reader(args.model_name.upper(), 'infer', infer_config) - #infer_metrics = get_metrics(args.model_name.upper(), 'infer', 
infer_config) - #infer_metrics.reset() - - periods = [] - cur_time = time.time() - for infer_iter, data in enumerate(infer_reader()): - if args.model_name == 'EDVR': - data_feed_in = [items[0] for items in data] - video_info = [items[1:] for items in data] - infer_outs = exe.run(inference_program, - fetch_list=fetch_list, - feed={feed_list[0]:np.array(data_feed_in)}) - infer_result_list = [item for item in infer_outs] - videonames = [item[0] for item in video_info] - framenames = [item[1] for item in video_info] - for i in range(len(infer_result_list)): - img_i = get_img(infer_result_list[i]) - save_img(img_i, 'img' + videonames[i] + framenames[i]) - - - - prev_time = cur_time + dirname = os.path.dirname(framename) + if not os.path.exists(dirname): + os.makedirs(dirname) + + cv2.imwrite(framename, img) + + +def dump_frames_ffmpeg(vid_path, outpath, r=None, ss=None, t=None): + ffmpeg = ['ffmpeg ', ' -loglevel ', ' error '] + vid_name = vid_path.split('/')[-1].split('.')[0] + out_full_path = os.path.join(outpath, 'frames_input') + + if not os.path.exists(out_full_path): + os.makedirs(out_full_path) + + # video file name + outformat = out_full_path + '/%08d.png' + + if ss is not None and t is not None and r is not None: + cmd = ffmpeg + [ + ' -ss ', + ss, + ' -t ', + t, + ' -i ', + vid_path, + ' -r ', + r, + ' -qscale:v ', + ' 0.1 ', + ' -start_number ', + ' 0 ', + outformat + ] + else: + cmd = ffmpeg + [' -i ', vid_path, ' -start_number ', ' 0 ', outformat] + + cmd = ''.join(cmd) + print(cmd) + if os.system(cmd) == 0: + print('Video: {} done'.format(vid_name)) + else: + print('Video: {} error'.format(vid_name)) + print('') + sys.stdout.flush() + return out_full_path + + +def frames_to_video_ffmpeg(framepath, videopath, r): + ffmpeg = ['ffmpeg ', ' -loglevel ', ' error '] + cmd = ffmpeg + [ + ' -r ', r, ' -f ', ' image2 ', ' -i ', framepath, ' -vcodec ', + ' libx264 ', ' -pix_fmt ', ' yuv420p ', ' -crf ', ' 16 ', videopath + ] + cmd = ''.join(cmd) + print(cmd) + + if os.system(cmd) == 0: + print('Video: {} done'.format(videopath)) + else: + print('Video: {} error'.format(videopath)) + print('') + sys.stdout.flush() + + +class EDVRPredictor: + def __init__(self, input, output, weight_path=None): + self.input = input + self.output = os.path.join(output, 'EDVR') + + place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() + self.exe = fluid.Executor(place) + + if weight_path is None: + weight_path = get_path_from_url(EDVR_weight_url, cur_path) + + print(weight_path) + + model_filename = 'EDVR_model.pdmodel' + params_filename = 'EDVR_params.pdparams' + + out = fluid.io.load_inference_model(dirname=weight_path, + model_filename=model_filename, + params_filename=params_filename, + executor=self.exe) + self.infer_prog, self.feed_list, self.fetch_list = out + + def run(self): + vid = self.input + base_name = os.path.basename(vid).split('.')[0] + output_path = os.path.join(self.output, base_name) + pred_frame_path = os.path.join(output_path, 'frames_pred') + + if not os.path.exists(output_path): + os.makedirs(output_path) + + if not os.path.exists(pred_frame_path): + os.makedirs(pred_frame_path) + + cap = cv2.VideoCapture(vid) + fps = cap.get(cv2.CAP_PROP_FPS) + + out_path = dump_frames_ffmpeg(vid, output_path) + + frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) + + dataset = EDVRDataset(frames) + + periods = [] cur_time = time.time() - period = cur_time - prev_time - periods.append(period) - - #infer_metrics.accumulate(infer_result_list) + for infer_iter, data in 
enumerate(dataset):
+            data_feed_in = [data[0]]
+
+            infer_outs = self.exe.run(self.infer_prog,
+                                      fetch_list=self.fetch_list,
+                                      feed={self.feed_list[0]:np.array(data_feed_in)})
+            infer_result_list = [item for item in infer_outs]

-        if args.log_interval > 0 and infer_iter % args.log_interval == 0:
-            logger.info('Processed {} samples'.format(infer_iter + 1))
+            frame_path = data[1]
+
+            img_i = get_img(infer_result_list[0])
+            save_img(img_i, os.path.join(pred_frame_path, os.path.basename(frame_path)))

-    logger.info('[INFER] infer finished. average time: {}'.format(np.mean(periods)))
+            prev_time = cur_time
+            cur_time = time.time()
+            period = cur_time - prev_time
+            periods.append(period)

-    if not os.path.isdir(args.save_dir):
-        os.makedirs(args.save_dir)
+            print('Processed {} samples'.format(infer_iter + 1))
+
+        frame_pattern_combined = os.path.join(pred_frame_path, '%08d.png')
+        vid_out_path = os.path.join(self.output, '{}_edvr_out.mp4'.format(base_name))
+        frames_to_video_ffmpeg(frame_pattern_combined, vid_out_path, str(int(fps)))

-    #infer_metrics.finalize_and_log_out(savedir=args.save_dir)
+        return frame_pattern_combined, vid_out_path

 if __name__ == "__main__":
-    args = parse_args()
-    # check whether the installed paddle is compiled with GPU
-    check_cuda(args.use_gpu)
-    check_version()
-    logger.info(args)
+    args = parse_args()
+    predictor = EDVRPredictor(args.input, args.output, args.weight_path)
+    predictor.run()
-
-    infer(args)
diff --git a/applications/EDVR/reader/__init__.py b/applications/EDVR/reader/__init__.py
deleted file mode 100644
index 080f19930bb2f495ea071059d656fdd7ba4558ea..0000000000000000000000000000000000000000
--- a/applications/EDVR/reader/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .reader_utils import regist_reader, get_reader
-from .edvr_reader import EDVRReader
-
-regist_reader("EDVR", EDVRReader)
diff --git a/applications/EDVR/reader/edvr_reader.py b/applications/EDVR/reader/edvr_reader.py
deleted file mode 100644
index 75191254ea6394d4047963c0d51981f2f5707adb..0000000000000000000000000000000000000000
--- a/applications/EDVR/reader/edvr_reader.py
+++ /dev/null
@@ -1,434 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import os
-import sys
-import cv2
-import math
-import random
-import multiprocessing
-import functools
-import numpy as np
-import paddle
-import cv2
-import logging
-from .reader_utils import DataReader
-
-logger = logging.getLogger(__name__)
-python_ver = sys.version_info
-
-random.seed(0)
-np.random.seed(0)
-
-class EDVRReader(DataReader):
-    """
-    Data reader for video super resolution task fit for EDVR model.
-    This is specified for REDS dataset.
- """ - def __init__(self, name, mode, cfg): - super(EDVRReader, self).__init__(name, mode, cfg) - self.format = cfg.MODEL.format - self.crop_size = self.get_config_from_sec(mode, 'crop_size') - self.interval_list = self.get_config_from_sec(mode, 'interval_list') - self.random_reverse = self.get_config_from_sec(mode, 'random_reverse') - self.number_frames = self.get_config_from_sec(mode, 'number_frames') - # set batch size and file list - self.batch_size = cfg[mode.upper()]['batch_size'] - self.fileroot = cfg[mode.upper()]['file_root'] - self.use_flip = self.get_config_from_sec(mode, 'use_flip', False) - self.use_rot = self.get_config_from_sec(mode, 'use_rot', False) - - self.num_reader_threads = self.get_config_from_sec(mode, 'num_reader_threads', 1) - self.buf_size = self.get_config_from_sec(mode, 'buf_size', 1024) - self.fix_random_seed = self.get_config_from_sec(mode, 'fix_random_seed', False) - - if self.mode != 'infer': - self.gtroot = self.get_config_from_sec(mode, 'gt_root') - self.scale = self.get_config_from_sec(mode, 'scale', 1) - self.LR_input = (self.scale > 1) - if self.fix_random_seed: - random.seed(0) - np.random.seed(0) - self.num_reader_threads = 1 - - def create_reader(self): - logger.info('initialize reader ... ') - self.filelist = [] - for video_name in os.listdir(self.fileroot): - if (self.mode == 'train') and (video_name in ['000', '011', '015', '020']): - continue - for frame_name in os.listdir(os.path.join(self.fileroot, video_name)): - frame_idx = frame_name.split('.')[0] - video_frame_idx = video_name + '_' + frame_idx - # for each item in self.filelist is like '010_00000015', '260_00000090' - self.filelist.append(video_frame_idx) - if self.mode == 'test' or self.mode == 'infer': - self.filelist.sort() - - if self.num_reader_threads == 1: - reader_func = make_reader - else: - reader_func = make_multi_reader - - if self.mode != 'infer': - return reader_func(filelist = self.filelist, - num_threads = self.num_reader_threads, - batch_size = self.batch_size, - is_training = (self.mode == 'train'), - number_frames = self.number_frames, - interval_list = self.interval_list, - random_reverse = self.random_reverse, - fileroot = self.fileroot, - crop_size = self.crop_size, - use_flip = self.use_flip, - use_rot = self.use_rot, - gtroot = self.gtroot, - LR_input = self.LR_input, - scale = self.scale, - mode = self.mode) - else: - return reader_func(filelist = self.filelist, - num_threads = self.num_reader_threads, - batch_size = self.batch_size, - is_training = (self.mode == 'train'), - number_frames = self.number_frames, - interval_list = self.interval_list, - random_reverse = self.random_reverse, - fileroot = self.fileroot, - crop_size = self.crop_size, - use_flip = self.use_flip, - use_rot = self.use_rot, - gtroot = '', - LR_input = True, - scale = 4, - mode = self.mode) - - -def get_sample_data(item, number_frames, interval_list, random_reverse, fileroot, - crop_size, use_flip, use_rot, gtroot, LR_input, scale, mode='train'): - video_name = item.split('_')[0] - frame_name = item.split('_')[1] - if (mode == 'train') or (mode == 'valid'): - ngb_frames, name_b = get_neighbor_frames(frame_name, \ - number_frames = number_frames, \ - interval_list = interval_list, \ - random_reverse = random_reverse) - elif (mode == 'test') or (mode == 'infer'): - ngb_frames, name_b = get_test_neighbor_frames(int(frame_name), number_frames) - else: - raise NotImplementedError('mode {} not implemented'.format(mode)) - frame_name = name_b - print('key2', ngb_frames, name_b) - if mode != 
'infer': - img_GT = read_img(os.path.join(gtroot, video_name, frame_name + '.png'), is_gt=True) - #print('gt_mean', np.mean(img_GT)) - frame_list = [] - for ngb_frm in ngb_frames: - ngb_name = "%04d"%ngb_frm - #img = read_img(os.path.join(fileroot, video_name, frame_name + '.png')) - img = read_img(os.path.join(fileroot, video_name, ngb_name + '.png')) - frame_list.append(img) - #print('img_mean', np.mean(img)) - - H, W, C = frame_list[0].shape - # add random crop - if (mode == 'train') or (mode == 'valid'): - if LR_input: - LQ_size = crop_size // scale - rnd_h = random.randint(0, max(0, H - LQ_size)) - rnd_w = random.randint(0, max(0, W - LQ_size)) - #print('rnd_h {}, rnd_w {}', rnd_h, rnd_w) - frame_list = [v[rnd_h:rnd_h + LQ_size, rnd_w:rnd_w + LQ_size, :] for v in frame_list] - rnd_h_HR, rnd_w_HR = int(rnd_h * scale), int(rnd_w * scale) - img_GT = img_GT[rnd_h_HR:rnd_h_HR + crop_size, rnd_w_HR:rnd_w_HR + crop_size, :] - else: - rnd_h = random.randint(0, max(0, H - crop_size)) - rnd_w = random.randint(0, max(0, W - crop_size)) - frame_list = [v[rnd_h:rnd_h + crop_size, rnd_w:rnd_w + crop_size, :] for v in frame_list] - img_GT = img_GT[rnd_h:rnd_h + crop_size, rnd_w:rnd_w + crop_size, :] - - # add random flip and rotation - if mode != 'infer': - frame_list.append(img_GT) - if (mode == 'train') or (mode == 'valid'): - rlt = img_augment(frame_list, use_flip, use_rot) - else: - rlt = frame_list - if mode != 'infer': - frame_list = rlt[0:-1] - img_GT = rlt[-1] - else: - frame_list = rlt - - # stack LQ images to NHWC, N is the frame number - img_LQs = np.stack(frame_list, axis=0) - # BGR to RGB, HWC to CHW, numpy to tensor - img_LQs = img_LQs[:, :, :, [2, 1, 0]] - img_LQs = np.transpose(img_LQs, (0, 3, 1, 2)).astype('float32') - if mode != 'infer': - img_GT = img_GT[:, :, [2, 1, 0]] - img_GT = np.transpose(img_GT, (2, 0, 1)).astype('float32') - - return img_LQs, img_GT - else: - return img_LQs - -def get_test_neighbor_frames(crt_i, N, max_n=100, padding='new_info'): - """Generate an index list for reading N frames from a sequence of images - Args: - crt_i (int): current center index - max_n (int): max number of the sequence of images (calculated from 1) - N (int): reading N frames - padding (str): padding mode, one of replicate | reflection | new_info | circle - Example: crt_i = 0, N = 5 - replicate: [0, 0, 0, 1, 2] - reflection: [2, 1, 0, 1, 2] - new_info: [4, 3, 0, 1, 2] - circle: [3, 4, 0, 1, 2] - - Returns: - return_l (list [int]): a list of indexes - """ - max_n = max_n - 1 - n_pad = N // 2 - return_l = [] - - for i in range(crt_i - n_pad, crt_i + n_pad + 1): - if i < 0: - if padding == 'replicate': - add_idx = 0 - elif padding == 'reflection': - add_idx = -i - elif padding == 'new_info': - add_idx = (crt_i + n_pad) + (-i) - elif padding == 'circle': - add_idx = N + i - else: - raise ValueError('Wrong padding mode') - elif i > max_n: - if padding == 'replicate': - add_idx = max_n - elif padding == 'reflection': - add_idx = max_n * 2 - i - elif padding == 'new_info': - add_idx = (crt_i - n_pad) - (i - max_n) - elif padding == 'circle': - add_idx = i - N - else: - raise ValueError('Wrong padding mode') - else: - add_idx = i - return_l.append(add_idx) - name_b = '{:08d}'.format(crt_i) - return return_l, name_b - - -def get_neighbor_frames(frame_name, number_frames, interval_list, random_reverse, max_frame=99, bordermode=False): - center_frame_idx = int(frame_name) - half_N_frames = number_frames // 2 - #### determine the neighbor frames - interval = random.choice(interval_list) - if 
bordermode: - direction = 1 # 1: forward; 0: backward - if random_reverse and random.random() < 0.5: - direction = random.choice([0, 1]) - if center_frame_idx + interval * (number_frames - 1) > max_frame: - direction = 0 - elif center_frame_idx - interval * (number_frames - 1) < 0: - direction = 1 - # get the neighbor list - if direction == 1: - neighbor_list = list( - range(center_frame_idx, center_frame_idx + interval * number_frames, interval)) - else: - neighbor_list = list( - range(center_frame_idx, center_frame_idx - interval * number_frames, -interval)) - name_b = '{:08d}'.format(neighbor_list[0]) - else: - # ensure not exceeding the borders - while (center_frame_idx + half_N_frames * interval > - max_frame) or (center_frame_idx - half_N_frames * interval < 0): - center_frame_idx = random.randint(0, max_frame) - # get the neighbor list - neighbor_list = list( - range(center_frame_idx - half_N_frames * interval, - center_frame_idx + half_N_frames * interval + 1, interval)) - if random_reverse and random.random() < 0.5: - neighbor_list.reverse() - name_b = '{:08d}'.format(neighbor_list[half_N_frames]) - assert len(neighbor_list) == number_frames, \ - "frames slected have length({}), but it should be ({})".format(len(neighbor_list), number_frames) - - return neighbor_list, name_b - - -def read_img(path, size=None, is_gt=False): - """read image by cv2 - return: Numpy float32, HWC, BGR, [0,1]""" - img = cv2.imread(path, cv2.IMREAD_UNCHANGED) - #if not is_gt: - # #print(path) - # img = cv2.resize(img, (0, 0), fx=0.25, fy=0.25) - #print("path: ", path) - img = img.astype(np.float32) / 255. - if img.ndim == 2: - img = np.expand_dims(img, axis=2) - # some images have 4 channels - if img.shape[2] > 3: - img = img[:, :, :3] - return img - - -def img_augment(img_list, hflip=True, rot=True): - """horizontal flip OR rotate (0, 90, 180, 270 degrees)""" - hflip = hflip and random.random() < 0.5 - vflip = rot and random.random() < 0.5 - rot90 = rot and random.random() < 0.5 - - def _augment(img): - if hflip: - img = img[:, ::-1, :] - if vflip: - img = img[::-1, :, :] - if rot90: - img = img.transpose(1, 0, 2) - return img - - return [_augment(img) for img in img_list] - - -def make_reader(filelist, - num_threads, - batch_size, - is_training, - number_frames, - interval_list, - random_reverse, - fileroot, - crop_size, - use_flip, - use_rot, - gtroot, - LR_input, - scale, - mode='train'): - fl = filelist - def reader_(): - if is_training: - random.shuffle(fl) - batch_out = [] - for item in fl: - if mode != 'infer': - img_LQs, img_GT = get_sample_data(item, - number_frames, interval_list, random_reverse, fileroot, - crop_size,use_flip, use_rot, gtroot, LR_input, scale, mode) - else: - img_LQs = get_sample_data(item, - number_frames, interval_list, random_reverse, fileroot, - crop_size,use_flip, use_rot, gtroot, LR_input, scale, mode) - videoname = item.split('_')[0] - framename = item.split('_')[1] - if (mode == 'train') or (mode == 'valid'): - batch_out.append((img_LQs, img_GT)) - elif mode == 'test': - batch_out.append((img_LQs, img_GT, videoname, framename)) - elif mode == 'infer': - batch_out.append((img_LQs, videoname, framename)) - else: - raise NotImplementedError("mode {} not implemented".format(mode)) - if len(batch_out) == batch_size: - yield batch_out - batch_out = [] - return reader_ - - -def make_multi_reader(filelist, - num_threads, - batch_size, - is_training, - number_frames, - interval_list, - random_reverse, - fileroot, - crop_size, - use_flip, - use_rot, - gtroot, - LR_input, - 
scale, - mode='train'): - def read_into_queue(flq, queue): - batch_out = [] - for item in flq: - if mode != 'infer': - img_LQs, img_GT = get_sample_data(item, - number_frames, interval_list, random_reverse, fileroot, - crop_size,use_flip, use_rot, gtroot, LR_input, scale, mode) - else: - img_LQs = get_sample_data(item, - number_frames, interval_list, random_reverse, fileroot, - crop_size,use_flip, use_rot, gtroot, LR_input, scale, mode) - videoname = item.split('_')[0] - framename = item.split('_')[1] - if (mode == 'train') or (mode == 'valid'): - batch_out.append((img_LQs, img_GT)) - elif mode == 'test': - batch_out.append((img_LQs, img_GT, videoname, framename)) - elif mode == 'infer': - batch_out.append((img_LQs, videoname, framename)) - else: - raise NotImplementedError("mode {} not implemented".format(mode)) - if len(batch_out) == batch_size: - queue.put(batch_out) - batch_out = [] - queue.put(None) - - - def queue_reader(): - fl = filelist - if is_training: - random.shuffle(fl) - - n = num_threads - queue_size = 20 - reader_lists = [None] * n - file_num = int(len(fl) // n) - for i in range(n): - if i < len(reader_lists) - 1: - tmp_list = fl[i * file_num:(i + 1) * file_num] - else: - tmp_list = fl[i * file_num:] - reader_lists[i] = tmp_list - - queue = multiprocessing.Queue(queue_size) - p_list = [None] * len(reader_lists) - # for reader_list in reader_lists: - for i in range(len(reader_lists)): - reader_list = reader_lists[i] - p_list[i] = multiprocessing.Process( - target=read_into_queue, args=(reader_list, queue)) - p_list[i].start() - reader_num = len(reader_lists) - finish_num = 0 - while finish_num < reader_num: - sample = queue.get() - if sample is None: - finish_num += 1 - else: - yield sample - for i in range(len(p_list)): - if p_list[i].is_alive(): - p_list[i].join() - - return queue_reader diff --git a/applications/EDVR/reader/reader_utils.py b/applications/EDVR/reader/reader_utils.py deleted file mode 100644 index 93e1f3a6aebe3f1812bf3b4ecb7745dd56cd8fcd..0000000000000000000000000000000000000000 --- a/applications/EDVR/reader/reader_utils.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -import pickle -import cv2 -import numpy as np -import random - - -class ReaderNotFoundError(Exception): - "Error: reader not found" - - def __init__(self, reader_name, avail_readers): - super(ReaderNotFoundError, self).__init__() - self.reader_name = reader_name - self.avail_readers = avail_readers - - def __str__(self): - msg = "Reader {} Not Found.\nAvailiable readers:\n".format( - self.reader_name) - for reader in self.avail_readers: - msg += " {}\n".format(reader) - return msg - - -class DataReader(object): - """data reader for video input""" - - def __init__(self, model_name, mode, cfg): - self.name = model_name - self.mode = mode - self.cfg = cfg - - def create_reader(self): - """Not implemented""" - pass - - def get_config_from_sec(self, sec, item, default=None): - if sec.upper() not in self.cfg: - return default - return self.cfg[sec.upper()].get(item, default) - - -class ReaderZoo(object): - def __init__(self): - self.reader_zoo = {} - - def regist(self, name, reader): - assert reader.__base__ == DataReader, "Unknow model type {}".format( - type(reader)) - self.reader_zoo[name] = reader - - def get(self, name, mode, cfg): - for k, v in self.reader_zoo.items(): - if k == name: - return v(name, mode, cfg) - raise ReaderNotFoundError(name, self.reader_zoo.keys()) - - -# singleton reader_zoo -reader_zoo = ReaderZoo() - - -def regist_reader(name, reader): - reader_zoo.regist(name, reader) - - -def get_reader(name, mode, cfg): - reader_model = reader_zoo.get(name, mode, cfg) - return reader_model.create_reader() diff --git a/applications/EDVR/run.sh b/applications/EDVR/run.sh index 7f05d6b9d31354a8eecfb9c8656c9d9545b0bac7..271eefe33dd734c09cc7acd82c181e36ad3c33c9 100644 --- a/applications/EDVR/run.sh +++ b/applications/EDVR/run.sh @@ -17,10 +17,10 @@ valid_interval=1 weights="./weights/paddle_state_dict_L.npz" -export CUDA_VISIBLE_DEVICES=4,5,6,7 #0,1,5,6 fast, 2,3,4,7 slow -export FLAGS_fast_eager_deletion_mode=1 -export FLAGS_eager_delete_tensor_gb=0.0 -export FLAGS_fraction_of_gpu_memory_to_use=0.98 +export CUDA_VISIBLE_DEVICES=6 #0,1,5,6 fast, 2,3,4,7 slow +# export FLAGS_fast_eager_deletion_mode=1 +# export FLAGS_eager_delete_tensor_gb=0.0 +# export FLAGS_fraction_of_gpu_memory_to_use=0.98 if [ "$mode"x == "predict"x ]; then echo $mode $name $configs $weights diff --git a/applications/EDVR/utils/__init__.py b/applications/EDVR/utils/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/applications/EDVR/utils/config_utils.py b/applications/EDVR/utils/config_utils.py deleted file mode 100644 index 1acb9d28fc0bf15c54b210f6ed11c13d803c1043..0000000000000000000000000000000000000000 --- a/applications/EDVR/utils/config_utils.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
diff --git a/applications/EDVR/utils/config_utils.py b/applications/EDVR/utils/config_utils.py
deleted file mode 100644
index 1acb9d28fc0bf15c54b210f6ed11c13d803c1043..0000000000000000000000000000000000000000
--- a/applications/EDVR/utils/config_utils.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import yaml
-from .utility import AttrDict
-import logging
-logger = logging.getLogger(__name__)
-
-CONFIG_SECS = [
-    'train',
-    'valid',
-    'test',
-    'infer',
-]
-
-
-def parse_config(cfg_file):
-    """Load a config file into AttrDict"""
-    import yaml
-    with open(cfg_file, 'r') as fopen:
-        yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.Loader))
-    create_attr_dict(yaml_config)
-    return yaml_config
-
-
-def create_attr_dict(yaml_config):
-    from ast import literal_eval
-    for key, value in yaml_config.items():
-        if type(value) is dict:
-            yaml_config[key] = value = AttrDict(value)
-        if isinstance(value, str):
-            try:
-                value = literal_eval(value)
-            except BaseException:
-                pass
-        if isinstance(value, AttrDict):
-            create_attr_dict(yaml_config[key])
-        else:
-            yaml_config[key] = value
-    return
-
-
-def merge_configs(cfg, sec, args_dict):
-    assert sec in CONFIG_SECS, "invalid config section {}".format(sec)
-    sec_dict = getattr(cfg, sec.upper())
-    for k, v in args_dict.items():
-        if v is None:
-            continue
-        try:
-            if hasattr(sec_dict, k):
-                setattr(sec_dict, k, v)
-        except:
-            pass
-    return cfg
-
-
-def print_configs(cfg, mode):
-    logger.info("---------------- {:>5} Arguments ----------------".format(
-        mode))
-    for sec, sec_items in cfg.items():
-        logger.info("{}:".format(sec))
-        for k, v in sec_items.items():
-            logger.info("    {}:{}".format(k, v))
-    logger.info("-------------------------------------------------")
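[review note] config_utils.py, removed above, turned YAML configs into nested
AttrDicts so sections read as cfg.TRAIN.batch_size. A compact equivalent for
anyone replicating that behaviour elsewhere (sketch; uses yaml.safe_load where
the original used yaml.load with an explicit Loader):

    import yaml

    class AttrDict(dict):
        """A dict whose keys can also be read and written as attributes."""
        def __getattr__(self, key):
            try:
                return self[key]
            except KeyError:
                raise AttributeError(key)

        def __setattr__(self, key, value):
            self[key] = value

    def parse_config(cfg_file):
        """Load a YAML file into recursively nested AttrDicts."""
        def wrap(value):
            if isinstance(value, dict):
                return AttrDict({k: wrap(v) for k, v in value.items()})
            return value

        with open(cfg_file) as f:
            return wrap(yaml.safe_load(f))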
diff --git a/applications/EDVR/utils/utility.py b/applications/EDVR/utils/utility.py
deleted file mode 100644
index ced1e7d757ff5697c0fe61f130849524491da3b0..0000000000000000000000000000000000000000
--- a/applications/EDVR/utils/utility.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import os
-import sys
-import signal
-import logging
-import paddle
-import paddle.fluid as fluid
-
-__all__ = ['AttrDict']
-
-logger = logging.getLogger(__name__)
-
-
-def _term(sig_num, addition):
-    print('current pid is %s, group id is %s' % (os.getpid(), os.getpgrp()))
-    os.killpg(os.getpgid(os.getpid()), signal.SIGKILL)
-
-
-signal.signal(signal.SIGTERM, _term)
-signal.signal(signal.SIGINT, _term)
-
-
-class AttrDict(dict):
-    def __getattr__(self, key):
-        return self[key]
-
-    def __setattr__(self, key, value):
-        if key in self.__dict__:
-            self.__dict__[key] = value
-        else:
-            self[key] = value
-
-
-def check_cuda(use_cuda, err = \
-    "\nYou can not set use_gpu = True in the model because you are using paddlepaddle-cpu.\n \
-    Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_gpu = False to run models on CPU.\n"
-    ):
-    try:
-        if use_cuda == True and fluid.is_compiled_with_cuda() == False:
-            print(err)
-            sys.exit(1)
-    except Exception as e:
-        pass
-
-
-def check_version():
-    """
-    Log error and exit when the installed version of paddlepaddle is
-    not satisfied.
-    """
-    err = "PaddlePaddle version 1.6 or higher is required, " \
-          "or a suitable develop version is satisfied as well. \n" \
-          "Please make sure the version is good with your code."
-
-    try:
-        fluid.require_version('1.6.0')
-    except Exception as e:
-        logger.error(err)
-        sys.exit(1)
diff --git a/applications/run.sh b/applications/run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f346576a70809ebd98a0620fb76feeecea2e9748
--- /dev/null
+++ b/applications/run.sh
@@ -0,0 +1,13 @@
+cd DAIN/pwcnet/correlation_op
+# required on the first run only:
+# bash make.sh
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python -c 'import paddle; print(paddle.sysconfig.get_lib())'`
+export PYTHONPATH=$PYTHONPATH:`pwd`
+cd -
+
+# input: path of the input video
+# output: directory where the output video is saved
+# proccess_order: order in which the models are applied
+
+python tools/main.py \
+--input input.mp4 --output output --proccess_order DAIN DeOldify EDVR
diff --git a/applications/tools/main.py b/applications/tools/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..3062290d78bd6c08568f185d6720843756736e91
--- /dev/null
+++ b/applications/tools/main.py
@@ -0,0 +1,49 @@
+import sys
+sys.path.append('.')
+
+import argparse
+import paddle
+
+from DAIN.predict import VideoFrameInterp
+from DeOldify.predict import DeOldifyPredictor
+from EDVR.predict import EDVRPredictor
+
+parser = argparse.ArgumentParser(description='Fix video')
+parser.add_argument('--input', type=str, default=None, help='Input video')
+parser.add_argument('--output', type=str, default='output', help='Output dir')
+parser.add_argument('--DAIN_weight', type=str, default=None, help='Path to the DAIN model weights')
+parser.add_argument('--DeOldify_weight', type=str, default=None, help='Path to the DeOldify model weights')
+parser.add_argument('--EDVR_weight', type=str, default=None, help='Path to the EDVR model weights')
+# DAIN args
+parser.add_argument('--time_step', type=float, default=0.5, help='Time step between interpolated frames')
+parser.add_argument('--proccess_order', type=str, default=[], nargs='+', help='Order in which the models are applied')
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    print('args...', args)
+    orders = args.proccess_order
+    temp_video_path = None
+    frames_path = None
+
+    for order in orders:
+        if order == 'DAIN':
+            predictor = VideoFrameInterp(args.time_step, args.DAIN_weight,
+                                         args.input, output_path=args.output)
+            frames_path, temp_video_path = predictor.run()
+        elif order == 'DeOldify':
+            print('frames:', frames_path)
+            print('video_path:', temp_video_path)
+
+            paddle.disable_static()
+            predictor = DeOldifyPredictor(temp_video_path, args.output, weight_path=args.DeOldify_weight)
+            frames_path, temp_video_path = predictor.run()
+            print('frames:', frames_path)
+            print('video_path:', temp_video_path)
+            paddle.enable_static()
+        elif order == 'EDVR':
+            predictor = EDVRPredictor(temp_video_path, args.output, weight_path=args.EDVR_weight)
+            frames_path, temp_video_path = predictor.run()
+            print('frames:', frames_path)
+            print('video_path:', temp_video_path)
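[review note] In tools/main.py the if/elif chain over model names grows with
every new model. A table-driven dispatch would keep the loop uniform; this is a
hypothetical refactor, not part of the patch, and it omits the
paddle.disable_static()/enable_static() toggling that the DeOldify stage needs:

    # Hypothetical refactor of the stage loop in tools/main.py.
    STAGES = {
        'DAIN': lambda args, video: VideoFrameInterp(
            args.time_step, args.DAIN_weight, video, output_path=args.output),
        'DeOldify': lambda args, video: DeOldifyPredictor(
            video, args.output, weight_path=args.DeOldify_weight),
        'EDVR': lambda args, video: EDVRPredictor(
            video, args.output, weight_path=args.EDVR_weight),
    }

    def run_pipeline(args):
        frames_path, video_path = None, args.input
        for name in args.proccess_order:
            frames_path, video_path = STAGES[name](args, video_path).run()
        return frames_path, video_path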