未验证 提交 4f7ce1c1 编写于 作者: L LielinJiang 提交者: GitHub

Merge pull request #32 from LielinJiang/apps

Reproduce application module
import os
import datetime
import argparse
import numpy
import networks
# Command-line options for the DAIN video frame interpolation application.
# The module exposes a ready-to-use ``parser`` object.

modelnames = networks.__all__

# A one-element tuple needs the trailing comma; without it this is a plain
# string, which makes ``choices`` do substring matching and makes the help
# text join individual characters.
datasetNames = ('Vimeo_90K_interp', )  # datasets.__all__


def _str2bool(value):
    """Convert a command-line token to a real boolean.

    ``type=bool`` is not usable with argparse because ``bool('False')`` is
    ``True``: any non-empty string would enable the flag.

    Args:
        value: the raw token (or an actual bool).

    Returns:
        bool: the parsed value.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {!r}'.format(value))


parser = argparse.ArgumentParser(description='DAIN')

parser.add_argument('--debug', action='store_true', help='Enable debug mode')
parser.add_argument('--netName',
                    type=str,
                    default='DAIN',
                    choices=modelnames,
                    help='model architecture: ' + ' | '.join(modelnames) +
                    ' (default: DAIN)')
parser.add_argument('--datasetName',
                    default='Vimeo_90K_interp',
                    choices=datasetNames,
                    nargs='+',
                    help='dataset type : ' + ' | '.join(datasetNames) +
                    ' (default: Vimeo_90K_interp)')
parser.add_argument('--video_path',
                    default='',
                    help='the path of selected videos')
parser.add_argument('--output_path', default='', help='the output root path')

parser.add_argument('--seed',
                    type=int,
                    default=1,
                    help='random seed (default: 1)')

parser.add_argument('--batch_size',
                    '-b',
                    type=int,
                    default=1,
                    help='batch size (default:1)')
parser.add_argument('--channels',
                    '-c',
                    type=int,
                    default=3,
                    choices=[1, 3],
                    help='channels of images (default:3)')
parser.add_argument('--filter_size',
                    '-f',
                    type=int,
                    default=4,
                    help='the size of filters used (default: 4)',
                    choices=[2, 4, 6, 5, 51])

parser.add_argument('--time_step',
                    type=float,
                    default=0.5,
                    help='choose the time steps')
parser.add_argument(
    '--alpha',
    type=float,
    nargs='+',
    default=[0.0, 1.0],
    help=
    'the ration of loss for interpolated and rectified result (default: [0.0, 1.0])'
)
parser.add_argument('--frame_rate',
                    type=int,
                    default=None,
                    help='frame rate of the input video')

parser.add_argument('--patience',
                    type=int,
                    default=5,
                    help='the patience of reduce on plateou')
parser.add_argument('--factor',
                    type=float,
                    default=0.2,
                    help='the factor of reduce on plateou')

parser.add_argument('--saved_model',
                    type=str,
                    default='',
                    help='path to the model weights')
parser.add_argument('--no-date',
                    action='store_true',
                    help='don\'t append date timestamp to folder')
# Boolean options go through _str2bool so that "--use_cuda False" really
# disables the feature.
parser.add_argument('--use_cuda',
                    default=True,
                    type=_str2bool,
                    help='use cuda or not')
parser.add_argument('--use_cudnn', default=1, type=int, help='use cudnn or not')
parser.add_argument('--remove_duplicates',
                    default=True,
                    type=_str2bool,
                    help='remove duplicate frames or not')
from .dain import DAIN
from .dain_slowmotion import DAIN_slowmotion
# Public names of this package: the two network classes imported above.
# NOTE(review): presumably this is the `networks.__all__` tuple that the
# argument parser reads for its --netName choices; confirm the package name.
__all__ = ('DAIN', 'DAIN_slowmotion')
import paddle.fluid as fluid
import resblock
import pwcnet
class DAIN(fluid.dygraph.Layer):
    """Single-midpoint frame interpolation network.

    Bidirectional flow is estimated with ``pwcnet.pwc_dc_net``, both
    neighbouring frames are warped with the flow network's ``warp_nomask``,
    the two warps are averaged, and ``resblock.MultipleBasicBlock_4``
    predicts a residual that is added to the average.

    Args:
        channel: accepted for interface compatibility; not used here.
        filter_size: stored on the instance; not used by ``forward``.
        timestep: temporal position of the generated frame; must be 0.5.
        training: when True ``forward`` expects three frames and returns
            residuals against the middle frame; otherwise it expects two
            frames and returns the interpolated images.
    """

    def __init__(self, channel=3, filter_size=4, timestep=0.5, training=True):
        # base class initialization
        super(DAIN, self).__init__()

        self.filter_size = filter_size
        self.training = training
        self.timestep = timestep
        assert (timestep == 0.5)
        self.numFrames = int(1.0 / timestep) - 1

        ctx_ch = 3 * 64 + 3
        # NOTE(review): this evaluates to 437, whereas forward() concatenates
        # the averaged frame, two warped frames and two flow fields
        # (presumably 3 + 3 + 3 + 2 + 2 = 13 channels, the value
        # DAIN_slowmotion uses). Left unchanged because it fixes the
        # parameter shapes; confirm against resblock before training.
        inplanes = 3 + 3 + 3 + 2 * 1 + 2 * 2 + 16 * 2 + 2 * ctx_ch

        self.rectifyNet = resblock.__dict__['MultipleBasicBlock_4'](inplanes,
                                                                    64)
        self.flownets = pwcnet.__dict__['pwc_dc_net']()
        self.div_flow = 20.0

    def forward(self, input):
        """Interpolate the middle frame between two frames.

        Args:
            input: stacked frames indexed along axis 0. Three entries
                (first, middle, last) in training mode, two entries
                (first, last) otherwise. The original note gives the shape
                as (3, batch, 3, width, height).

        Returns:
            Training mode: ``(losses, offsets)`` where ``losses`` holds the
            residuals of the averaged and of the rectified frame against the
            middle frame, and ``offsets`` holds the flow pair.
            Otherwise: ``([averaged, rectified], [flow_0, flow_1])``.
        """
        # Same test the original performed at every branch, evaluated once.
        is_training = self.training == True  # noqa: E712

        # STEP 1: split the stacked input into individual frames.
        if is_training:
            assert input.shape[0] == 3
            cur_input_0, cur_input_1, cur_input_2 = (input[0], input[1],
                                                     input[2])
        else:
            assert input.shape[0] == 2
            cur_input_0, cur_input_2 = input[0], input[1]

        # STEP 2: estimate flow in both directions.
        time_offsets = [
            kk * self.timestep for kk in range(1, 1 + self.numFrames, 1)
        ]
        forward_flows = self.forward_flownets(
            self.flownets,
            fluid.layers.concat([cur_input_0, cur_input_2], axis=1),
            time_offsets=time_offsets)
        backward_flows = self.forward_flownets(
            self.flownets,
            fluid.layers.concat([cur_input_2, cur_input_0], axis=1),
            time_offsets=time_offsets[::-1])
        cur_offset_output = [forward_flows[0], backward_flows[0]]

        # STEP 3: warp both frames (warp op of PWC-Net) and average them.
        ref0 = self.flownets.warp_nomask(cur_input_0, cur_offset_output[0])
        ref2 = self.flownets.warp_nomask(cur_input_2, cur_offset_output[1])
        cur_output = (ref0 + ref2) / 2.0

        # STEP 4: residual rectification of the averaged frame.
        rectify_input = fluid.layers.concat([
            cur_output, ref0, ref2, cur_offset_output[0], cur_offset_output[1]
        ],
                                            axis=1)
        cur_output_rectified = self.rectifyNet(rectify_input) + cur_output

        # STEP 5: return the results.
        if is_training:
            # In the training phase we output the terms to be penalized.
            losses = [
                cur_output - cur_input_1, cur_output_rectified - cur_input_1
            ]
            offsets = [cur_offset_output]
            return losses, offsets

        cur_outputs = [cur_output, cur_output_rectified]
        return cur_outputs, cur_offset_output

    def forward_flownets(self, model, input, time_offsets=None):
        """Run the flow network and scale its output per time offset.

        Args:
            model: callable flow network applied to ``input``.
            input: two frames concatenated along the channel axis.
            time_offsets: ``None`` (treated as ``[0.5]``), a single float, or
                a list of floats.

        Returns:
            list: one flow per time offset, multiplied by ``self.div_flow``
            and the offset, then bilinearly resized by a factor of 4.
        """
        if time_offsets is None:
            time_offsets = [0.5]
        elif isinstance(time_offsets, float):
            time_offsets = [time_offsets]

        # this is a single direction motion result, not a bidirectional one
        flow = model(input)

        # nearest interpolation won't be better i think
        return [
            fluid.layers.resize_bilinear(self.div_flow * flow * time_offset,
                                         scale=4)
            for time_offset in time_offsets
        ]
import paddle.fluid as fluid
import resblock
import time
import pwcnet
class DAIN_slowmotion(fluid.dygraph.Layer):
    """Multi-frame (slow motion) interpolation network.

    Generates ``int(1 / timestep) - 1`` intermediate frames between two
    frames. For every time offset the flow from ``pwcnet.pwc_dc_net`` is
    scaled, both frames are warped and averaged, and
    ``resblock.MultipleBasicBlock_4`` predicts a residual that is added to
    the average.

    Args:
        channel: accepted for interface compatibility; not used here.
        filter_size: stored on the instance; not used by ``forward``.
        timestep: spacing of the generated frames in (0, 1).
        training: when True ``forward`` expects three frames and returns
            residuals; otherwise two frames and returns images.
    """

    def __init__(self, channel=3, filter_size=4, timestep=0.5, training=True):
        # base class initialization
        super(DAIN_slowmotion, self).__init__()

        self.filter_size = filter_size
        self.training = training
        self.timestep = timestep
        self.num_frames = int(1.0 / timestep) - 1

        # Channel count of the tensor concatenated for rectifyNet in
        # forward(); the original note spells it 3 + 3 + 3 + 2*1 + 2*2 + 2.
        inplanes = 13

        self.flownets = pwcnet.__dict__['pwc_dc_net']()
        self.rectifyNet = resblock.__dict__['MultipleBasicBlock_4'](inplanes,
                                                                    64)
        self.div_flow = 20.0

    @staticmethod
    def _stack_frames(frames):
        """Stack per-offset tensors along a new leading axis."""
        expanded = [fluid.layers.unsqueeze(frame, axes=0) for frame in frames]
        if len(expanded) == 1:
            return expanded[0]
        return fluid.layers.concat(expanded, axis=0)

    def forward(self, input):
        """Interpolate all intermediate frames between two frames.

        Args:
            input: stacked frames indexed along axis 0. Three entries
                (first, middle, last) in training mode, two entries
                (first, last) otherwise. The original note gives the shape
                as (3, batch, 3, width, height).

        Returns:
            Training mode: ``(losses, offsets)`` with the residuals of the
            stacked averaged / rectified frames against the middle frame and
            the flow pair of the last time offset.
            Otherwise: ``([averaged, rectified], last_flow_pair)`` where both
            images are stacked along a new leading axis, one per offset.

        Raises:
            ValueError: if ``timestep`` yields no intermediate frame.
        """
        # Same test the original performed at every branch, evaluated once.
        is_training = self.training == True  # noqa: E712

        # STEP 1: split the stacked input into individual frames.
        if is_training:
            assert input.shape[0] == 3
            cur_input_0, cur_input_1, cur_input_2 = (input[0], input[1],
                                                     input[2])
        else:
            assert input.shape[0] == 2
            cur_input_0, cur_input_2 = input[0], input[1]

        time_offsets = [
            kk * self.timestep for kk in range(1, 1 + self.num_frames, 1)
        ]
        if not time_offsets:
            # Previously this surfaced later as an unbound local variable.
            raise ValueError(
                'timestep={} produces no intermediate frames'.format(
                    self.timestep))

        # STEP 2: estimate flow in both directions for every time offset.
        forward_flows = self.forward_flownets(
            self.flownets,
            fluid.layers.concat([cur_input_0, cur_input_2], axis=1),
            time_offsets=time_offsets)
        backward_flows = self.forward_flownets(
            self.flownets,
            fluid.layers.concat([cur_input_2, cur_input_0], axis=1),
            time_offsets=time_offsets[::-1])

        # STEP 3: warp, average and rectify once per time offset.
        averaged_frames = []
        rectified_frames = []
        cur_offset_output = None
        for flow_0, flow_1 in zip(forward_flows, backward_flows):
            cur_offset_output = [flow_0, flow_1]
            ref0 = self.flownets.warp_nomask(cur_input_0, flow_0)
            ref2 = self.flownets.warp_nomask(cur_input_2, flow_1)
            averaged = (ref0 + ref2) / 2.0
            averaged_frames.append(averaged)

            rectify_input = fluid.layers.concat(
                [averaged, ref0, ref2, flow_0, flow_1], axis=1)
            rectified_frames.append(self.rectifyNet(rectify_input) + averaged)

        # Collecting first and stacking once avoids re-concatenating the
        # growing tensor on every iteration.
        cur_output = self._stack_frames(averaged_frames)
        cur_output_rectified = self._stack_frames(rectified_frames)

        # STEP 4: return the results.
        if is_training:
            # In the training phase we output the terms to be penalized.
            losses = [
                cur_output - cur_input_1, cur_output_rectified - cur_input_1
            ]
            offsets = [cur_offset_output]
            return losses, offsets

        cur_outputs = [cur_output, cur_output_rectified]
        return cur_outputs, cur_offset_output

    def forward_flownets(self, model, input, time_offsets=None):
        """Run the flow network and scale its output per time offset.

        Args:
            model: callable flow network applied to ``input``.
            input: two frames concatenated along the channel axis.
            time_offsets: ``None`` (treated as ``[0.5]``), a single float, or
                a list of floats.

        Returns:
            list: one flow per time offset, multiplied by ``self.div_flow``
            and the offset, then bilinearly resized by a factor of 4.
        """
        if time_offsets is None:
            time_offsets = [0.5]
        elif isinstance(time_offsets, float):
            time_offsets = [time_offsets]

        # this is a single direction motion result, not a bidirectional one
        flow = model(input)

        # nearest interpolation won't be better i think
        return [
            fluid.layers.resize_bilinear(self.div_flow * flow * time_offset,
                                         scale=4)
            for time_offset in time_offsets
        ]
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import time
import glob
import numpy as np
from imageio import imread, imsave
from tqdm import tqdm
import cv2
import paddle.fluid as fluid
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import video2frames, frames2video
from util import *
from my_args import parser
DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar'
def infer_engine(model_dir,
                 run_mode='fluid',
                 batch_size=1,
                 use_gpu=False,
                 min_subgraph_size=3):
    """Create a Paddle analysis predictor for an exported model.

    Args:
        model_dir: directory holding the ``model`` and ``params`` files.
        run_mode: ``'fluid'``, ``'trt_fp32'`` or ``'trt_fp16'``.
        batch_size: forwarded as the TensorRT ``max_batch_size``.
        use_gpu: run on GPU device 0 when True, otherwise on CPU.
        min_subgraph_size: forwarded to the TensorRT engine setup.

    Returns:
        The predictor built by ``fluid.core.create_paddle_predictor``.

    Raises:
        ValueError: if a non-fluid run mode is requested without GPU.
    """
    if run_mode != 'fluid' and not use_gpu:
        message = "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
        raise ValueError(message.format(run_mode, use_gpu))

    analysis_config = fluid.core.AnalysisConfig
    trt_precisions = {
        'trt_fp32': analysis_config.Precision.Float32,
        'trt_fp16': analysis_config.Precision.Half
    }

    model_file = os.path.join(model_dir, 'model')
    params_file = os.path.join(model_dir, 'params')
    config = analysis_config(model_file, params_file)

    if not use_gpu:
        config.disable_gpu()
    else:
        # initial GPU memory (MB) and device ID
        config.enable_use_gpu(100, 0)
        # optimize the graph and fuse ops
        config.switch_ir_optim(True)

    if run_mode in trt_precisions:
        config.enable_tensorrt_engine(workspace_size=1 << 10,
                                      max_batch_size=batch_size,
                                      min_subgraph_size=min_subgraph_size,
                                      precision_mode=trt_precisions[run_mode],
                                      use_static=False,
                                      use_calib_mode=False)

    # silence the log output during prediction
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # feed/fetch ops must be off for zero_copy_run
    config.switch_use_feed_fetch_ops(False)

    return fluid.core.create_paddle_predictor(config)
def executor(model_dir, use_gpu=False):
    """Load an exported inference model into a fluid executor.

    Args:
        model_dir: directory holding the ``model`` and ``params`` files.
        use_gpu: place the executor on CUDA device 0 when True, else on CPU.

    Returns:
        tuple: ``(exe, program, fetch_targets)``.
    """
    if use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    loaded = fluid.io.load_inference_model(dirname=model_dir,
                                           executor=exe,
                                           model_filename='model',
                                           params_filename='params')
    # The feed names (second element) are not needed by the callers.
    program, fetch_targets = loaded[0], loaded[2]
    return exe, program, fetch_targets
class VideoFrameInterp(object):
    """Interpolate the frames of one video, or of every ``*.mp4`` in a folder.

    Args:
        time_step: spacing of generated frames, e.g. 0.5 doubles the frame
            count.
        model_path: directory of the exported inference model. When empty or
            ``None`` the weights are fetched from ``DAIN_WEIGHT_URL``.
        video_path: an ``.mp4`` file or a directory containing ``.mp4`` files.
        use_gpu: run the executor on GPU when True.
        key_frame_thread: correlation threshold of the per-pair frame-change
            test.
        output_path: root output directory; results go to ``<root>/DAIN``.
        remove_duplicates: drop duplicated input frames before interpolating.
    """

    def __init__(self,
                 time_step,
                 model_path,
                 video_path,
                 use_gpu=True,
                 key_frame_thread=0.,
                 output_path='output',
                 remove_duplicates=True):
        self.video_path = video_path
        self.output_path = os.path.join(output_path, 'DAIN')
        # An empty string (the command-line default) is treated like None so
        # that the default weights are downloaded.
        if not model_path:
            model_path = get_path_from_url(DAIN_WEIGHT_URL, cur_path)

        self.model_path = model_path
        self.time_step = time_step
        self.key_frame_thread = key_frame_thread
        # Stored so that run() honours the flag; the bare name inside run()
        # refers to the imported remove_duplicates() function instead.
        self.remove_duplicates = remove_duplicates

        self.exe, self.program, self.fetch_targets = executor(model_path,
                                                              use_gpu=use_gpu)

    @staticmethod
    def _symmetric_padding(size):
        """Return the ``(before, after)`` padding for one image dimension.

        Sizes that are not a multiple of 128 are padded up to the next
        multiple, split as evenly as possible; exact multiples get a fixed
        32-pixel border on each side.
        """
        if size != ((size >> 7) << 7):
            padded = ((size >> 7) + 1) << 7  # more than necessary
            before = int((padded - size) / 2)
            return before, padded - size - before
        return 32, 32

    def run(self):
        """Extract frames, interpolate them and re-encode each video.

        Returns:
            tuple: ``(frame_pattern_combined, video_pattern_output)`` of the
            last processed video, or ``(None, None)`` when no video was
            processed.
        """
        frame_path_input = os.path.join(self.output_path, 'frames-input')
        frame_path_interpolated = os.path.join(self.output_path,
                                               'frames-interpolated')
        frame_path_combined = os.path.join(self.output_path, 'frames-combined')
        video_path_output = os.path.join(self.output_path, 'videos-output')

        for directory in (self.output_path, frame_path_input,
                          frame_path_interpolated, frame_path_combined,
                          video_path_output):
            if not os.path.exists(directory):
                os.makedirs(directory)

        if self.video_path.endswith('.mp4'):
            videos = [self.video_path]
        else:
            videos = sorted(glob.glob(os.path.join(self.video_path, '*.mp4')))

        frame_pattern_combined = None
        video_pattern_output = None

        for vid in videos:
            # Start every video from the configured step; the value is
            # adjusted below and must not leak into the next video.
            timestep = self.time_step

            print("Interpolating video:", vid)
            cap = cv2.VideoCapture(vid)
            try:
                fps = cap.get(cv2.CAP_PROP_FPS)
            finally:
                cap.release()
            print("Old fps (frame rate): ", fps)

            times_interp = int(1.0 / timestep)
            r2 = str(int(fps) * times_interp)
            print("New fps (frame rate): ", r2)

            out_path = video2frames(vid, frame_path_input)

            vidname = vid.split('/')[-1].split('.')[0]

            tot_timer = AverageMeter()
            proc_timer = AverageMeter()
            end = time.time()

            frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
            if not frames:
                print('no frames extracted from video: {}'.format(vid))
                continue

            orig_frames = len(frames)
            need_frames = orig_frames * times_interp

            if self.remove_duplicates:
                frames = remove_duplicates(out_path)
                left_frames = len(frames)
                # Shrink the step so the output keeps the planned length.
                timestep = left_frames / need_frames
            num_frames = int(1.0 / timestep) - 1

            img = imread(frames[0])

            int_width = img.shape[1]
            int_height = img.shape[0]
            channel = img.shape[2]
            if not channel == 3:
                continue

            padding_left, padding_right = self._symmetric_padding(int_width)
            padding_top, padding_bottom = self._symmetric_padding(int_height)
            pad_spec = ((0, 0), (padding_top, padding_bottom),
                        (padding_left, padding_right))

            frame_num = len(frames)
            print('processing {} frames, from video: {}'.format(frame_num, vid))

            interpolated_dir = os.path.join(frame_path_interpolated, vidname)
            combined_dir = os.path.join(frame_path_combined, vidname)
            for directory in (interpolated_dir, combined_dir):
                if not os.path.exists(directory):
                    os.makedirs(directory)

            # Invariant for the whole video, so computed once.
            time_offsets = [
                kk * timestep for kk in range(1, 1 + num_frames, 1)
            ]

            for i in tqdm(range(frame_num - 1)):
                img_first = imread(frames[i])
                img_second = imread(frames[i + 1])

                # Frame change test. NOTE: key_frame is computed but not
                # consumed anywhere in this method.
                gray_weights = [0.299, 0.587, 0.114]
                img_first_gray = np.dot(img_first[..., :3],
                                        gray_weights).flatten(order='C')
                img_second_gray = np.dot(img_second[..., :3],
                                         gray_weights).flatten(order='C')
                corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1]
                key_frame = corr < self.key_frame_thread

                # HWC uint8 -> CHW float32 in [0, 1]
                X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255
                X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255

                assert (X0.shape[1] == X1.shape[1])
                assert (X0.shape[2] == X1.shape[2])

                X0 = np.pad(X0, pad_spec, mode='edge')
                X1 = np.pad(X1, pad_spec, mode='edge')

                # Two leading axes are added per frame, then the pair is
                # joined along the first one.
                X0 = np.expand_dims(np.expand_dims(X0, axis=0), axis=0)
                X1 = np.expand_dims(np.expand_dims(X1, axis=0), axis=0)
                X = np.concatenate((X0, X1), axis=0)

                proc_end = time.time()
                o = self.exe.run(self.program,
                                 fetch_list=self.fetch_targets,
                                 feed={"image": X})
                y_ = o[0]

                proc_timer.update(time.time() - proc_end)
                tot_timer.update(time.time() - end)
                end = time.time()

                # Crop the padding away and convert back to HWC in [0, 255].
                y_ = [
                    np.transpose(
                        255.0 * item.clip(
                            0, 1.0)[0, :, padding_top:padding_top + int_height,
                                    padding_left:padding_left + int_width],
                        (1, 2, 0)) for item in y_
                ]

                # zip() caps the number of saved frames at len(time_offsets).
                for count, (item, _) in enumerate(zip(y_, time_offsets),
                                                  start=1):
                    out_dir = os.path.join(
                        interpolated_dir,
                        "{:0>6d}_{:0>4d}.png".format(i, count))
                    imsave(out_dir, np.round(item).astype(np.uint8))

            input_dir = os.path.join(frame_path_input, vidname)
            combine_frames(input_dir, interpolated_dir, combined_dir,
                           num_frames)

            frame_pattern_combined = os.path.join(frame_path_combined, vidname,
                                                  '%08d.png')
            video_pattern_output = os.path.join(video_path_output,
                                                vidname + '.mp4')
            if os.path.exists(video_pattern_output):
                os.remove(video_pattern_output)
            frames2video(frame_pattern_combined, video_pattern_output, r2)

        return frame_pattern_combined, video_pattern_output
if __name__ == '__main__':
    # Command-line entry point: build the predictor from the parsed options
    # and process the requested video(s).
    args = parser.parse_args()
    # Everything after the first three parameters is passed by keyword:
    # positionally, args.output_path would land in the use_gpu slot.
    # An empty --saved_model becomes None so the default weights are fetched.
    predictor = VideoFrameInterp(args.time_step,
                                 args.saved_model or None,
                                 args.video_path,
                                 use_gpu=args.use_cuda,
                                 output_path=args.output_path,
                                 remove_duplicates=args.remove_duplicates)
    predictor.run()
自定义OP编译:
2. sh make.sh编译成correlation_lib.so动态库
3. 添加动态库路径到LD_LIBRARY_PATH:
```
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python3.7 -c 'import paddle; print(paddle.sysconfig.get_lib())'`
```
4. 添加correlation op的python路径:
```
export PYTHONPATH=$PYTHONPATH:`pwd`
```
5. python test_correlation.py运行单测,验证是否加载成功。
PS: 如果paddle whl包是从官网上下载的,需要使用gcc 4.8,即把make.sh中的g++ 改为 g++-4.8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import os
# Directory that contains this module; the compiled op library is looked up
# next to it.
file_dir = os.path.dirname(os.path.abspath(__file__))
# Load the custom op library at import time so the "correlation" op type used
# below is available.
fluid.load_op_library(os.path.join(file_dir, 'correlation_lib.so'))
from paddle.fluid.layer_helper import LayerHelper
def correlation(input1,
                input2,
                pad_size,
                kernel_size,
                max_displacement,
                stride1,
                stride2,
                corr_type_multiply=1):
    """Append a custom ``correlation`` op for two inputs to the program.

    Args:
        input1: first input variable; its dtype is used for the output.
        input2: second input variable.
        pad_size: ``pad_size`` attribute of the op.
        kernel_size: ``kernel_size`` attribute of the op.
        max_displacement: ``max_displacement`` attribute of the op.
        stride1: ``stride1`` attribute of the op.
        stride2: ``stride2`` attribute of the op.
        corr_type_multiply: ``corr_type_multiply`` attribute of the op.

    Returns:
        The output variable of the appended op.
    """
    # locals() has to be taken before any other local name is bound, so that
    # only the call arguments are handed to the helper.
    helper = LayerHelper("correlation", **locals())

    op_inputs = {"Input1": input1, "Input2": input2}
    op_attrs = {
        "pad_size": pad_size,
        "kernel_size": kernel_size,
        "max_displacement": max_displacement,
        "stride1": stride1,
        "stride2": stride2,
        "corr_type_multiply": corr_type_multiply
    }

    result = helper.create_variable_for_type_inference(dtype=input1.dtype)
    helper.append_op(type="correlation",
                     inputs=op_inputs,
                     attrs=op_attrs,
                     outputs={"Output": result})
    return result
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
// Computes the {batch, channels, height, width} shape of the correlation
// output.
//   channels: (2 * (max_displacement / stride2) + 1) squared.
//   height/width: the padded input size minus twice the border radius
//   (kernel radius + max_displacement), divided by stride1 and rounded up.
inline std::vector<int64_t> CorrelationOutputSize(int batch, int input_height, int input_width, int stride1, int stride2, int kernel_size, int pad_size, int max_displacement) {
  const int border_radius = (kernel_size - 1) / 2 + max_displacement;
  const int displacement_steps = (max_displacement / stride2) * 2 + 1;
  const int output_channel = displacement_steps * displacement_steps;

  const int valid_height = input_height + 2 * pad_size - 2 * border_radius;
  const int valid_width = input_width + 2 * pad_size - 2 * border_radius;
  const float step = static_cast<float>(stride1);
  const int output_height = static_cast<int>(std::ceil(static_cast<float>(valid_height) / step));
  const int output_width = static_cast<int>(std::ceil(static_cast<float>(valid_width) / step));

  std::vector<int64_t> output_shape;
  output_shape.reserve(4);
  output_shape.push_back(batch);
  output_shape.push_back(output_channel);
  output_shape.push_back(output_height);
  output_shape.push_back(output_width);
  return output_shape;
}
// Declares the interface of the "correlation" operator: two inputs, one
// output and the integer attributes that configure it.
class CorrelationOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override{
// Tensors consumed and produced by the operator.
AddInput("Input1", "input1");
AddInput("Input2", "input2");
AddOutput("Output", "output");
// Integer attributes; only corr_type_multiply has a default value (1).
AddAttr<int>("pad_size", "pad size for input1 and input2");
AddAttr<int>("kernel_size", "kernel size of input1 and input2");
AddAttr<int>("max_displacement", "max displacement of input1 and input2");
AddAttr<int>("stride1", "Input1 stride");
AddAttr<int>("stride2", "Input2 stride");
AddAttr<int>("corr_type_multiply", "correlation coefficient").SetDefault(1);
AddComment(R"DOC(Correlation of two feature map. Only support NCHW data format.)DOC");
}
};
// Forward "correlation" operator: validates its inputs, infers the output
// shape and selects the kernel type.
class CorrelationOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Requires both inputs to be present and 4-dimensional, then sets the
  // dims of "Output" from Input1's batch/height/width and the attributes.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE_EQ(ctx->HasInput("Input1"), true, "Input(input1) cannot be null");
    PADDLE_ENFORCE_EQ(ctx->HasInput("Input2"), true, "Input(input2) cannot be null");

    const int stride1 = ctx->Attrs().Get<int>("stride1");
    const int stride2 = ctx->Attrs().Get<int>("stride2");
    const int max_displacement = ctx->Attrs().Get<int>("max_displacement");
    const int pad_size = ctx->Attrs().Get<int>("pad_size");
    const int kernel_size = ctx->Attrs().Get<int>("kernel_size");

    auto dims1 = ctx->GetInputDim("Input1");
    auto dims2 = ctx->GetInputDim("Input2");
    PADDLE_ENFORCE_EQ(dims1.size() == 4, true, "input1 must be 4-dims");
    PADDLE_ENFORCE_EQ(dims2.size() == 4, true, "input2 must be 4-dims");

    // Only Input1's batch, height and width enter the shape computation.
    std::vector<int64_t> inferred_shape = CorrelationOutputSize(
        dims1[0], dims1[2], dims1[3], stride1, stride2, kernel_size, pad_size,
        max_displacement);
    ctx->SetOutputDim("Output", framework::make_ddim(inferred_shape));
  }

 protected:
  // The kernel type follows Input1's data type; Input2 must have the same.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    auto dtype = OperatorWithKernel::IndicateVarDataType(ctx, "Input1");
    PADDLE_ENFORCE_EQ(dtype, ctx.Input<Tensor>("Input2")->type(), "Input1 and Input2 shoule have same type");
    return framework::OpKernelType(dtype, ctx.GetPlace());
  }
};
// Describes how to build the "correlation_grad" op from a forward
// "correlation" op.
template <typename T>
class CorrelationOpGradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("correlation_grad");
// The grad op receives both forward inputs plus the gradient of Output.
op->SetInput("Input1", this->Input("Input1"));
op->SetInput("Input2", this->Input("Input2"));
op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));
// It produces the gradients of both forward inputs.
op->SetOutput(framework::GradVarName("Input1"), this->InputGrad("Input1"));
op->SetOutput(framework::GradVarName("Input2"), this->InputGrad("Input2"));
// All forward attributes are passed through unchanged.
op->SetAttrMap(this->Attrs());
}
};
// Backward "correlation_grad" operator: validates its inputs, sets the
// gradient shapes and selects the kernel type.
class CorrelationOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Each input gradient takes the dims of its own forward input.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE_EQ(ctx->HasInput("Input1"), true, "Input(Input1) should not be null");
    PADDLE_ENFORCE_EQ(ctx->HasInput("Input2"), true, "Input(Input2) should not be null");
    PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Output")), true, "Input(Output@GRAD) should not be null");

    auto in1_dims = ctx->GetInputDim("Input1");
    auto in2_dims = ctx->GetInputDim("Input2");
    ctx->SetOutputDim(framework::GradVarName("Input1"), in1_dims);
    // Previously this reused in1_dims and left in2_dims unused.
    ctx->SetOutputDim(framework::GradVarName("Input2"), in2_dims);
  }

 protected:
  // The kernel type follows Input1's data type; Output@GRAD must exist.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    const auto* var = ctx.InputVar(framework::GradVarName("Output"));
    if (var == nullptr) {
      PADDLE_THROW("cannot find Output@GRAD");
    }
    return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(ctx, "Input1"), ctx.GetPlace());
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
// Register the forward op with its proto maker and one grad-op maker
// instantiation each for OpDesc and imperative OpBase.
REGISTER_OPERATOR(correlation, ops::CorrelationOp, ops::CorrelationOpMaker,
ops::CorrelationOpGradMaker<paddle::framework::OpDesc>,
ops::CorrelationOpGradMaker<paddle::imperative::OpBase>);
// Register the backward op.
REGISTER_OPERATOR(correlation_grad, ops::CorrelationOpGrad);
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#define THREADS_PER_BLOCK 32
#define FULL_MASK 0xffffffff
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
// Sums `val` across the lanes of a warp using shuffle-down exchanges with
// offsets 16, 8, 4, 2, 1. The value is returned from every lane; callers in
// this file read it from thread 0.
template <typename T>
__forceinline__ __device__ T warpReduceSum(T val) {
  int offset = 16;
  while (offset > 0) {
    val += __shfl_down_sync(FULL_MASK, val, offset);
    offset >>= 1;
  }
  return val;
}
// Sums `val` across a thread block in two stages: every warp reduces its own
// values, lane 0 of each warp stores the partial sum in shared memory, then
// the first warp reduces those partial sums.
template <typename T>
__forceinline__ __device__ T blockReduceSum(T val) {
// One slot per warp for the partial sums.
static __shared__ T shared[32];
int lane = threadIdx.x % warpSize;
int wid = threadIdx.x / warpSize;
val = warpReduceSum(val);
if (lane == 0)
shared[wid] = val;
// Make all partial sums visible before they are read back.
__syncthreads();
// Only the first (blockDim.x / warpSize) threads pick up a partial sum;
// every other thread contributes 0.
val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0;
// The final reduction runs in warp 0 only.
if (wid == 0)
val = warpReduceSum(val);
return val;
}
// Writes zero to x[0 .. num). Each thread starts at its global index and
// advances by the total number of launched threads.
template <typename T>
__global__ void set_zero(T *x, int num) {
  const unsigned int total_threads = blockDim.x * gridDim.x;
  int idx = blockIdx.x * blockDim.x + threadIdx.x;
  while (idx < num) {
    x[idx] = static_cast<T>(0);
    idx += total_threads;
  }
}
// Copies `input`, indexed as [n][c][h][w], into `rinput`, indexed as
// [n][h + pad_size][w + pad_size][c] with spatial extent enlarged by
// 2 * pad_size. The border cells are not written by this kernel.
// One block handles one (n, h, w) position; its threads step over the
// channels in increments of THREADS_PER_BLOCK.
template <typename T>
__global__ void channel_first(const T *input, T *rinput, const int channel, const int height, const int width, const int pad_size) {
  const int n = blockIdx.x;
  const int h = blockIdx.y;
  const int w = blockIdx.z;

  const int plane = height * width;
  const int padded_width = width + 2 * pad_size;
  const int padded_height = height + 2 * pad_size;
  const int padded_row = channel * padded_width;
  const int padded_image = channel * padded_width * padded_height;

  for (int c = threadIdx.x; c < channel; c += THREADS_PER_BLOCK) {
    const T value = input[n * (channel * plane) + c * plane + h * width + w];
    rinput[n * padded_image + (h + pad_size) * padded_row + (w + pad_size) * channel + c] = value;
  }
}
// Forward correlation kernel.
// One block computes every displacement channel of a single output position
// (n = blockIdx.x, row = blockIdx.y, column = blockIdx.z). The threads of the
// block split the input channels, and their partial sums are reduced before
// thread 0 writes the result.
// rinput1 / rinput2 are indexed as [n][h][w][c] over the padded extent (see
// the index1 / index2 formulas below).
template <typename T>
__global__ void correlation_forward(T *output, const int output_channel, const int output_height, const int output_width, const T *rinput1, const int input_channel, const int input_height, const int input_width, const T *rinput2, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2) {
int p_input_width = input_width + 2 * pad_size;
int p_input_height = input_height + 2 * pad_size;
int kernel_rad = (kernel_size - 1) / 2;
// Number of displacement steps on each side, and per axis in total.
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
int n = blockIdx.x;
// Centre of the patch in the padded first input.
int h1 = blockIdx.y * stride1 + max_displacement;
int w1 = blockIdx.z * stride1 + max_displacement;
int c = threadIdx.x;
// Strides of the padded inputs (p_*) and of the output (t_*).
int p_dimchw = p_input_height * p_input_width * input_channel;
int p_dimcw = p_input_width * input_channel;
int p_dimc = input_channel;
int t_dimchw = output_channel * output_height * output_width;
int t_dimhw = output_height * output_width;
int t_dimw = output_width;
// Normalisation factor: number of multiplied element pairs per output value.
int nelems = kernel_size * kernel_size * p_dimc;
// Loop over all displacements (tj vertical, ti horizontal).
for (int tj = -displacement_rad; tj <= displacement_rad; ++tj) {
for(int ti = -displacement_rad; ti <= displacement_rad; ++ti) {
// Centre of the displaced patch in the padded second input.
int w2 = w1 + ti * stride2;
int h2 = h1 + tj * stride2;
T acc0 = 0;
// Accumulate products over the kernel window; this thread handles
// channels c, c + blockDim.x, ...
for(int j = -kernel_rad; j <= kernel_rad; ++j) {
for(int i = -kernel_rad; i <= kernel_rad; ++i) {
for(int ch = c; ch < p_dimc; ch += blockDim.x) {
int index1 = n * p_dimchw + (h1 + j) * p_dimcw + (w1 + i) * p_dimc + ch;
int index2 = n * p_dimchw + (h2 + j) * p_dimcw + (w2 + i) * p_dimc + ch;
acc0 += static_cast<T>(rinput1[index1] * rinput2[index2]);
}
}
}
// A single-warp block only needs the warp-level reduction; larger blocks
// go through the shared-memory block reduction.
if (blockDim.x == warpSize) {
__syncwarp();
acc0 = warpReduceSum(acc0);
} else {
__syncthreads();
acc0 = blockReduceSum(acc0);
}
// Thread 0 writes the normalised sum into the channel of this displacement.
if (threadIdx.x == 0) {
int tc = (tj + displacement_rad) * displacement_size + (ti + displacement_rad);
const int t_index = n * t_dimchw + tc * t_dimhw + blockIdx.y * t_dimw + blockIdx.z;
output[t_index] = static_cast<T>(acc0 / nelems);
}
}
}
}
//class CorrelationKernel<platform::CUDADeviceContext, T>
// GPU forward kernel of the "correlation" op.
// Steps, all enqueued on the device context's stream:
//   1. zero two temporary tensors of dims {N, H + 2*pad, W + 2*pad, C} and
//      the output;
//   2. copy both inputs into the temporaries with channel_first;
//   3. launch correlation_forward with one block per (N, OH, OW) position.
template <typename T>
class CorrelationKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, "It must be CUDAPlace");

    auto *input1 = ctx.Input<Tensor>("Input1");
    auto *input2 = ctx.Input<Tensor>("Input2");

    const int pad_size = ctx.Attr<int>("pad_size");
    const int kernel_size = ctx.Attr<int>("kernel_size");
    const int stride1 = ctx.Attr<int>("stride1");
    const int stride2 = ctx.Attr<int>("stride2");
    const int max_displacement = ctx.Attr<int>("max_displacement");
    // Read like the other attributes, but not used by the computation.
    const int corr_type_multiply = ctx.Attr<int>("corr_type_multiply");
    (void)corr_type_multiply;

    auto *output = ctx.Output<Tensor>("Output");
    output->mutable_data<T>(ctx.GetPlace());
    auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();

    // Sizes are taken from input1, indexed as N, C, H, W.
    const auto in_dims = input1->dims();
    const int batch = in_dims[0];
    const int channels = in_dims[1];
    const int height = in_dims[2];
    const int width = in_dims[3];
    const int padded_height = height + 2 * pad_size;
    const int padded_width = width + 2 * pad_size;

    Tensor rinput1 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({batch, padded_height, padded_width, channels}, dev_ctx);
    rinput1.mutable_data<T>(ctx.GetPlace());
    Tensor rinput2 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({batch, padded_height, padded_width, channels}, dev_ctx);
    rinput2.mutable_data<T>(ctx.GetPlace());

    // Step 1: zero-fill with 512 threads per block.
    set_zero<<<(rinput1.numel() + 512 - 1) / 512, 512, 0, dev_ctx.stream()>>>(rinput1.data<T>(), rinput1.numel());
    set_zero<<<(rinput2.numel() + 512 - 1) / 512, 512, 0, dev_ctx.stream()>>>(rinput2.data<T>(), rinput2.numel());
    set_zero<<<(output->numel() + 512 - 1) / 512, 512, 0, dev_ctx.stream()>>>(output->data<T>(), output->numel());

    const auto out_dims = output->dims();
    const int out_channels = out_dims[1];
    const int out_height = out_dims[2];
    const int out_width = out_dims[3];

    const dim3 block_threads(THREADS_PER_BLOCK);

    // Step 2: one block per input position.
    const dim3 copy_grid(batch, height, width);
    channel_first<T><<<copy_grid, block_threads, 0, dev_ctx.stream()>>>(input1->data<T>(), rinput1.data<T>(), channels, height, width, pad_size);
    channel_first<T><<<copy_grid, block_threads, 0, dev_ctx.stream()>>>(input2->data<T>(), rinput2.data<T>(), channels, height, width, pad_size);

    // Step 3: one block per output position.
    const dim3 corr_grid(batch, out_height, out_width);
    correlation_forward<T><<<corr_grid, block_threads, 0, dev_ctx.stream()>>>(
        output->data<T>(), out_channels, out_height, out_width, rinput1.data<T>(),
        channels, height, width, rinput2.data<T>(), pad_size, kernel_size,
        max_displacement, stride1, stride2);
  }
};
// Backward kernel for the gradient with respect to the first input, for the
// batch item `item`.
// One block handles one input element (row from blockIdx.x, column from
// blockIdx.y, channel = blockIdx.z). Its threads split the output channels
// (displacements), and each accumulates grad_output * rinput2 into its own
// shared-memory slot. Thread 0 then adds the slots up and writes the
// normalised result.
// rinput2 is indexed as [n][h][w][c] over the padded extent (see index2).
template <typename T>
__global__ void correlation_backward_input1(int item, T *grad_input1, const int input_channel, const int input_height, const int input_width, const T *grad_output, const int output_channel, const int output_height, const int output_width, const T *rinput2, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2) {
int n = item;
// Position of this element in padded coordinates.
int h = blockIdx.x * stride1 + pad_size;
int w = blockIdx.y * stride1 + pad_size;
int c = blockIdx.z;
int tch_off = threadIdx.x;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
// Range of output positions taken into account for this input element.
int xmin = (w - kernel_rad - max_displacement) / stride1;
int ymin = (h - kernel_rad - max_displacement) / stride1;
int xmax = (w + kernel_rad - max_displacement) / stride1;
int ymax = (h + kernel_rad - max_displacement) / stride1;
// Nothing to do when the range lies entirely outside the output or is empty.
// These conditions depend only on block indices and kernel arguments, so all
// threads of a block take the same path.
if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) {
return;
}
if (xmin > xmax || ymin > ymax) {
return;
}
// Clamp the range to the valid output extent.
xmin = max(0, xmin);
xmax = min(output_width - 1, xmax);
ymin = max(0, ymin);
ymax = min(output_height - 1, ymax);
// Strides of the padded input (p_*), the output gradient (t_*) and the
// unpadded input gradient (o_*).
int p_input_width = input_width + 2 * pad_size;
int p_input_height = input_height + 2 * pad_size;
int p_dimchw = input_channel * p_input_height * p_input_width;
int p_dimcw = input_channel * p_input_width;
int p_dimc = input_channel;
int t_dimchw = output_channel * output_height * output_width;
int t_dimhw = output_height * output_width;
int t_dimw = output_width;
int o_dimchw = input_channel * input_height * input_width;
int o_dimhw = input_height * input_width;
int o_dimw = input_width;
// Same normalisation factor as in the forward kernel.
int nelems = kernel_size * kernel_size * input_channel;
// One partial sum per thread of the block.
__shared__ T prod_sum[THREADS_PER_BLOCK];
prod_sum[tch_off] = 0;
// Each thread handles output channels tch_off, tch_off + THREADS_PER_BLOCK, ...
for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) {
// Displacement encoded by output channel tc.
int i2 = (tc % displacement_size - displacement_rad) * stride2;
int j2 = (tc / displacement_size - displacement_rad) * stride2;
int index2 = n * p_dimchw + (h + j2) * p_dimcw + (w + i2) * p_dimc + c;
T val2 = rinput2[index2];
for (int j = ymin; j <= ymax; ++j) {
for (int i = xmin; i <= xmax; ++i) {
int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i;
prod_sum[tch_off] += grad_output[t_index] * val2;
}
}
}
// All partial sums must be complete before thread 0 reads them.
__syncthreads();
if (tch_off == 0) {
T reduce_sum = 0;
for (int index = 0; index < THREADS_PER_BLOCK; index++) {
reduce_sum += prod_sum[index];
}
// Write into the unpadded gradient tensor, indexed as [n][c][h][w].
const int index1 = n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size);
grad_input1[index1] = static_cast<T>(reduce_sum / nelems);
}
}
// Backward kernel producing the gradient w.r.t. the second correlation input
// for a single batch item (`item`).
//
// Work decomposition, as read from the index computations below:
//   * blockIdx.x / blockIdx.y select the input row / column (scaled by
//     stride1 and shifted by pad_size into padded coordinates),
//   * blockIdx.z selects the input channel,
//   * the threads of a block stride over the output channels (one output
//     channel per displacement) and accumulate partial sums in shared memory.
// Unlike the input1 kernel, the range of grad_output positions depends on the
// displacement, so it is recomputed inside the channel loop and out-of-range
// displacements are skipped with `continue`.
//
// Layouts implied by the index arithmetic:
//   * rinput1     : [n][padded row][padded col][channel]
//   * grad_output : [n][output channel][row][col]
//   * grad_input2 : [n][channel][row][col] (unpadded)
//
// The reduction reads all THREADS_PER_BLOCK shared slots while each thread
// only initialises its own slot, so the kernel has to be launched with
// THREADS_PER_BLOCK threads per block.
template <typename T>
__global__ void correlation_backward_input2(int item, T *grad_input2, const int input_channel, const int input_height, const int input_width, const T *grad_output, const int output_channel, const int output_height, const int output_width, const T *rinput1, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2){
int n = item;
// Position of this block's input element in padded coordinates.
int h = blockIdx.x * stride1 + pad_size;
int w = blockIdx.y * stride1 + pad_size;
int c = blockIdx.z;
int tch_off = threadIdx.x;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
// Strides of the padded input (p_*), of grad_output (t_*) and of the
// unpadded gradient that is written (o_*).
int p_input_width = input_width + 2 * pad_size;
int p_input_height = input_height + 2 * pad_size;
int p_dimchw = input_channel * p_input_height * p_input_width;
int p_dimcw = input_channel * p_input_width;
int p_dimc = input_channel;
int t_dimchw = output_channel * output_height * output_width;
int t_dimhw = output_height * output_width;
int t_dimw = output_width;
int o_dimchw = input_channel * input_height * input_width;
int o_dimhw = input_height * input_width;
int o_dimw = input_width;
// Normalisation factor applied to the final sum.
int nelems = kernel_size * kernel_size * input_channel;
// One partial sum per thread.
__shared__ T prod_sum[THREADS_PER_BLOCK];
prod_sum[tch_off] = 0;
for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) {
// Decode the output channel into a (i2, j2) displacement in pixels.
int i2 = (tc % displacement_size - displacement_rad) * stride2;
int j2 = (tc / displacement_size - displacement_rad) * stride2;
// Range of grad_output positions for this displacement.
// NOTE(review): C++ integer division truncates toward zero for negative
// numerators - confirm this is the intended rounding for xmin/ymin.
int xmin = (w - kernel_rad - max_displacement - i2) / stride1;
int ymin = (h - kernel_rad - max_displacement - j2) / stride1;
int xmax = (w + kernel_rad - max_displacement - i2) / stride1;
int ymax = (h + kernel_rad - max_displacement - j2) / stride1;
// Skip displacements whose range lies entirely outside grad_output.
if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) {
continue;
}
if (xmin > xmax || ymin > ymax) {
continue;
}
// Clamp the range to the valid extent of grad_output.
xmin = max(0, xmin);
xmax = min(output_width - 1, xmax);
ymin = max(0, ymin);
ymax = min(output_height - 1, ymax);
// First input is sampled at the oppositely displaced location (h - j2, w - i2).
int index1 = n * p_dimchw + (h - j2) * p_dimcw + (w - i2) * p_dimc + c;
T val1 = rinput1[index1];
for (int j = ymin; j <= ymax; ++j) {
for (int i = xmin; i <= xmax; ++i) {
int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i;
prod_sum[tch_off] += grad_output[t_index] * val1;
}
}
}
// Make every thread's partial sum visible before thread 0 reduces them.
__syncthreads();
if (tch_off == 0) {
T reduce_sum = 0;
for (int index = 0; index < THREADS_PER_BLOCK; index++) {
reduce_sum += prod_sum[index];
}
// Convert back from padded to unpadded coordinates for the write.
const int index2 = n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size);
grad_input2[index2] = static_cast<T>(reduce_sum / nelems);
}
}
// GPU kernel class for the gradient of the correlation op.
//
// Compute() reads Input1, Input2 and the gradient of Output, and fills the
// gradients of Input1 and Input2. All work is enqueued on dev_ctx.stream() in
// this order: zero-fill of the temporaries and of both gradients,
// channel_first on each input, then one backward kernel launch per batch item
// for each of the two gradients.
template <typename T>
class CorrelationGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
// This implementation only supports GPU places.
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, "It must use CUDAPlace.");
const auto *input1 = ctx.Input<Tensor>("Input1");
const auto *input2 = ctx.Input<Tensor>("Input2");
const auto *grad_output = ctx.Input<Tensor>(framework::GradVarName("Output"));
const int pad_size = ctx.Attr<int>("pad_size");
const int kernel_size = ctx.Attr<int>("kernel_size");
const int stride1 = ctx.Attr<int>("stride1");
const int stride2 = ctx.Attr<int>("stride2");
const int max_displacement = ctx.Attr<int>("max_displacement");
// Read from the attributes but not used anywhere in this method.
const int corr_type_multiply = ctx.Attr<int>("corr_type_multiply");
auto *grad_input1 = ctx.Output<Tensor>(framework::GradVarName("Input1"));
grad_input1->mutable_data<T>(ctx.GetPlace());
auto *grad_input2 = ctx.Output<Tensor>(framework::GradVarName("Input2"));
grad_input2->mutable_data<T>(ctx.GetPlace());
auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
// Input1 dimensions are read as (N, C, H, W).
auto in_dims = input1->dims();
int N = in_dims[0];
int C = in_dims[1];
int H = in_dims[2];
int W = in_dims[3];
int padded_input_height = H + 2 * pad_size;
int padded_input_width = W + 2 * pad_size;
// Temporaries of shape (N, H + 2 * pad, W + 2 * pad, C) for both inputs.
Tensor rinput1 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput1.mutable_data<T>(ctx.GetPlace());
Tensor rinput2 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput2.mutable_data<T>(ctx.GetPlace());
// Zero the temporaries and both gradients: the backward kernels return
// early for some elements and leave them unwritten.
set_zero<<<(rinput1.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput1.data<T>(), rinput1.numel());
set_zero<<<(rinput2.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput2.data<T>(), rinput2.numel());
set_zero<<<(grad_input1->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(grad_input1->data<T>(), grad_input1->numel());
set_zero<<<(grad_input2->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(grad_input2->data<T>(), grad_input2->numel());
auto grad_out_dims = grad_output->dims();
int GOC = grad_out_dims[1];
int GOH = grad_out_dims[2];
int GOW = grad_out_dims[3];
// channel_first is defined outside this view; presumably it copies each
// input into its padded, channel-last temporary - TODO confirm.
dim3 blocks_grid(N, H, W);
dim3 threads_block(THREADS_PER_BLOCK);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input1->data<T>(), rinput1.data<T>(), C, H, W, pad_size);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input2->data<T>(), rinput2.data<T>(), C, H, W, pad_size);
// One block per (row, col, channel) of the input; the batch index is passed
// as a kernel argument, hence one launch per batch item.
dim3 threadsPerBlock(THREADS_PER_BLOCK);
dim3 totalBlocksCorr(H, W, C);
// Gradient of Input1 uses the rearranged second input.
for (int n = 0; n < N; n++) {
correlation_backward_input1<T><<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(n, grad_input1->data<T>(), C, H, W, grad_output->data<T>(), GOC, GOH, GOW, rinput2.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
}
// Gradient of Input2 uses the rearranged first input.
for (int n = 0; n < N; n++) {
correlation_backward_input2<T><<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(n, grad_input2->data<T>(), C, H, W, grad_output->data<T>(), GOC, GOH, GOW, rinput1.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
}
}
};
} // namespace operators
} // namespace paddle
// Register the CUDA kernels of the forward op ("correlation") and of its
// gradient op ("correlation_grad"), each for float and double.
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
correlation, ops::CorrelationKernel<float>,
ops::CorrelationKernel<double>);
REGISTER_OP_CUDA_KERNEL(
correlation_grad, ops::CorrelationGradKernel<float>,
ops::CorrelationGradKernel<double>);
# Build script for the custom correlation operator: compiles correlation_op.cu
# with nvcc and links it together with correlation_op.cc into
# correlation_lib.so.
#
# Optional, machine-specific environment setup (left commented out):
# source /ssd1/vis/liufanglong/.bashrc
#export PATH=/home/work/cuda-9.0/bin:$PATH
#export LD_LIBRARY_PATH="/home/work/cuda-9.0/lib64:$LD_LIBRARY_PATH"
#export LD_LIBRARY_PATH=/home/vis/chao/local/cudnn_v7.6/cuda/lib64:$LD_LIBRARY_PATH
#export CPLUS_INCLUDE_PATH=/home/vis/chao/local/cudnn_v7.6/cuda/include:/ssd1/vis/liufanglong/local/fluid_1.1.0_for_slurm/nccl_2.3.5/include:$CPLUS_INCLUDE_PATH
#export LD_LIBRARY_PATH=/ssd1/vis/liufanglong/local/fluid_1.1.0_for_slurm/nccl_2.3.5/lib:$LD_LIBRARY_PATH

# Stop at the first failing command, so a failed nvcc compile (or a missing
# paddle install) is never followed by a link step against a missing or stale
# object file.
set -e

# Ask the installed paddle package where its headers and libraries live.
include_dir=$( python -c 'import paddle; print(paddle.sysconfig.get_include())' )
lib_dir=$( python -c 'import paddle; print(paddle.sysconfig.get_lib())' )

echo "$include_dir"
echo "$lib_dir"

OPS='correlation_op'

# ${OPS} is left unquoted on purpose: it is a space-separated list of op names.
for op in ${OPS}
do
    nvcc "${op}.cu" -c -o "${op}.cu.o" -ccbin cc -DPADDLE_WITH_CUDA -DEIGEN_USE_GPU -DPADDLE_USE_DSO -DPADDLE_WITH_MKLDNN -Xcompiler -fPIC -std=c++11 -w --expt-relaxed-constexpr -O0 -g -DNVCC \
        -I "${include_dir}/third_party/" \
        -I "${include_dir}"
done

g++ correlation_op.cu.o correlation_op.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \
    -I "${include_dir}/third_party/" \
    -I "${include_dir}" \
    -L "${lib_dir}" \
    -L /usr/local/cuda/lib64/ -lpaddle_framework -lcudart

# Optional cleanup of the intermediate object files:
# rm *.cu.o
import unittest
from correlation import correlation
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
def corr(x_1,
         x_2,
         pad_size=4,
         kernel_size=1,
         max_displacement=4,
         stride1=1,
         stride2=1,
         corr_multiply=1):
    """NumPy reference implementation used to check the correlation op.

    Both inputs are zero-padded by ``pad_size`` on their last two axes and
    moved to channel-last order. For every batch item, every position
    ``(i, j)`` and every displacement ``(k, l)`` in
    ``[-max_displacement, max_displacement]``, the output holds the mean of the
    element-wise product of a ``kernel_size`` x ``kernel_size`` window of the
    first input and the displaced window of the second input.

    ``stride1``, ``stride2`` and ``corr_multiply`` are accepted but not used.

    Returns:
        float32 array of shape ``(B, (2 * max_displacement + 1) ** 2, H, W)``;
        the channel index is ``(l + d) + D * (k + d)``.
    """
    window = kernel_size
    spatial_pad = ((0, 0), (0, 0), (pad_size, pad_size), (pad_size, pad_size))

    # Zero-pad the two trailing axes, then reorder (0, 1, 2, 3) -> (0, 2, 3, 1).
    padded_1 = np.transpose(np.pad(x_1, spatial_pad, mode='constant'),
                            (0, 2, 3, 1))
    padded_2 = np.transpose(np.pad(x_2, spatial_pad, mode='constant'),
                            (0, 2, 3, 1))

    batch = int(padded_1.shape[0])
    height = int(x_1.shape[2])
    width = int(x_2.shape[3])

    radius = max_displacement
    span = 2 * radius + 1
    shifts = range(-radius, radius + 1)

    output = np.zeros((batch, span * span, height, width), dtype=np.float32)
    for b in range(batch):
        for i in range(height):
            row_1 = i + pad_size
            for j in range(width):
                col_1 = j + pad_size
                patch_1 = padded_1[b, row_1:row_1 + window,
                                   col_1:col_1 + window]
                for k in shifts:
                    row_2 = row_1 + k
                    for l in shifts:
                        col_2 = col_1 + l
                        patch_2 = padded_2[b, row_2:row_2 + window,
                                           col_2:col_2 + window]
                        channel = l + radius + span * (k + radius)
                        output[b, channel, i, j] = np.mean(patch_1 * patch_2)
    return output
class TestCorrelationOp(unittest.TestCase):
    """Static-graph check of the ``correlation`` op against ``corr``."""

    def test_check_output(self):
        """Run the op on random inputs and compare with the NumPy reference."""
        np.random.seed(13)
        # The declared placeholder shape and the fed arrays now share one
        # definition; previously the placeholders were declared as
        # (2, 10, 3, 3) while (2, 3, 4, 5) arrays were fed.
        x_shape = (2, 3, 4, 5)
        x_type = 'float32'
        x1 = fluid.layers.data(name='x1',
                               shape=x_shape,
                               dtype=x_type,
                               append_batch_size=False)
        x2 = fluid.layers.data(name='x2',
                               shape=x_shape,
                               dtype=x_type,
                               append_batch_size=False)

        x1_np = np.random.randn(*x_shape).astype(x_type)
        x2_np = np.random.randn(*x_shape).astype(x_type)
        out_np = corr(x1_np,
                      x2_np,
                      pad_size=4,
                      kernel_size=1,
                      max_displacement=4,
                      stride1=1,
                      stride2=1)

        out = correlation(x1,
                          x2,
                          pad_size=4,
                          kernel_size=1,
                          max_displacement=4,
                          stride1=1,
                          stride2=1)

        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        res = exe.run(feed={'x1': x1_np, 'x2': x2_np}, fetch_list=[out.name])

        # Same tolerances as np.allclose, but a failure reports the mismatch.
        np.testing.assert_allclose(res[0], out_np, rtol=1e-5, atol=1e-8)
class Net(fluid.dygraph.Layer):
    """Dygraph layer that applies ``correlation`` with fixed settings."""

    def __init__(self, name_scope):
        super(Net, self).__init__(name_scope)

    def forward(self, x1, x2):
        """Return the correlation of ``x1`` and ``x2``."""
        settings = dict(pad_size=4,
                        kernel_size=1,
                        max_displacement=4,
                        stride1=1,
                        stride2=1)
        return correlation(x1, x2, **settings)
class TestCorrelationOpDyGraph(unittest.TestCase):
    """Dygraph check of the ``correlation`` op against ``corr``."""

    def test_check_output(self):
        """Run ``Net`` on random inputs and compare with the NumPy reference."""
        np.random.seed(13)
        # Single source of truth for the input shape; the previous x_shape
        # local, (2, 10, 3, 3), was never used and contradicted the data.
        x_shape = (2, 3, 4, 5)
        x_type = 'float32'
        place = fluid.CUDAPlace(0)
        with fluid.dygraph.guard(place):
            x1_np = np.random.randn(*x_shape).astype(x_type)
            x2_np = np.random.randn(*x_shape).astype(x_type)
            out_np = corr(x1_np,
                          x2_np,
                          pad_size=4,
                          kernel_size=1,
                          max_displacement=4,
                          stride1=1,
                          stride2=1)

            x1 = to_variable(x1_np)
            x2 = to_variable(x2_np)
            corr_pd = Net('corr_pd')
            y = corr_pd(x1, x2)
            out = y.numpy()

            # Same tolerances as np.allclose, but a failure reports the
            # mismatch.
            np.testing.assert_allclose(out, out_np, rtol=1e-5, atol=1e-8)
# Script entry point: run the test cases defined in this module.
if __name__ == '__main__':
    unittest.main()
此差异已折叠。
import paddle.fluid as fluid
from paddle.fluid.dygraph import Conv2D
__all__ = ['MultipleBasicBlock', 'MultipleBasicBlock_4']
def conv3x3(in_planes, out_planes, dilation=1, stride=1, param_attr=None):
    """Create a bias-free ``Conv2D`` with a 3x3 filter.

    The padding is ``int(dilation * (3 - 1) / 2)``, i.e. it grows with the
    dilation rate.
    """
    kernel = 3
    padding = int(dilation * (kernel - 1) / 2)
    return Conv2D(in_planes,
                  out_planes,
                  filter_size=kernel,
                  stride=stride,
                  padding=padding,
                  dilation=dilation,
                  bias_attr=False,
                  param_attr=param_attr)
class BasicBlock(fluid.dygraph.Layer):
    """Residual block made of two 3x3 convolutions (no normalisation layers)."""

    expansion = 1

    def __init__(self, inplanes, planes, dilation=1, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # Both convolutions are given the same normal(0, 1) initialiser
        # attribute object.
        normal_init = fluid.initializer.NormalInitializer(loc=0.0,
                                                          scale=1.0,
                                                          seed=0)
        weight_attr = fluid.ParamAttr(initializer=normal_init)
        self.conv1 = conv3x3(inplanes, planes, dilation, stride, weight_attr)
        self.conv2 = conv3x3(planes, planes, param_attr=weight_attr)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(conv2(relu(conv1(x))) + shortcut)``."""
        shortcut = x

        out = fluid.layers.relu(self.conv1(x))
        out = self.conv2(out)

        # The shortcut is the input itself unless a downsample layer was given.
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out += shortcut
        return fluid.layers.relu(out)
class MultipleBasicBlock(fluid.dygraph.Layer):
    """A 7x7 input convolution, up to three ``block`` stages and a 3-channel
    output convolution.

    ``block2`` .. ``block4`` are only created (and applied) when
    ``num_blocks`` is at least 2, 3 and 4 respectively; otherwise the
    attribute is ``None``.
    """

    def __init__(self,
                 input_feature,
                 block,
                 num_blocks,
                 intermediate_feature=64,
                 dense=True):
        super(MultipleBasicBlock, self).__init__()
        self.dense = dense
        self.num_block = num_blocks
        self.intermediate_feature = intermediate_feature

        normal_init = fluid.initializer.NormalInitializer(loc=0.0,
                                                          scale=1.0,
                                                          seed=0)
        weight_attr = fluid.ParamAttr(initializer=normal_init)

        self.block1 = Conv2D(input_feature,
                             intermediate_feature,
                             filter_size=7,
                             stride=1,
                             padding=3,
                             bias_attr=True,
                             param_attr=weight_attr)

        width = intermediate_feature
        self.block2 = block(width, width, dilation=1) if num_blocks >= 2 else None
        self.block3 = block(width, width, dilation=1) if num_blocks >= 3 else None
        self.block4 = block(width, width, dilation=1) if num_blocks >= 4 else None
        self.block5 = Conv2D(width, 3, 3, 1, 1)

    def forward(self, x):
        """Apply block1 + ReLU, the enabled middle stages, then block5."""
        x = fluid.layers.relu(self.block1(x))
        stages = ((2, self.block2), (3, self.block3), (4, self.block4))
        for min_blocks, stage in stages:
            if self.num_block >= min_blocks:
                x = stage(x)
        return self.block5(x)
def MultipleBasicBlock_4(input_feature, intermediate_feature=64):
    """Build a ``MultipleBasicBlock`` with four stages of ``BasicBlock``."""
    return MultipleBasicBlock(input_feature, BasicBlock, 4,
                              intermediate_feature)
# Run predict.py on a sample video after exposing the compiled correlation op
# (pwcnet/correlation_op) to the dynamic loader and to Python.

# Abort if the op directory is missing; otherwise the paths exported below
# would point at the wrong directory.
cd pwcnet/correlation_op || exit 1
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$(python -c 'import paddle; print(paddle.sysconfig.get_lib())')"
export PYTHONPATH="$PYTHONPATH:$(pwd)"
cd ../../ || exit 1

VID_PATH=/paddle/work/github/DAIN/data/CBA.mp4
OUT_PATH=output
MODEL_PATH=DAIN_paddle_weight

CUDA_VISIBLE_DEVICES=2 python predict.py \
    --time_step 0.125 \
    --video_path="$VID_PATH" \
    --output_path="$OUT_PATH" \
    --saved_model="$MODEL_PATH"
\ No newline at end of file
import os, sys
import glob
import shutil
import cv2
class AverageMeter(object):
    """Track the latest value and the running weighted average of a series."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, the average, the sum and the count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
def combine_frames(input, interpolated, combined, num_frames):
    """Interleave original and interpolated PNG frames into ``combined``.

    The ``i``-th original frame is copied to index ``i * (num_frames + 1)``
    and is followed by ``num_frames`` interpolated frames, except after the
    last original frame. Output files are named with eight zero-padded digits.

    Args:
        input: directory holding the original ``*.png`` frames; the numeric
            file stem of the ``i``-th file (in sorted order) must equal ``i``.
        interpolated: directory holding the interpolated ``*.png`` frames.
        combined: existing directory that receives the merged sequence.
        num_frames: number of interpolated frames between two originals.

    Problems while copying interpolated frames (for example, too few of them)
    are printed and do not stop the merge.
    """
    frames1 = sorted(glob.glob(os.path.join(input, '*.png')))
    frames2 = sorted(glob.glob(os.path.join(interpolated, '*.png')))
    num1 = len(frames1)

    for i, src in enumerate(frames1):
        # os.path.basename honours the platform's path separator, whereas
        # splitting on '/' mis-parses the name when another separator is used.
        imgname = int(os.path.basename(src).split('.')[-2])
        assert i == imgname

        dst = os.path.join(combined, '{:08d}.png'.format(i * (num_frames + 1)))
        shutil.copy2(src, dst)

        if i < num1 - 1:
            # Best effort: report the problem and carry on with the next frame.
            try:
                for k in range(num_frames):
                    src = frames2[i * num_frames + k]
                    dst = os.path.join(
                        combined,
                        '{:08d}.png'.format(i * (num_frames + 1) + k + 1))
                    shutil.copy2(src, dst)
            except Exception as e:
                print(e)
                print(len(frames2), num_frames, i, k, i * num_frames + k)
def remove_duplicates(paths):
    """Delete PNG frames whose difference hash repeats, then renumber the rest.

    Frames in ``paths`` are visited in sorted order; for every group of files
    sharing a hash only the first is kept. The survivors are renamed to
    consecutive eight-digit names starting at zero.

    Returns:
        The sorted list of PNG paths found in ``paths`` after renaming.
    """

    def _difference_hash(image, hash_size=8):
        # Compare horizontally adjacent pixels of a small greyscale thumbnail
        # and pack the resulting booleans into one integer.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        small = cv2.resize(gray, (hash_size + 1, hash_size))
        brighter = small[:, 1:] > small[:, :-1]
        return sum(2**bit for bit, flag in enumerate(brighter.flatten())
                   if flag)

    pattern = os.path.join(paths, '*.png')

    groups = {}
    for image_path in sorted(glob.glob(pattern)):
        key = _difference_hash(cv2.imread(image_path))
        groups.setdefault(key, []).append(image_path)

    for same_hash in groups.values():
        if len(same_hash) > 1:
            for extra in same_hash[1:]:
                os.remove(extra)

    for fid, frame in enumerate(sorted(glob.glob(pattern))):
        os.rename(frame, os.path.join(paths, '{:08d}'.format(fid) + '.png'))

    return sorted(glob.glob(pattern))
import paddle
from skimage import color
import numpy as np
from PIL import Image
def convertLAB2RGB(lab):
    """Rescale a normalised LAB array in place and convert it to RGB.

    Channel 0 is multiplied by 100; channels 1 and 2 are mapped through
    ``x * 255 - 128`` and clipped to ``[-100, 100]``. The input array is
    modified by these assignments.
    """
    lightness = lab[:, :, 0:1] * 100
    lab[:, :, 0:1] = lightness

    chroma = np.clip(lab[:, :, 1:3] * 255 - 128, -100, 100)
    lab[:, :, 1:3] = chroma

    return color.lab2rgb(lab.astype(np.float64))
def convertRGB2LABTensor(rgb):
    """Convert an RGB image to normalised L and ab paddle tensors.

    Returns:
        ``(L, ab)``: ``L`` is channel 0 scaled by 2.55, cast to uint8 and
        divided by 255, with a trailing axis added; ``ab`` is channels 1-2
        shifted by 128, clipped to ``[0, 255]`` and divided by 255.
    """
    lab = color.rgb2lab(np.asarray(rgb))

    ab_bytes = np.clip(lab[:, :, 1:3] + 128, 0, 255)
    ab = paddle.to_tensor(ab_bytes.astype('float32')) / 255.

    # The lightness channel is quantised to uint8 through a PIL image.
    l_image = Image.fromarray(np.uint8(lab[:, :, 0] * 2.55))
    l_unit = np.array(l_image).astype('float32')[..., np.newaxis] / 255.0
    L = paddle.to_tensor(l_unit)

    return L, ab
def addMergin(img, target_w, target_h, background_color=(0,0,0)):
    """Resize ``img`` and centre it on a ``target_w`` x ``target_h`` canvas.

    An image that already has the target size is returned unchanged.
    Otherwise it is scaled by ``max(target) / max(size)``, each side is
    truncated to a multiple of 16, and the result is pasted onto a canvas
    filled with ``background_color``.
    """
    src_w, src_h = img.size
    if (src_w, src_h) == (target_w, target_h):
        return img

    ratio = max(target_w, target_h) / max(src_w, src_h)
    new_w = int(src_w * ratio / 16.) * 16
    new_h = int(src_h * ratio / 16.) * 16
    resized = img.resize((new_w, new_h), Image.BICUBIC)

    canvas = Image.new(resized.mode, (target_w, target_h), background_color)
    offset = ((target_w - new_w) // 2, (target_h - new_h) // 2)
    canvas.paste(resized, offset)
    return canvas
import cv2
import numpy as np
def read_img(path, size=None, is_gt=False):
    """Read an image with cv2 and return it as a float32 HWC array.

    The pixel values are divided by 255. A single-channel image gets a
    trailing channel axis; channels beyond the third are dropped.

    Args:
        path: path of the image file.
        size: accepted for compatibility, not used.
        is_gt: accepted for compatibility, not used.

    Returns:
        numpy float32 array, HWC, in cv2's channel order (BGR).

    Raises:
        IOError: if cv2 cannot read the file.
    """
    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    # cv2.imread signals a missing or undecodable file by returning None
    # instead of raising; fail with the offending path rather than with an
    # AttributeError on the next line.
    if img is None:
        raise IOError('Failed to read image: {}'.format(path))

    # NOTE(review): the division assumes 8-bit data; IMREAD_UNCHANGED keeps
    # the file's bit depth, so deeper images would exceed 1.0 - confirm.
    img = img.astype(np.float32) / 255.
    if img.ndim == 2:
        img = np.expand_dims(img, axis=2)
    if img.shape[2] > 3:
        img = img[:, :, :3]
    return img
def get_test_neighbor_frames(crt_i, N, max_n, padding='new_info'):
    """Return the indexes of the ``N`` frames centred on ``crt_i``.

    Indexes that fall outside ``[0, max_n - 1]`` are replaced according to
    ``padding``.

    Args:
        crt_i (int): index of the centre frame.
        N (int): number of frames to read.
        max_n (int): length of the sequence (the last valid index is
            ``max_n - 1``).
        padding (str): one of replicate | reflection | new_info | circle.
            The mode is only validated when an out-of-range index occurs.

    Example: crt_i = 0, N = 5
        replicate:  [0, 0, 0, 1, 2]
        reflection: [2, 1, 0, 1, 2]
        new_info:   [4, 3, 0, 1, 2]
        circle:     [3, 4, 0, 1, 2]

    Returns:
        list[int]: the frame indexes.

    Raises:
        ValueError: if an index is out of range and ``padding`` is unknown.
    """
    last = max_n - 1
    half = N // 2
    first_wanted = crt_i - half
    last_wanted = crt_i + half

    def _resolve(idx):
        if 0 <= idx <= last:
            return idx
        before_start = idx < 0
        if padding == 'replicate':
            return 0 if before_start else last
        if padding == 'reflection':
            return -idx if before_start else last * 2 - idx
        if padding == 'new_info':
            if before_start:
                return last_wanted + (-idx)
            return first_wanted - (idx - last)
        if padding == 'circle':
            return N + idx if before_start else idx - N
        raise ValueError('Wrong padding mode')

    return [_resolve(idx) for idx in range(first_wanted, last_wanted + 1)]
class EDVRDataset:
    """Indexable dataset yielding a five-frame window around each frame."""

    def __init__(self, frame_paths):
        self.frames = frame_paths

    def __getitem__(self, index):
        """Return ``(frames, path)`` for the frame at ``index``.

        ``frames`` is a float32 array holding the five neighbouring frames
        with the last axis reordered by ``[2, 1, 0]`` and moved in front of
        the two spatial axes; ``path`` is the path of the centre frame.
        """
        neighbours = get_test_neighbor_frames(index, 5, len(self.frames))
        stacked = np.stack([read_img(self.frames[i]) for i in neighbours],
                           axis=0)
        # BGR to RGB, then NHWC to NCHW.
        stacked = stacked[..., [2, 1, 0]]
        stacked = np.transpose(stacked, (0, 3, 1, 2)).astype('float32')
        return stacked, self.frames[index]

    def __len__(self):
        return len(self.frames)
# Launcher script. Positional arguments: $1 = mode, $2 = model name,
# $3 = path to the config file.
#
# examples of running programs:
# bash ./run.sh inference EDVR ./configs/edvr_L.yaml
# bash ./run.sh predict EDVR ./configs/edvr_L.yaml
# configs should be ./configs/xxx.yaml
#
# NOTE: only the "predict" mode has a branch below; any other mode does nothing.
mode=$1
name=$2
configs=$3
# save_inference_dir, fix_random_seed and valid_interval are set here but not
# referenced again in this script.
save_inference_dir="./data/inference_model"
use_gpu=True
fix_random_seed=False
log_interval=1
valid_interval=1
weights="./weights/paddle_state_dict_L.npz"
export CUDA_VISIBLE_DEVICES=6 #0,1,5,6 fast, 2,3,4,7 slow
# export FLAGS_fast_eager_deletion_mode=1
# export FLAGS_eager_delete_tensor_gb=0.0
# export FLAGS_fraction_of_gpu_memory_to_use=0.98
# The trailing "x" keeps the comparison well-formed when a variable is empty.
if [ "$mode"x == "predict"x ]; then
echo $mode $name $configs $weights
# Both branches pass the same options to predict.py (only their order
# differs); $weights is echoed above but not forwarded to the command.
if [ "$weights"x != ""x ]; then
python predict.py --model_name=$name \
--config=$configs \
--log_interval=$log_interval \
--video_path='' \
--use_gpu=$use_gpu
else
python predict.py --model_name=$name \
--config=$configs \
--log_interval=$log_interval \
--use_gpu=$use_gpu \
--video_path=''
fi
fi
# Model hyper-parameters: shared settings (common_params) followed by the
# settings of the keypoint detector, the generator (including its dense-motion
# settings) and the discriminator.
model_params:
common_params:
num_kp: 10
num_channels: 3
estimate_jacobian: True
kp_detector_params:
temperature: 0.1
block_expansion: 32
max_features: 1024
scale_factor: 0.25
num_blocks: 5
generator_params:
block_expansion: 64
max_features: 512
num_down_blocks: 2
num_bottleneck_blocks: 6
estimate_occlusion_map: True
dense_motion_params:
block_expansion: 64
max_features: 1024
num_blocks: 5
scale_factor: 0.25
discriminator_params:
scales: [1]
block_expansion: 32
max_features: 512
num_blocks: 4
sn: True
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. #you may not use this file except in compliance with the License.
# You may obtain a copy of the License at #You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software #Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, #distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and #See the License for the specific language governing permissions and
# limitations under the License. #limitations under the License.
import matplotlib import argparse
matplotlib.use('Agg')
import os
import sys
import yaml
import pickle
import imageio
import numpy as np
from tqdm import tqdm
from skimage import img_as_ubyte
from argparse import ArgumentParser
from skimage.transform import resize
from scipy.spatial import ConvexHull
from ppgan.models.generators.occlusion_aware import OcclusionAwareGenerator
from ppgan.modules.keypoint_detector import KPDetector
from ppgan.utils.animate import normalize_kp
import paddle import paddle
paddle.disable_static() from ppgan.first_order_predictor import FirstOrderPredictor
if sys.version_info[0] < 3: parser = argparse.ArgumentParser()
raise Exception( parser.add_argument("--config", default=None, help="path to config")
"You must use Python 3 or higher. Recommended version is Python 3.7") parser.add_argument("--weight_path",
default=None,
help="path to checkpoint to restore")
def load_checkpoints(config_path, checkpoint_path, cpu=False): parser.add_argument("--source_image", type=str, help="path to source image")
parser.add_argument("--driving_video", type=str, help="path to driving video")
with open(config_path) as f: parser.add_argument("--output", default='output', help="path to output")
config = yaml.load(f) parser.add_argument("--relative",
dest="relative",
generator = OcclusionAwareGenerator( action="store_true",
**config['model_params']['generator_params'], help="use relative or absolute keypoint coordinates")
**config['model_params']['common_params']) parser.add_argument(
"--adapt_scale",
kp_detector = KPDetector(**config['model_params']['kp_detector_params'], dest="adapt_scale",
**config['model_params']['common_params']) action="store_true",
help="adapt movement scale based on convex hull of keypoints")
checkpoint = pickle.load(open(checkpoint_path, 'rb'))
generator.set_state_dict(checkpoint['generator']) parser.add_argument(
"--find_best_frame",
kp_detector.set_state_dict(checkpoint['kp_detector']) dest="find_best_frame",
action="store_true",
generator.eval() help=
kp_detector.eval() "Generate from the frame that is the most alligned with source. (Only for faces, requires face_aligment lib)"
)
return generator, kp_detector
parser.add_argument("--best_frame",
dest="best_frame",
def make_animation(source_image, type=int,
driving_video, default=None,
generator, help="Set frame to start from.")
kp_detector, parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")
relative=True,
adapt_movement_scale=True, parser.set_defaults(relative=False)
cpu=False): parser.set_defaults(adapt_scale=False)
with paddle.no_grad():
predictions = []
source = paddle.to_tensor(source_image[np.newaxis].astype(
np.float32)).transpose([0, 3, 1, 2])
driving = paddle.to_tensor(
np.array(driving_video)[np.newaxis].astype(np.float32)).transpose(
[0, 4, 1, 2, 3])
kp_source = kp_detector(source)
kp_driving_initial = kp_detector(driving[:, :, 0])
for frame_idx in tqdm(range(driving.shape[2])):
driving_frame = driving[:, :, frame_idx]
kp_driving = kp_detector(driving_frame)
kp_norm = normalize_kp(kp_source=kp_source,
kp_driving=kp_driving,
kp_driving_initial=kp_driving_initial,
use_relative_movement=relative,
use_relative_jacobian=relative,
adapt_movement_scale=adapt_movement_scale)
out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
predictions.append(
np.transpose(out['prediction'].numpy(), [0, 2, 3, 1])[0])
return predictions
def find_best_frame(source, driving, cpu=False):
    """Return the index of the driving frame whose landmarks best match
    those of ``source``.

    Landmarks come from ``face_alignment`` (imported lazily), are centred and
    rescaled by the square root of their convex hull's ``volume``, and are
    compared by the sum of squared differences. The first frame reaching the
    smallest distance wins.
    """
    import face_alignment

    def _centre_and_rescale(landmarks):
        landmarks = landmarks - landmarks.mean(axis=0, keepdims=True)
        scale = np.sqrt(ConvexHull(landmarks[:, :2]).volume)
        landmarks[:, :2] = landmarks[:, :2] / scale
        return landmarks

    device = 'cpu' if cpu else 'cuda'
    detector = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                            flip_input=True,
                                            device=device)

    # Only the first entry returned by get_landmarks is used.
    reference = _centre_and_rescale(detector.get_landmarks(255 * source)[0])

    best_distance = float('inf')
    best_index = 0
    for index, frame in tqdm(enumerate(driving)):
        candidate = _centre_and_rescale(
            detector.get_landmarks(255 * frame)[0])
        distance = (np.abs(reference - candidate)**2).sum()
        if distance < best_distance:
            best_distance = distance
            best_index = index
    return best_index
if __name__ == "__main__": if __name__ == "__main__":
parser = ArgumentParser() args = parser.parse_args()
parser.add_argument("--config", required=True, help="path to config")
parser.add_argument("--checkpoint", if args.cpu:
default='vox-cpk.pth.tar', paddle.set_device('cpu')
help="path to checkpoint to restore")
predictor = FirstOrderPredictor(output=args.output,
parser.add_argument("--source_image", weight_path=args.weight_path,
default='sup-mat/source.png', config=args.config,
help="path to source image") relative=args.relative,
parser.add_argument("--driving_video", adapt_scale=args.adapt_scale,
default='sup-mat/source.png', find_best_frame=args.find_best_frame,
help="path to driving video") best_frame=args.best_frame)
parser.add_argument("--result_video", predictor.run(args.source_image, args.driving_video)
default='result.mp4',
help="path to output")
parser.add_argument("--relative",
dest="relative",
action="store_true",
help="use relative or absolute keypoint coordinates")
parser.add_argument(
"--adapt_scale",
dest="adapt_scale",
action="store_true",
help="adapt movement scale based on convex hull of keypoints")
parser.add_argument(
"--find_best_frame",
dest="find_best_frame",
action="store_true",
help=
"Generate from the frame that is the most alligned with source. (Only for faces, requires face_aligment lib)"
)
parser.add_argument("--best_frame",
dest="best_frame",
type=int,
default=None,
help="Set frame to start from.")
parser.add_argument("--cpu",
dest="cpu",
action="store_true",
help="cpu mode.")
parser.set_defaults(relative=False)
parser.set_defaults(adapt_scale=False)
opt = parser.parse_args()
source_image = imageio.imread(opt.source_image)
reader = imageio.get_reader(opt.driving_video)
fps = reader.get_meta_data()['fps']
driving_video = []
try:
for im in reader:
driving_video.append(im)
except RuntimeError:
pass
reader.close()
source_image = resize(source_image, (256, 256))[..., :3]
driving_video = [
resize(frame, (256, 256))[..., :3] for frame in driving_video
]
generator, kp_detector = load_checkpoints(config_path=opt.config,
checkpoint_path=opt.checkpoint,
cpu=opt.cpu)
if opt.find_best_frame or opt.best_frame is not None:
i = opt.best_frame if opt.best_frame is not None else find_best_frame(
source_image, driving_video, cpu=opt.cpu)
print("Best frame: " + str(i))
driving_forward = driving_video[i:]
driving_backward = driving_video[:(i + 1)][::-1]
predictions_forward = make_animation(
source_image,
driving_forward,
generator,
kp_detector,
relative=opt.relative,
adapt_movement_scale=opt.adapt_scale,
cpu=opt.cpu)
predictions_backward = make_animation(
source_image,
driving_backward,
generator,
kp_detector,
relative=opt.relative,
adapt_movement_scale=opt.adapt_scale,
cpu=opt.cpu)
predictions = predictions_backward[::-1] + predictions_forward[1:]
else:
predictions = make_animation(source_image,
driving_video,
generator,
kp_detector,
relative=opt.relative,
adapt_movement_scale=opt.adapt_scale,
cpu=opt.cpu)
imageio.mimsave(opt.result_video,
[img_as_ubyte(frame) for frame in predictions],
fps=fps)
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append('.')
import argparse
import paddle
from DAIN.predict import VideoFrameInterp
from DeepRemaster.predict import DeepReasterPredictor
from DeOldify.predict import DeOldifyPredictor
from RealSR.predict import RealSRPredictor
from EDVR.predict import EDVRPredictor
# Command-line interface of the video restoration pipeline: one weight path
# per model, a few model-specific options, and the order in which the models
# are applied.
parser = argparse.ArgumentParser(description='Fix video')
parser.add_argument('--input', type=str, default=None, help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--DAIN_weight',
                    type=str,
                    default=None,
                    help='Path to model weight')
parser.add_argument('--DeepRemaster_weight',
                    type=str,
                    default=None,
                    help='Path to model weight')
parser.add_argument('--DeOldify_weight',
                    type=str,
                    default=None,
                    help='Path to model weight')
parser.add_argument('--RealSR_weight',
                    type=str,
                    default=None,
                    help='Path to model weight')
parser.add_argument('--EDVR_weight',
                    type=str,
                    default=None,
                    help='Path to model weight')
# DAIN args
parser.add_argument('--time_step',
                    type=float,
                    default=0.5,
                    help='choose the time steps')
# DeepRemaster args
parser.add_argument('--reference_dir',
                    type=str,
                    default=None,
                    help='Path to the reference image directory')
parser.add_argument('--colorization',
                    action='store_true',
                    default=False,
                    help='Remaster with colorization')
parser.add_argument('--mindim',
                    type=int,
                    default=360,
                    help='Length of minimum image edges')
# DeOldify args
parser.add_argument('--render_factor',
                    type=int,
                    default=32,
                    help='model inputsize=render_factor*16')
# Process order: model names applied one after another.
# The default is an empty list rather than the string 'none': with nargs='+'
# a string default is iterated character by character by the driver loop.
# `choices` rejects unknown model names at parse time instead of letting the
# driver fail later on a name it has no branch for.
parser.add_argument(
    '--proccess_order',
    type=str,
    default=[],
    nargs='+',
    choices=['DAIN', 'DeepRemaster', 'DeOldify', 'RealSR', 'EDVR'],
    help='Process order')
if __name__ == "__main__":
    args = parser.parse_args()

    # Normalise the requested pipeline to a list of model names. The option
    # uses nargs='+', but a bare string (for instance a string default such
    # as 'none') must not be iterated character by character.
    orders = args.proccess_order
    if orders is None or isinstance(orders, str):
        orders = [orders] if orders else []
    orders = [order for order in orders if order != 'none']

    # Reject unknown names before any model runs. Without this check an
    # unmatched name would reach the summary prints below with
    # `frames_path` undefined, or with results left over from the
    # previous stage.
    supported = ('DAIN', 'DeepRemaster', 'DeOldify', 'RealSR', 'EDVR')
    unknown = [order for order in orders if order not in supported]
    if unknown:
        parser.error('unsupported model(s) in --proccess_order: {} '
                     '(supported: {})'.format(', '.join(unknown),
                                              ', '.join(supported)))

    # Every stage reads the video written by the previous stage; the first
    # stage reads the user supplied input.
    temp_video_path = args.input
    for order in orders:
        print('Model {} proccess start..'.format(order))
        if order == 'DAIN':
            predictor = VideoFrameInterp(args.time_step,
                                         args.DAIN_weight,
                                         temp_video_path,
                                         output_path=args.output)
            frames_path, temp_video_path = predictor.run()
        elif order == 'DeepRemaster':
            # This stage runs with static graph mode disabled; it is
            # re-enabled afterwards for the stages that do not switch it.
            paddle.disable_static()
            predictor = DeepReasterPredictor(
                temp_video_path,
                args.output,
                weight_path=args.DeepRemaster_weight,
                colorization=args.colorization,
                reference_dir=args.reference_dir,
                mindim=args.mindim)
            frames_path, temp_video_path = predictor.run()
            paddle.enable_static()
        elif order == 'DeOldify':
            paddle.disable_static()
            # --render_factor is declared as a DeOldify option, so forward
            # it instead of silently using the predictor's own default.
            predictor = DeOldifyPredictor(temp_video_path,
                                          args.output,
                                          weight_path=args.DeOldify_weight,
                                          render_factor=args.render_factor)
            frames_path, temp_video_path = predictor.run()
            paddle.enable_static()
        elif order == 'RealSR':
            paddle.disable_static()
            predictor = RealSRPredictor(temp_video_path,
                                        args.output,
                                        weight_path=args.RealSR_weight)
            frames_path, temp_video_path = predictor.run()
            paddle.enable_static()
        elif order == 'EDVR':
            predictor = EDVRPredictor(temp_video_path,
                                      args.output,
                                      weight_path=args.EDVR_weight)
            frames_path, temp_video_path = predictor.run()

        print('Model {} output frames path:'.format(order), frames_path)
        print('Model {} output video path:'.format(order), temp_video_path)
        print('Model {} proccess done!'.format(order))
from .dain_predictor import DAINPredictor
from .deepremaster_predictor import DeepRemasterPredictor
from .deoldify_predictor import DeOldifyPredictor
from .realsr_predictor import RealSRPredictor
from .edvr_predictor import EDVRPredictor
from .first_order_predictor import FirstOrderPredictor
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
import paddle
class BasePredictor(object):
    """Shared scaffolding for the application predictors.

    The methods below read attributes that this class never assigns itself:
    ``self.weight_path`` (directory of the exported inference model, read by
    :meth:`build_inference_model`) and ``self.model`` (callable used by
    :meth:`base_forward` in dynamic mode). They have to be set by the
    subclass before those methods are called.
    """

    def __init__(self):
        pass

    @staticmethod
    def _find_inference_files(file_names, weight_path=''):
        """Pick the model file and the parameter file out of ``file_names``.

        A name containing ``'model'`` is taken as the model file, otherwise
        a name containing ``'param'`` is taken as the parameter file. When
        several names match, the last one wins.

        Args:
            file_names: iterable of file names found in the weight directory.
            weight_path: directory the names came from; used only in the
                error message.

        Returns:
            Tuple ``(model_file, param_file)``.

        Raises:
            FileNotFoundError: if either of the two files is missing.
        """
        model_file = None
        param_file = None
        for file_name in file_names:
            if 'model' in file_name:
                model_file = file_name
            elif 'param' in file_name:
                param_file = file_name
        if model_file is None or param_file is None:
            raise FileNotFoundError(
                "expected a '*model*' and a '*param*' file in {!r}, "
                'found model={!r}, params={!r}'.format(weight_path, model_file,
                                                       param_file))
        return model_file, param_file

    def build_inference_model(self):
        """Load the inference program from ``self.weight_path`` (static mode).

        In dynamic mode nothing is built here. In static mode an executor is
        created and ``self.program``, ``self.feed_names`` and
        ``self.fetch_targets`` are filled from the files in the directory.

        Raises:
            FileNotFoundError: if the directory lacks the model or the
                parameter file.
        """
        if paddle.in_dynamic_mode():
            # todo self.model = build_model(self.cfg)
            pass
        else:
            place = paddle.fluid.framework._current_expected_place()
            self.exe = paddle.fluid.Executor(place)
            model_file, param_file = self._find_inference_files(
                os.listdir(self.weight_path), self.weight_path)
            self.program, self.feed_names, self.fetch_targets = paddle.static.load_inference_model(
                dirname=self.weight_path,
                executor=self.exe,
                model_filename=model_file,
                params_filename=param_file)
            print(self.feed_names)

    def base_forward(self, inputs):
        """Run one forward pass and return the raw output.

        In dynamic mode ``self.model`` is called on ``inputs``. In static
        mode ``inputs`` is turned into a feed dict and the loaded program is
        executed:

        * a dict is used as the feed dict unchanged;
        * a list or tuple is matched positionally against ``self.feed_names``;
        * anything else is fed to the first feed name.
        """
        if paddle.in_dynamic_mode():
            out = self.model(inputs)
        else:
            feed_dict = {}
            if isinstance(inputs, dict):
                feed_dict = inputs
            elif isinstance(inputs, (list, tuple)):
                for i, feed_name in enumerate(self.feed_names):
                    feed_dict[feed_name] = inputs[i]
            else:
                feed_dict[self.feed_names[0]] = inputs
            out = self.exe.run(self.program,
                               fetch_list=self.fetch_targets,
                               feed=feed_dict)
        return out

    def is_video(self, input):
        """Return True when OpenCV is able to open ``input`` as a video.

        Constructing ``cv2.VideoCapture`` does not raise for a path it
        cannot open, so the result is taken from ``isOpened()``. Inputs the
        constructor rejects outright (for example a non-path object) still
        yield False.

        NOTE(review): OpenCV may also open some still-image files as an
        image sequence, so a True result is not a guarantee that ``input``
        is a real video - confirm if callers rely on telling them apart.
        """
        try:
            capture = cv2.VideoCapture(input)
        except Exception:
            return False
        try:
            return bool(capture.isOpened())
        finally:
            # Free the underlying file/device handle in every case.
            capture.release()

    def run(self):
        """Entry point of a predictor; has to be overridden by subclasses."""
        raise NotImplementedError
import os, sys # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
import math #
import random #Licensed under the Apache License, Version 2.0 (the "License");
import time #you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
import glob import glob
import shutil import shutil
import numpy as np import numpy as np
from tqdm import tqdm
from imageio import imread, imsave from imageio import imread, imsave
import cv2
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import video2frames, frames2video
from .base_predictor import BasePredictor
DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar'
class DAINPredictor(BasePredictor):
def __init__(self,
             output_path='output',
             weight_path=None,
             time_step=None,
             use_gpu=True,
             key_frame_thread=0.,
             remove_duplicates=False):
    """Store the interpolation settings and build the inference model.

    Args:
        output_path: root output directory; results go to its ``DAIN``
            sub-directory.
        weight_path: location of the model weights. When None, the weights
            are fetched from ``DAIN_WEIGHT_URL`` into the directory of this
            module.
        time_step: stored as ``self.time_step``.
        use_gpu: accepted, but not used by this constructor.
        key_frame_thread: stored as ``self.key_frame_thread``.
        remove_duplicates: stored as ``self.remove_duplicates``.
    """
    if weight_path is None:
        # No weights supplied: download them next to this source file.
        module_dir = os.path.abspath(os.path.dirname(__file__))
        weight_path = get_path_from_url(DAIN_WEIGHT_URL, module_dir)

    self.output_path = os.path.join(output_path, 'DAIN')
    self.weight_path = weight_path
    self.time_step = time_step
    self.key_frame_thread = key_frame_thread
    self.remove_duplicates = remove_duplicates

    # Reads self.weight_path, so it has to come after the assignments.
    self.build_inference_model()
def run(self, video_path):
frame_path_input = os.path.join(self.output_path, 'frames-input')
frame_path_interpolated = os.path.join(self.output_path,
'frames-interpolated')
frame_path_combined = os.path.join(self.output_path, 'frames-combined')
video_path_output = os.path.join(self.output_path, 'videos-output')
if not os.path.exists(self.output_path):
os.makedirs(self.output_path)
if not os.path.exists(frame_path_input):
os.makedirs(frame_path_input)
if not os.path.exists(frame_path_interpolated):
os.makedirs(frame_path_interpolated)
if not os.path.exists(frame_path_combined):
os.makedirs(frame_path_combined)
if not os.path.exists(video_path_output):
os.makedirs(video_path_output)
timestep = self.time_step
num_frames = int(1.0 / timestep) - 1
import networks cap = cv2.VideoCapture(video_path)
from util import *
from my_args import args
if __name__ == '__main__':
DO_MiddleBurryOther = True
video_path = args.video_path
output_path = args.output_path
frame_path_input = os.path.join(output_path, 'frames-input')
frame_path_interpolated = os.path.join(output_path, 'frames-interpolated')
frame_path_combined = os.path.join(output_path, 'frames-combined')
video_path_input = os.path.join(output_path, 'videos-input')
video_path_output = os.path.join(output_path, 'videos-output')
if not os.path.exists(output_path):
os.makedirs(output_path)
if not os.path.exists(frame_path_input):
os.makedirs(frame_path_input)
if not os.path.exists(frame_path_interpolated):
os.makedirs(frame_path_interpolated)
if not os.path.exists(frame_path_combined):
os.makedirs(frame_path_combined)
if not os.path.exists(video_path_input):
os.makedirs(video_path_input)
if not os.path.exists(video_path_output):
os.makedirs(video_path_output)
args.KEY_FRAME_THREAD = 0.
saved_model = args.saved_model
timestep = args.time_step
num_frames = int(1.0 / timestep) - 1
image = fluid.data(name='image',
shape=[2, 1, args.channels, -1, -1],
dtype='float32')
DAIN = networks.__dict__["DAIN_slowmotion"](channel=args.channels,
filter_size=args.filter_size,
timestep=args.time_step,
training=False)
out = DAIN(image)
out = out[0][1]
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
fetch_list = [out.name]
inference_program = fluid.default_main_program().clone(for_test=True)
inference_program = fluid.io.load_persistables(exe, saved_model,
inference_program)
if not DO_MiddleBurryOther:
sys.exit()
if video_path.endswith('.mp4'):
videos = [video_path]
else:
videos = sorted(glob.glob(os.path.join(video_path, '*.mp4')))
for cnt, vid in enumerate(videos):
print("Interpolating video:", vid)
cap = cv2.VideoCapture(vid)
fps = cap.get(cv2.CAP_PROP_FPS) fps = cap.get(cv2.CAP_PROP_FPS)
print("Old fps (frame rate): ", fps) print("Old fps (frame rate): ", fps)
timestep = args.time_step
times_interp = int(1.0 / timestep) times_interp = int(1.0 / timestep)
r2 = str(int(fps) * times_interp) r2 = str(int(fps) * times_interp)
print("New fps (frame rate): ", r2) print("New fps (frame rate): ", r2)
# set start and end of video out_path = video2frames(video_path, frame_path_input)
#ss = 0
#t = 10
#ss = time.strftime('%H:%M:%S', time.gmtime(ss))
#t = time.strftime('%H:%M:%S', time.gmtime(t))
#print(r, ss, t)
r = None
ss = None
t = None
out_path = dump_frames_ffmpeg(vid, frame_path_input, r, ss, t) vidname = video_path.split('/')[-1].split('.')[0]
vidname = vid.split('/')[-1].split('.')[0]
tot_timer = AverageMeter()
proc_timer = AverageMeter()
end = time.time()
frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
orig_frames = len(frames)
need_frames = orig_frames * times_interp
if self.remove_duplicates:
frames = self.remove_duplicate_frames(out_path)
left_frames = len(frames)
timestep = left_frames / need_frames
num_frames = int(1.0 / timestep) - 1
img = imread(frames[0]) img = imread(frames[0])
...@@ -110,7 +99,7 @@ if __name__ == '__main__': ...@@ -110,7 +99,7 @@ if __name__ == '__main__':
int_height = img.shape[0] int_height = img.shape[0]
channel = img.shape[2] channel = img.shape[2]
if not channel == 3: if not channel == 3:
continue return
if int_width != ((int_width >> 7) << 7): if int_width != ((int_width >> 7) << 7):
int_width_pad = (((int_width >> 7) + 1) << 7) # more than necessary int_width_pad = (((int_width >> 7) + 1) << 7) # more than necessary
...@@ -132,16 +121,13 @@ if __name__ == '__main__': ...@@ -132,16 +121,13 @@ if __name__ == '__main__':
padding_bottom = 32 padding_bottom = 32
frame_num = len(frames) frame_num = len(frames)
print(os.path.join(frame_path_input, vidname, '*.png'))
print('processing {} frames, from video: {}'.format(frame_num, vid))
if not os.path.exists(os.path.join(frame_path_interpolated, vidname)): if not os.path.exists(os.path.join(frame_path_interpolated, vidname)):
os.makedirs(os.path.join(frame_path_interpolated, vidname)) os.makedirs(os.path.join(frame_path_interpolated, vidname))
if not os.path.exists(os.path.join(frame_path_combined, vidname)): if not os.path.exists(os.path.join(frame_path_combined, vidname)):
os.makedirs(os.path.join(frame_path_combined, vidname)) os.makedirs(os.path.join(frame_path_combined, vidname))
for i in range(frame_num - 1): for i in tqdm(range(frame_num - 1)):
print(frames[i])
first = frames[i] first = frames[i]
second = frames[i + 1] second = frames[i + 1]
...@@ -155,79 +141,116 @@ if __name__ == '__main__': ...@@ -155,79 +141,116 @@ if __name__ == '__main__':
img_second_gray = img_second_gray.flatten(order='C') img_second_gray = img_second_gray.flatten(order='C')
corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1] corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1]
key_frame = False key_frame = False
if corr < args.KEY_FRAME_THREAD: if corr < self.key_frame_thread:
key_frame = True key_frame = True
'''-------------------------------------------------------''' '''-------------------------------------------------------'''
X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255 X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255
X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255 X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255
if key_frame: assert (X0.shape[1] == X1.shape[1])
y_ = [ assert (X0.shape[2] == X1.shape[2])
np.transpose(255.0 * X0.clip(0, 1.0), (1, 2, 0))
for i in range(num_frames) X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
] (padding_left, padding_right)), mode='edge')
else: X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
assert (X0.shape[1] == X1.shape[1]) (padding_left, padding_right)), mode='edge')
assert (X0.shape[2] == X1.shape[2])
X0 = np.expand_dims(X0, axis=0)
print("size before padding ", X0.shape) X1 = np.expand_dims(X1, axis=0)
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge') X0 = np.expand_dims(X0, axis=0)
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \ X1 = np.expand_dims(X1, axis=0)
(padding_left, padding_right)), mode='edge')
print("size after padding ", X0.shape) X = np.concatenate((X0, X1), axis=0)
X0 = np.expand_dims(X0, axis=0) o = self.base_forward(X)
X1 = np.expand_dims(X1, axis=0)
y_ = o[0]
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0) y_ = [
np.transpose(
X = np.concatenate((X0, X1), axis=0) 255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
proc_end = time.time() padding_left:padding_left + int_width],
o = exe.run(inference_program, (1, 2, 0)) for item in y_
fetch_list=fetch_list, ]
feed={"image": X}) time_offsets = [kk * timestep for kk in range(1, 1 + num_frames, 1)]
y_ = o[0]
count = 1
proc_timer.update(time.time() - proc_end) for item, time_offset in zip(y_, time_offsets):
tot_timer.update(time.time() - end) out_dir = os.path.join(frame_path_interpolated, vidname,
end = time.time() "{:0>6d}_{:0>4d}.png".format(i, count))
print("*******current image process time \t " + count = count + 1
str(time.time() - proc_end) + "s ******") imsave(out_dir, np.round(item).astype(np.uint8))
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [
kk * timestep for kk in range(1, 1 + num_frames, 1)
]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(
frame_path_interpolated, vidname,
"{:0>4d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
timestep = args.time_step
num_frames = int(1.0 / timestep) - 1 num_frames = int(1.0 / timestep) - 1
input_dir = os.path.join(frame_path_input, vidname) input_dir = os.path.join(frame_path_input, vidname)
interpolated_dir = os.path.join(frame_path_interpolated, vidname) interpolated_dir = os.path.join(frame_path_interpolated, vidname)
combined_dir = os.path.join(frame_path_combined, vidname) combined_dir = os.path.join(frame_path_combined, vidname)
combine_frames(input_dir, interpolated_dir, combined_dir, num_frames) self.combine_frames(input_dir, interpolated_dir, combined_dir,
num_frames)
frame_pattern_combined = os.path.join(frame_path_combined, vidname, frame_pattern_combined = os.path.join(frame_path_combined, vidname,
'%08d.png') '%08d.png')
video_pattern_output = os.path.join(video_path_output, vidname + '.mp4') video_pattern_output = os.path.join(video_path_output, vidname + '.mp4')
if os.path.exists(video_pattern_output): if os.path.exists(video_pattern_output):
os.remove(video_pattern_output) os.remove(video_pattern_output)
frames_to_video_ffmpeg(frame_pattern_combined, video_pattern_output, r2) frames2video(frame_pattern_combined, video_pattern_output, r2)
return frame_pattern_combined, video_pattern_output
def combine_frames(self, input, interpolated, combined, num_frames):
    """Interleave original and interpolated frames into one sequence.

    Original frame ``i`` is copied to ``combined`` as
    ``{i * (num_frames + 1):08d}.png``. The ``num_frames`` interpolated
    frames that follow it are copied to the next ``num_frames`` slots.
    Interpolated frames are taken from the sorted listing of
    ``interpolated`` by position, ``num_frames`` per original frame.

    Args:
        input: directory holding the original ``*.png`` frames. Their file
            names (without extension) must be the integers 0, 1, 2, ... in
            sorted order.
        interpolated: directory holding the interpolated ``*.png`` frames.
        combined: existing directory that receives the merged sequence.
        num_frames: number of interpolated frames between two originals.

    A missing or uncopyable interpolated frame is reported with ``print``
    and the remaining frames of that gap are skipped; the original frames
    are still copied.
    """
    frames1 = sorted(glob.glob(os.path.join(input, '*.png')))
    frames2 = sorted(glob.glob(os.path.join(interpolated, '*.png')))
    num1 = len(frames1)
    stride = num_frames + 1

    for i, src in enumerate(frames1):
        # os.path.basename honours the platform separator; splitting on a
        # literal '/' breaks on paths that use backslashes.
        imgname = int(os.path.basename(src).split('.')[-2])
        assert i == imgname, (
            'frame {!r} is at position {} of the sorted input frames'.format(
                src, i))

        dst = os.path.join(combined, '{:08d}.png'.format(i * stride))
        shutil.copy2(src, dst)

        # The last original frame has no interpolated successors.
        if i < num1 - 1:
            try:
                for k in range(num_frames):
                    src = frames2[i * num_frames + k]
                    dst = os.path.join(
                        combined, '{:08d}.png'.format(i * stride + k + 1))
                    shutil.copy2(src, dst)
            except (IndexError, OSError) as e:
                # Best effort: keep going with the frames that do exist.
                print(e)
def remove_duplicate_frames(self, paths):
    """Delete frames with a repeated difference hash and renumber the rest.

    Every ``*.png`` in ``paths`` is hashed. Of all files sharing one hash
    only the first (in sorted order) is kept and the others are removed
    from disk. The survivors are then renamed to ``00000000.png``,
    ``00000001.png``, ... following their sorted order.

    Args:
        paths: directory that contains the extracted frames.

    Returns:
        Sorted list of the ``*.png`` paths left in ``paths``.
    """

    def dhash(image, hash_size=8):
        # Difference hash: shrink the grayscale image to
        # (hash_size + 1) x hash_size pixels and set one bit for every
        # pixel that is brighter than its left neighbour.
        shrunk = cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY),
                            (hash_size + 1, hash_size))
        brighter = shrunk[:, 1:] > shrunk[:, :-1]
        return sum(2**bit for bit, is_set in enumerate(brighter.flatten())
                   if is_set)

    png_pattern = os.path.join(paths, '*.png')

    # Group the frame files by hash, keeping sorted order inside a group.
    by_hash = {}
    for frame_file in sorted(glob.glob(png_pattern)):
        digest = dhash(cv2.imread(frame_file))
        by_hash.setdefault(digest, []).append(frame_file)

    # Keep the first file of every group, drop the remaining ones.
    for same_hash_files in by_hash.values():
        for extra_file in same_hash_files[1:]:
            os.remove(extra_file)

    # Close the numbering gaps left by the removed files.
    for index, old_path in enumerate(sorted(glob.glob(png_pattern))):
        renamed = os.path.join(paths, '{:08d}'.format(index) + '.png')
        os.rename(old_path, renamed)

    return sorted(glob.glob(png_pattern))
import os # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
import sys #
#Licensed under the Apache License, Version 2.0 (the "License");
cur_path = os.path.abspath(os.path.dirname(__file__)) #you may not use this file except in compliance with the License.
sys.path.append(cur_path) #You may obtain a copy of the License at
#
import paddle # http://www.apache.org/licenses/LICENSE-2.0
import paddle.nn as nn #
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2 import cv2
from PIL import Image import subprocess
import numpy as np import numpy as np
from tqdm import tqdm from tqdm import tqdm
import argparse from PIL import Image
import subprocess from skimage import color
import utils
import paddle
from ppgan.models.generators.remaster import NetworkR, NetworkC from ppgan.models.generators.remaster import NetworkR, NetworkC
from paddle.utils.download import get_path_from_url from paddle.utils.download import get_path_from_url
from .base_predictor import BasePredictor
DEEPREMASTER_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/deep_remaster.pdparams' DEEPREMASTER_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/deep_remaster.pdparams'
parser = argparse.ArgumentParser(description='Remastering')
parser.add_argument('--input', type=str, default=None, help='Input video') def convertLAB2RGB(lab):
parser.add_argument('--output', type=str, default='output', help='output dir') lab[:, :, 0:1] = lab[:, :, 0:1] * 100 # [0, 1] -> [0, 100]
parser.add_argument('--reference_dir', lab[:, :, 1:3] = np.clip(lab[:, :, 1:3] * 255 - 128, -100,
type=str, 100) # [0, 1] -> [-128, 128]
default=None, rgb = color.lab2rgb(lab.astype(np.float64))
help='Path to the reference image directory') return rgb
parser.add_argument('--colorization',
action='store_true',
default=False, def convertRGB2LABTensor(rgb):
help='Remaster without colorization') lab = color.rgb2lab(
parser.add_argument('--mindim', np.asarray(rgb)) # RGB -> LAB L[0, 100] a[-127, 128] b[-128, 127]
type=int, ab = np.clip(lab[:, :, 1:3] + 128, 0, 255) # AB --> [0, 255]
default='360', ab = paddle.to_tensor(ab.astype('float32')) / 255.
help='Length of minimum image edges') L = lab[:, :, 0] * 2.55 # L --> [0, 255]
L = Image.fromarray(np.uint8(L))
class DeepReasterPredictor: L = paddle.to_tensor(np.array(L).astype('float32')[..., np.newaxis] / 255.0)
return L, ab
def addMergin(img, target_w, target_h, background_color=(0, 0, 0)):
    """Resize ``img`` and centre it on a ``target_w`` x ``target_h`` canvas.

    An image that already has the target size is returned unchanged.
    Otherwise it is scaled by the ratio of the longer target edge to the
    longer image edge, each scaled edge is rounded down to a multiple of
    16, and the result is pasted in the middle of a new image filled with
    ``background_color``.

    Args:
        img: image exposing ``size``, ``resize`` and ``mode`` (a PIL image
            in the visible call site).
        target_w: width of the returned image.
        target_h: height of the returned image.
        background_color: fill colour of the area not covered by ``img``.

    Returns:
        ``img`` itself when no change is needed, else the new padded image.
    """
    src_w, src_h = img.size
    if src_w == target_w and src_h == target_h:
        return img

    ratio = max(target_w, target_h) / max(src_w, src_h)
    # Scaled edges are truncated to a multiple of 16.
    new_w = int(src_w * ratio / 16.) * 16
    new_h = int(src_h * ratio / 16.) * 16
    scaled = img.resize((new_w, new_h), Image.BICUBIC)

    # Top-left corner that centres the scaled image on the canvas.
    left = (target_w - new_w) // 2
    top = (target_h - new_h) // 2

    canvas = Image.new(scaled.mode, (target_w, target_h), background_color)
    canvas.paste(scaled, (left, top))
    return canvas
class DeepRemasterPredictor(BasePredictor):
def __init__(self, def __init__(self,
input, output='output',
output,
weight_path=None, weight_path=None,
colorization=False, colorization=False,
reference_dir=None, reference_dir=None,
mindim=360): mindim=360):
self.input = input
self.output = os.path.join(output, 'DeepRemaster') self.output = os.path.join(output, 'DeepRemaster')
self.colorization = colorization self.colorization = colorization
self.reference_dir = reference_dir self.reference_dir = reference_dir
self.mindim = mindim self.mindim = mindim
if weight_path is None: if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(DEEPREMASTER_WEIGHT_URL, cur_path) weight_path = get_path_from_url(DEEPREMASTER_WEIGHT_URL, cur_path)
state_dict, _ = paddle.load(weight_path) self.weight_path = weight_path
state_dict = paddle.load(weight_path)
self.modelR = NetworkR() self.modelR = NetworkR()
self.modelR.load_dict(state_dict['modelR']) self.modelR.load_dict(state_dict['modelR'])
...@@ -63,7 +92,7 @@ class DeepReasterPredictor: ...@@ -63,7 +92,7 @@ class DeepReasterPredictor:
self.modelC.load_dict(state_dict['modelC']) self.modelC.load_dict(state_dict['modelC'])
self.modelC.eval() self.modelC.eval()
def run(self): def run(self, video_path):
outputdir = self.output outputdir = self.output
outputdir_in = os.path.join(outputdir, 'input/') outputdir_in = os.path.join(outputdir, 'input/')
os.makedirs(outputdir_in, exist_ok=True) os.makedirs(outputdir_in, exist_ok=True)
...@@ -94,9 +123,7 @@ class DeepReasterPredictor: ...@@ -94,9 +123,7 @@ class DeepReasterPredictor:
refimgs = [] refimgs = []
for i, v in enumerate(refs): for i, v in enumerate(refs):
refimg = utils.addMergin(v, refimg = addMergin(v, target_w=target_w, target_h=target_h)
target_w=target_w,
target_h=target_h)
refimg = np.array(refimg).astype('float32').transpose( refimg = np.array(refimg).astype('float32').transpose(
2, 0, 1) / 255.0 2, 0, 1) / 255.0
refimgs.append(refimg) refimgs.append(refimg)
...@@ -105,7 +132,7 @@ class DeepReasterPredictor: ...@@ -105,7 +132,7 @@ class DeepReasterPredictor:
refimgs = paddle.unsqueeze(refimgs, 0) refimgs = paddle.unsqueeze(refimgs, 0)
# Load video # Load video
cap = cv2.VideoCapture(self.input) cap = cv2.VideoCapture(video_path)
nframes = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) nframes = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
v_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH) v_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
v_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) v_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
...@@ -156,7 +183,7 @@ class DeepReasterPredictor: ...@@ -156,7 +183,7 @@ class DeepReasterPredictor:
elif nchannels == 3: elif nchannels == 3:
cv2.imwrite(outputdir_in + '%07d.png' % index, frame) cv2.imwrite(outputdir_in + '%07d.png' % index, frame)
frame = frame[:, :, ::-1] ## BGR -> RGB frame = frame[:, :, ::-1] ## BGR -> RGB
frame_l, frame_ab = utils.convertRGB2LABTensor(frame) frame_l, frame_ab = convertRGB2LABTensor(frame)
frame_l = frame_l.transpose([2, 0, 1]) frame_l = frame_l.transpose([2, 0, 1])
frame_ab = frame_ab.transpose([2, 0, 1]) frame_ab = frame_ab.transpose([2, 0, 1])
frame_l = frame_l.reshape([ frame_l = frame_l.reshape([
...@@ -193,7 +220,7 @@ class DeepReasterPredictor: ...@@ -193,7 +220,7 @@ class DeepReasterPredictor:
(out_l, out_ab), (out_l, out_ab),
axis=0).detach().numpy().transpose((1, 2, 0)) axis=0).detach().numpy().transpose((1, 2, 0))
out = Image.fromarray( out = Image.fromarray(
np.uint8(utils.convertLAB2RGB(out) * 255)) np.uint8(convertLAB2RGB(out) * 255))
out.save(outputdir_out + '%07d.png' % (index)) out.save(outputdir_out + '%07d.png' % (index))
else: else:
raise ValueError('channels of imag3 must be 3!') raise ValueError('channels of imag3 must be 3!')
...@@ -214,7 +241,7 @@ class DeepReasterPredictor: ...@@ -214,7 +241,7 @@ class DeepReasterPredictor:
output = paddle.concat( output = paddle.concat(
(out_l, out_c), axis=0).numpy().transpose((1, 2, 0)) (out_l, out_c), axis=0).numpy().transpose((1, 2, 0))
output = Image.fromarray( output = Image.fromarray(
np.uint8(utils.convertLAB2RGB(output) * 255)) np.uint8(convertLAB2RGB(output) * 255))
output.save(outputdir_out + '%07d.png' % index) output.save(outputdir_out + '%07d.png' % index)
it = it + 1 it = it + 1
...@@ -222,7 +249,7 @@ class DeepReasterPredictor: ...@@ -222,7 +249,7 @@ class DeepReasterPredictor:
# Save result videos # Save result videos
outfile = os.path.join(outputdir, outfile = os.path.join(outputdir,
self.input.split('/')[-1].split('.')[0]) video_path.split('/')[-1].split('.')[0])
cmd = 'ffmpeg -y -r %d -i %s%%07d.png -vcodec libx264 -pix_fmt yuv420p -r %d %s_in.mp4' % ( cmd = 'ffmpeg -y -r %d -i %s%%07d.png -vcodec libx264 -pix_fmt yuv420p -r %d %s_in.mp4' % (
fps, outputdir_in, fps, outfile) fps, outputdir_in, fps, outfile)
subprocess.call(cmd, shell=True) subprocess.call(cmd, shell=True)
...@@ -236,14 +263,3 @@ class DeepReasterPredictor: ...@@ -236,14 +263,3 @@ class DeepReasterPredictor:
cap.release() cap.release()
pbar.close() pbar.close()
return outputdir_out, '%s_out.mp4' % outfile return outputdir_out, '%s_out.mp4' % outfile
if __name__ == "__main__":
args = parser.parse_args()
paddle.disable_static()
predictor = DeepReasterPredictor(args.input,
args.output,
colorization=args.colorization,
reference_dir=args.reference_dir,
mindim=args.mindim)
predictor.run()
import os # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
import sys #
#Licensed under the Apache License, Version 2.0 (the "License");
cur_path = os.path.abspath(os.path.dirname(__file__)) #you may not use this file except in compliance with the License.
sys.path.append(cur_path) #You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2 import cv2
import glob import glob
import argparse
import numpy as np import numpy as np
import paddle
import pickle
from PIL import Image from PIL import Image
from tqdm import tqdm from tqdm import tqdm
from paddle import fluid
import paddle
from paddle.utils.download import get_path_from_url from paddle.utils.download import get_path_from_url
from ppgan.utils.video import frames2video, video2frames from ppgan.utils.video import frames2video, video2frames
from ppgan.models.generators.deoldify import build_model from ppgan.models.generators.deoldify import build_model
parser = argparse.ArgumentParser(description='DeOldify') from .base_predictor import BasePredictor
parser.add_argument('--input', type=str, default='none', help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--render_factor',
type=int,
default=32,
help='model inputsize=render_factor*16')
parser.add_argument('--weight_path',
type=str,
default=None,
help='Path to the reference image directory')
DEOLDIFY_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DeOldify_stable.pdparams' DEOLDIFY_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DeOldify_stable.pdparams'
class DeOldifyPredictor(): class DeOldifyPredictor(BasePredictor):
def __init__(self, def __init__(self, output='output', weight_path=None, render_factor=32):
input, # self.input = input
output,
batch_size=1,
weight_path=None,
render_factor=32):
self.input = input
self.output = os.path.join(output, 'DeOldify') self.output = os.path.join(output, 'DeOldify')
self.render_factor = render_factor self.render_factor = render_factor
self.model = build_model() self.model = build_model()
if weight_path is None: if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(DEOLDIFY_WEIGHT_URL, cur_path) weight_path = get_path_from_url(DEOLDIFY_WEIGHT_URL, cur_path)
state_dict, _ = paddle.load(weight_path) state_dict = paddle.load(weight_path)
self.model.load_dict(state_dict) self.model.load_dict(state_dict)
self.model.eval() self.model.eval()
...@@ -85,8 +77,14 @@ class DeOldifyPredictor(): ...@@ -85,8 +77,14 @@ class DeOldifyPredictor():
final = Image.fromarray(final) final = Image.fromarray(final)
return final return final
def run_single(self, img_path): def run_image(self, img):
ori_img = Image.open(img_path).convert('LA').convert('RGB') if isinstance(img, str):
ori_img = Image.open(img).convert('LA').convert('RGB')
elif isinstance(img, np.ndarray):
ori_img = Image.fromarray(img).convert('LA').convert('RGB')
elif isinstance(img, Image.Image):
ori_img = img
img = self.norm(ori_img, self.render_factor) img = self.norm(ori_img, self.render_factor)
x = paddle.to_tensor(img[np.newaxis, ...]) x = paddle.to_tensor(img[np.newaxis, ...])
out = self.model(x) out = self.model(x)
...@@ -97,9 +95,8 @@ class DeOldifyPredictor(): ...@@ -97,9 +95,8 @@ class DeOldifyPredictor():
pred_img = self.post_process(pred_img, ori_img) pred_img = self.post_process(pred_img, ori_img)
return pred_img return pred_img
def run(self): def run_video(self, video):
vid = self.input base_name = os.path.basename(video).split('.')[0]
base_name = os.path.basename(vid).split('.')[0]
output_path = os.path.join(self.output, base_name) output_path = os.path.join(self.output, base_name)
pred_frame_path = os.path.join(output_path, 'frames_pred') pred_frame_path = os.path.join(output_path, 'frames_pred')
...@@ -109,15 +106,15 @@ class DeOldifyPredictor(): ...@@ -109,15 +106,15 @@ class DeOldifyPredictor():
if not os.path.exists(pred_frame_path): if not os.path.exists(pred_frame_path):
os.makedirs(pred_frame_path) os.makedirs(pred_frame_path)
cap = cv2.VideoCapture(vid) cap = cv2.VideoCapture(video)
fps = cap.get(cv2.CAP_PROP_FPS) fps = cap.get(cv2.CAP_PROP_FPS)
out_path = video2frames(vid, output_path) out_path = video2frames(video, output_path)
frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
for frame in tqdm(frames): for frame in tqdm(frames):
pred_img = self.run_single(frame) pred_img = self.run_image(frame)
frame_name = os.path.basename(frame) frame_name = os.path.basename(frame)
pred_img.save(os.path.join(pred_frame_path, frame_name)) pred_img.save(os.path.join(pred_frame_path, frame_name))
...@@ -130,15 +127,14 @@ class DeOldifyPredictor(): ...@@ -130,15 +127,14 @@ class DeOldifyPredictor():
return frame_pattern_combined, vid_out_path return frame_pattern_combined, vid_out_path
def run(self, input):
if self.is_video(input):
return self.run_video(input)
else:
pred_img = self.run_image(input)
if __name__ == '__main__': if self.output:
paddle.disable_static() base_name = os.path.basename(input)
args = parser.parse_args() pred_img.save(os.path.join(self.output, base_name + '.png'))
predictor = DeOldifyPredictor(args.input,
args.output,
weight_path=args.weight_path,
render_factor=args.render_factor)
frames_path, temp_video_path = predictor.run()
print('output video path:', temp_video_path) return pred_img
...@@ -13,44 +13,18 @@ ...@@ -13,44 +13,18 @@
#limitations under the License. #limitations under the License.
import os import os
import sys import cv2
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import time import time
import argparse
import ast
import glob import glob
import numpy as np import numpy as np
import paddle.fluid as fluid
import cv2
from tqdm import tqdm from tqdm import tqdm
from data import EDVRDataset
from paddle.utils.download import get_path_from_url from paddle.utils.download import get_path_from_url
from ppgan.utils.video import frames2video, video2frames from ppgan.utils.video import frames2video, video2frames
EDVR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar' from .base_predictor import BasePredictor
def parse_args(): EDVR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar'
parser = argparse.ArgumentParser()
parser.add_argument('--input',
type=str,
default=None,
help='input video path')
parser.add_argument('--output',
type=str,
default='output',
help='output path')
parser.add_argument('--weight_path',
type=str,
default=None,
help='weight path')
args = parser.parse_args()
return args
def get_img(pred): def get_img(pred):
...@@ -72,29 +46,107 @@ def save_img(img, framename): ...@@ -72,29 +46,107 @@ def save_img(img, framename):
cv2.imwrite(framename, img) cv2.imwrite(framename, img)
class EDVRPredictor: def read_img(path, size=None, is_gt=False):
def __init__(self, input, output, weight_path=None): """read image by cv2
return: Numpy float32, HWC, BGR, [0,1]"""
img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
img = img.astype(np.float32) / 255.
if img.ndim == 2:
img = np.expand_dims(img, axis=2)
if img.shape[2] > 3:
img = img[:, :, :3]
return img
def get_test_neighbor_frames(crt_i, N, max_n, padding='new_info'):
    """Generate an index list for reading N frames from a sequence of images.

    The window is centred on ``crt_i`` and spans ``N // 2`` frames on each
    side. Positions that fall before the first frame or after the last one
    are replaced according to ``padding``.

    Args:
        crt_i (int): current center index
        N (int): number of frames to read
        max_n (int): number of images in the sequence (counted from 1)
        padding (str): padding mode, one of
            replicate | reflection | new_info | circle

    Example: crt_i = 0, N = 5
        replicate:  [0, 0, 0, 1, 2]
        reflection: [2, 1, 0, 1, 2]
        new_info:   [4, 3, 0, 1, 2]
        circle:     [3, 4, 0, 1, 2]

    Returns:
        return_l (list [int]): a list of indexes; padded positions are
            clamped to ``[0, max_n - 1]``.

    Raises:
        ValueError: if the window crosses a sequence boundary and
            ``padding`` is not one of the supported modes.
    """
    last_idx = max_n - 1
    n_pad = N // 2
    return_l = []

    for i in range(crt_i - n_pad, crt_i + n_pad + 1):
        if i < 0:
            # Window starts before the first frame.
            if padding == 'replicate':
                add_idx = 0
            elif padding == 'reflection':
                add_idx = -i
            elif padding == 'new_info':
                add_idx = (crt_i + n_pad) + (-i)
            elif padding == 'circle':
                add_idx = N + i
            else:
                raise ValueError('Wrong padding mode')
        elif i > last_idx:
            # Window runs past the last frame.
            if padding == 'replicate':
                add_idx = last_idx
            elif padding == 'reflection':
                add_idx = last_idx * 2 - i
            elif padding == 'new_info':
                add_idx = (crt_i - n_pad) - (i - last_idx)
            elif padding == 'circle':
                add_idx = i - N
            else:
                raise ValueError('Wrong padding mode')
        else:
            add_idx = i

        # When the sequence is shorter than the window, the padding formulas
        # above can point past the end or below zero (a negative index would
        # silently wrap around in Python). Clamp so callers always receive a
        # usable index; this is a no-op for sequences of at least N frames.
        add_idx = min(max(add_idx, 0), last_idx)
        return_l.append(add_idx)

    return return_l
class EDVRDataset:
    """Sequence-style dataset that yields a 5-frame window per video frame.

    Each item is the stack of the frame at ``index`` and its neighbours (as
    chosen by ``get_test_neighbor_frames``), together with the path of the
    centre frame.
    """

    def __init__(self, frame_paths):
        """Store the ordered list of frame image paths."""
        self.frames = frame_paths

    def __getitem__(self, index):
        """Return ``(img_LQs, frame_path)`` for the frame at ``index``.

        ``img_LQs`` is a float32 array built by stacking the 5 neighbouring
        frames, reversing the channel axis and moving it in front of the
        spatial axes.

        Raises:
            IndexError: if ``index`` is outside the sequence. This is also
                what terminates ``for item in dataset`` style iteration.
        """
        num_frames = len(self.frames)
        if index < 0:
            # Follow the usual sequence convention for negative indices.
            index += num_frames
        if not 0 <= index < num_frames:
            # Fail before touching the disk: previously the out-of-range
            # case was only detected after five frames had been read.
            raise IndexError('frame index out of range')

        indexs = get_test_neighbor_frames(index, 5, num_frames)
        frame_list = [read_img(self.frames[i]) for i in indexs]
        img_LQs = np.stack(frame_list, axis=0)
        # BGR to RGB (assumes 3-channel frames), then NHWC to NCHW
        img_LQs = img_LQs[:, :, :, [2, 1, 0]]
        img_LQs = np.transpose(img_LQs, (0, 3, 1, 2)).astype('float32')
        return img_LQs, self.frames[index]

    def __len__(self):
        """Return the number of frames in the sequence."""
        return len(self.frames)
class EDVRPredictor(BasePredictor):
def __init__(self, output='output', weight_path=None):
self.input = input self.input = input
self.output = os.path.join(output, 'EDVR') self.output = os.path.join(output, 'EDVR')
place = fluid.CUDAPlace(
0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
self.exe = fluid.Executor(place)
if weight_path is None: if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(EDVR_WEIGHT_URL, cur_path) weight_path = get_path_from_url(EDVR_WEIGHT_URL, cur_path)
model_filename = 'EDVR_model.pdmodel' self.weight_path = weight_path
params_filename = 'EDVR_params.pdparams'
out = fluid.io.load_inference_model(dirname=weight_path, self.build_inference_model()
model_filename=model_filename,
params_filename=params_filename,
executor=self.exe)
self.infer_prog, self.feed_list, self.fetch_list = out
def run(self): def run(self, video_path):
vid = self.input vid = video_path
base_name = os.path.basename(vid).split('.')[0] base_name = os.path.basename(vid).split('.')[0]
output_path = os.path.join(self.output, base_name) output_path = os.path.join(self.output, base_name)
pred_frame_path = os.path.join(output_path, 'frames_pred') pred_frame_path = os.path.join(output_path, 'frames_pred')
...@@ -119,11 +171,9 @@ class EDVRPredictor: ...@@ -119,11 +171,9 @@ class EDVRPredictor:
for infer_iter, data in enumerate(tqdm(dataset)): for infer_iter, data in enumerate(tqdm(dataset)):
data_feed_in = [data[0]] data_feed_in = [data[0]]
infer_outs = self.exe.run( outs = self.base_forward(np.array(data_feed_in))
self.infer_prog,
fetch_list=self.fetch_list, infer_result_list = [item for item in outs]
feed={self.feed_list[0]: np.array(data_feed_in)})
infer_result_list = [item for item in infer_outs]
frame_path = data[1] frame_path = data[1]
...@@ -144,9 +194,3 @@ class EDVRPredictor: ...@@ -144,9 +194,3 @@ class EDVRPredictor:
frames2video(frame_pattern_combined, vid_out_path, str(int(fps))) frames2video(frame_pattern_combined, vid_out_path, str(int(fps)))
return frame_pattern_combined, vid_out_path return frame_pattern_combined, vid_out_path
if __name__ == "__main__":
args = parse_args()
predictor = EDVRPredictor(args.input, args.output, args.weight_path)
predictor.run()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import sys
import yaml
import pickle
import imageio
import numpy as np
from tqdm import tqdm
from skimage import img_as_ubyte
from skimage.transform import resize
from scipy.spatial import ConvexHull
import paddle
from paddle.utils.download import get_path_from_url
from ppgan.utils.animate import normalize_kp
from ppgan.modules.keypoint_detector import KPDetector
from ppgan.models.generators.occlusion_aware import OcclusionAwareGenerator
from .base_predictor import BasePredictor
class FirstOrderPredictor(BasePredictor):
    """Animate a source image with the motion of a driving video.

    Builds an ``OcclusionAwareGenerator`` and a ``KPDetector`` from a config,
    loads their weights from a checkpoint, and writes the generated animation
    to ``<output>/result.mp4``.
    """

    def __init__(self,
                 output='output',
                 weight_path=None,
                 config=None,
                 relative=False,
                 adapt_scale=False,
                 find_best_frame=False,
                 best_frame=None):
        """Load the configuration and build the networks.

        Args:
            output (str): directory the result video is written to.
            weight_path (str | None): checkpoint path; when None the default
                vox-cpk weights are downloaded next to this file.
            config (str | dict | None): path to a YAML config file (YAML text
                is also accepted), an already parsed config dict, or None to
                use the built-in default model parameters.
            relative (bool): passed to ``normalize_kp`` as both
                ``use_relative_movement`` and ``use_relative_jacobian``.
            adapt_scale (bool): passed to ``normalize_kp`` as
                ``adapt_movement_scale``.
            find_best_frame (bool): when True (and ``best_frame`` is None),
                start the animation from the driving frame whose face
                landmarks are closest to the source image.
            best_frame (int | None): explicit index of the starting frame.

        Raises:
            TypeError: if ``config`` is not a str, dict or None.
            ValueError: if a str ``config`` does not yield a mapping.
        """
        if isinstance(config, str):
            # The original passed the string straight to yaml.load, which
            # parses the path itself rather than the file it names (and fails
            # outright on PyYAML >= 6 because no Loader is given).
            if os.path.isfile(config):
                with open(config) as f:
                    self.cfg = yaml.safe_load(f)
            else:
                # Keep accepting inline YAML text for backward compatibility.
                self.cfg = yaml.safe_load(config)
            if not isinstance(self.cfg, dict):
                raise ValueError(
                    'config {!r} is neither an existing YAML file nor YAML '
                    'text describing a mapping'.format(config))
        elif isinstance(config, dict):
            self.cfg = config
        elif config is None:
            self.cfg = {
                'model_params': {
                    'common_params': {
                        'num_kp': 10,
                        'num_channels': 3,
                        'estimate_jacobian': True
                    },
                    'kp_detector_params': {
                        'temperature': 0.1,
                        'block_expansion': 32,
                        'max_features': 1024,
                        'scale_factor': 0.25,
                        'num_blocks': 5
                    },
                    'generator_params': {
                        'block_expansion': 64,
                        'max_features': 512,
                        'num_down_blocks': 2,
                        'num_bottleneck_blocks': 6,
                        'estimate_occlusion_map': True,
                        'dense_motion_params': {
                            'block_expansion': 64,
                            'max_features': 1024,
                            'num_blocks': 5,
                            'scale_factor': 0.25
                        }
                    }
                }
            }
        else:
            # Previously this fell through and left self.cfg undefined.
            raise TypeError('config must be a str, a dict or None, got ' +
                            type(config).__name__)

        if weight_path is None:
            vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox-cpk.pdparams'
            cur_path = os.path.abspath(os.path.dirname(__file__))
            weight_path = get_path_from_url(vox_cpk_weight_url, cur_path)

        self.weight_path = weight_path
        self.output = output
        self.relative = relative
        self.adapt_scale = adapt_scale
        self.find_best_frame = find_best_frame
        self.best_frame = best_frame
        self.generator, self.kp_detector = self.load_checkpoints(
            self.cfg, self.weight_path)

    def run(self, source_image, driving_video):
        """Animate ``source_image`` following ``driving_video``.

        Args:
            source_image (str): path of the image to animate.
            driving_video (str): path of the video providing the motion.

        The result is saved as ``result.mp4`` inside ``self.output`` using
        the frame rate reported for the driving video.

        Raises:
            ValueError: if no frame could be read from the driving video.
        """
        source_image = imageio.imread(source_image)
        reader = imageio.get_reader(driving_video)
        try:
            fps = reader.get_meta_data()['fps']
            driving_video = []
            try:
                for im in reader:
                    driving_video.append(im)
            except RuntimeError:
                # Deliberate best effort: keep the frames decoded so far if
                # the reader fails part-way through the stream.
                pass
        finally:
            # Release the reader even if reading metadata/frames raised.
            reader.close()

        if not driving_video:
            raise ValueError('no frames could be read from the driving video')

        # Both inputs are brought to 256x256 and any alpha channel is dropped.
        source_image = resize(source_image, (256, 256))[..., :3]
        driving_video = [
            resize(frame, (256, 256))[..., :3] for frame in driving_video
        ]

        if self.find_best_frame or self.best_frame is not None:
            i = self.best_frame if self.best_frame is not None else self.find_best_frame_func(
                source_image, driving_video)

            print("Best frame: " + str(i))
            # Animate outwards from frame i in both directions, then stitch
            # the halves together (frame i appears in both, so drop one copy).
            driving_forward = driving_video[i:]
            driving_backward = driving_video[:(i + 1)][::-1]
            predictions_forward = self.make_animation(
                source_image,
                driving_forward,
                self.generator,
                self.kp_detector,
                relative=self.relative,
                adapt_movement_scale=self.adapt_scale)
            predictions_backward = self.make_animation(
                source_image,
                driving_backward,
                self.generator,
                self.kp_detector,
                relative=self.relative,
                adapt_movement_scale=self.adapt_scale)
            predictions = predictions_backward[::-1] + predictions_forward[1:]
        else:
            predictions = self.make_animation(
                source_image,
                driving_video,
                self.generator,
                self.kp_detector,
                relative=self.relative,
                adapt_movement_scale=self.adapt_scale)

        # The output directory is not guaranteed to exist yet.
        if self.output:
            os.makedirs(self.output, exist_ok=True)
        imageio.mimsave(os.path.join(self.output, 'result.mp4'),
                        [img_as_ubyte(frame) for frame in predictions],
                        fps=fps)

    def load_checkpoints(self, config, checkpoint_path):
        """Build the generator and keypoint detector and load their weights.

        Args:
            config (dict): config with a ``model_params`` section holding
                ``generator_params``, ``kp_detector_params`` and
                ``common_params``.
            checkpoint_path (str): checkpoint file containing ``generator``
                and ``kp_detector`` state dicts.

        Returns:
            tuple: ``(generator, kp_detector)``, both switched to eval mode.
        """
        generator = OcclusionAwareGenerator(
            **config['model_params']['generator_params'],
            **config['model_params']['common_params'])

        kp_detector = KPDetector(**config['model_params']['kp_detector_params'],
                                 **config['model_params']['common_params'])

        # Honour the argument instead of silently reading self.weight_path.
        checkpoint = paddle.load(checkpoint_path)
        generator.set_state_dict(checkpoint['generator'])
        kp_detector.set_state_dict(checkpoint['kp_detector'])

        generator.eval()
        kp_detector.eval()

        return generator, kp_detector

    def make_animation(self,
                       source_image,
                       driving_video,
                       generator,
                       kp_detector,
                       relative=True,
                       adapt_movement_scale=True):
        """Generate one predicted frame per driving frame.

        Args:
            source_image (numpy.ndarray): source image, channels last.
            driving_video (list[numpy.ndarray]): driving frames, channels
                last, all of the same shape.
            generator: network called as
                ``generator(source, kp_source=..., kp_driving=...)``.
            kp_detector: network returning keypoints for an image batch.
            relative (bool): forwarded to ``normalize_kp`` as
                ``use_relative_movement`` and ``use_relative_jacobian``.
            adapt_movement_scale (bool): forwarded to ``normalize_kp``.

        Returns:
            list[numpy.ndarray]: predicted frames, channels last.
        """
        with paddle.no_grad():
            predictions = []
            # Add a batch axis and move channels in front of the spatial axes.
            source = paddle.to_tensor(source_image[np.newaxis].astype(
                np.float32)).transpose([0, 3, 1, 2])

            # Stack the frames, add a batch axis and move channels to axis 1,
            # leaving the frame axis at position 2.
            driving = paddle.to_tensor(
                np.array(driving_video)[np.newaxis].astype(
                    np.float32)).transpose([0, 4, 1, 2, 3])
            kp_source = kp_detector(source)
            # Keypoints of the first driving frame serve as the reference
            # for normalize_kp.
            kp_driving_initial = kp_detector(driving[:, :, 0])

            for frame_idx in tqdm(range(driving.shape[2])):
                driving_frame = driving[:, :, frame_idx]
                kp_driving = kp_detector(driving_frame)
                kp_norm = normalize_kp(
                    kp_source=kp_source,
                    kp_driving=kp_driving,
                    kp_driving_initial=kp_driving_initial,
                    use_relative_movement=relative,
                    use_relative_jacobian=relative,
                    adapt_movement_scale=adapt_movement_scale)
                out = generator(source, kp_source=kp_source, kp_driving=kp_norm)

                # Back to channels-last and drop the batch axis.
                predictions.append(
                    np.transpose(out['prediction'].numpy(), [0, 2, 3, 1])[0])
        return predictions

    def find_best_frame_func(self, source, driving):
        """Return the index of the driving frame most similar to ``source``.

        Similarity is the sum of squared differences between normalised face
        landmarks of the source image and of each driving frame; the first
        frame reaching the smallest value wins.

        NOTE(review): only the first landmark set returned by
        ``fa.get_landmarks`` is used, and a frame for which it returns
        nothing is not handled here -- confirm against the face_alignment
        API before relying on this for footage without a visible face.
        """
        import face_alignment

        def _center_and_scale(kp):
            # Centre the landmarks and divide x/y by the square root of the
            # convex-hull measure so faces of different sizes are comparable.
            kp = kp - kp.mean(axis=0, keepdims=True)
            area = ConvexHull(kp[:, :2]).volume
            area = np.sqrt(area)
            kp[:, :2] = kp[:, :2] / area
            return kp

        fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                          flip_input=True)

        # Inputs are scaled by 255 before being handed to the detector.
        kp_source = fa.get_landmarks(255 * source)[0]
        kp_source = _center_and_scale(kp_source)
        norm = float('inf')
        frame_num = 0
        for i, image in tqdm(enumerate(driving)):
            kp_driving = fa.get_landmarks(255 * image)[0]
            kp_driving = _center_and_scale(kp_driving)
            new_norm = (np.abs(kp_source - kp_driving)**2).sum()
            if new_norm < norm:
                norm = new_norm
                frame_num = i
        return frame_num
import os # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
import sys #
#Licensed under the Apache License, Version 2.0 (the "License");
cur_path = os.path.abspath(os.path.dirname(__file__)) #you may not use this file except in compliance with the License.
sys.path.append(cur_path) #You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2 import cv2
import glob import glob
import argparse
import numpy as np import numpy as np
import paddle
import pickle
from PIL import Image from PIL import Image
from tqdm import tqdm from tqdm import tqdm
import paddle
from ppgan.models.generators import RRDBNet from ppgan.models.generators import RRDBNet
from ppgan.utils.video import frames2video, video2frames from ppgan.utils.video import frames2video, video2frames
from paddle.utils.download import get_path_from_url from paddle.utils.download import get_path_from_url
from .base_predictor import BasePredictor
parser = argparse.ArgumentParser(description='RealSR')
parser.add_argument('--input', type=str, default='none', help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--weight_path',
type=str,
default=None,
help='Path to the reference image directory')
REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DF2K_JPEG.pdparams' REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DF2K_JPEG.pdparams'
class RealSRPredictor(): class RealSRPredictor(BasePredictor):
def __init__(self, input, output, batch_size=1, weight_path=None): def __init__(self, output='output', weight_path=None):
self.input = input self.input = input
self.output = os.path.join(output, 'RealSR') self.output = os.path.join(output, 'RealSR')
self.model = RRDBNet(3, 3, 64, 23) self.model = RRDBNet(3, 3, 64, 23)
if weight_path is None: if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(REALSR_WEIGHT_URL, cur_path) weight_path = get_path_from_url(REALSR_WEIGHT_URL, cur_path)
state_dict, _ = paddle.load(weight_path) state_dict = paddle.load(weight_path)
self.model.load_dict(state_dict) self.model.load_dict(state_dict)
self.model.eval() self.model.eval()
...@@ -49,8 +49,14 @@ class RealSRPredictor(): ...@@ -49,8 +49,14 @@ class RealSRPredictor():
img = img.transpose((1, 2, 0)) img = img.transpose((1, 2, 0))
return (img * 255).clip(0, 255).astype('uint8') return (img * 255).clip(0, 255).astype('uint8')
def run_single(self, img_path): def run_image(self, img):
ori_img = Image.open(img_path).convert('RGB') if isinstance(img, str):
ori_img = Image.open(img).convert('RGB')
elif isinstance(img, np.ndarray):
ori_img = Image.fromarray(img).convert('RGB')
elif isinstance(img, Image.Image):
ori_img = img
img = self.norm(ori_img) img = self.norm(ori_img)
x = paddle.to_tensor(img[np.newaxis, ...]) x = paddle.to_tensor(img[np.newaxis, ...])
out = self.model(x) out = self.model(x)
...@@ -59,9 +65,8 @@ class RealSRPredictor(): ...@@ -59,9 +65,8 @@ class RealSRPredictor():
pred_img = Image.fromarray(pred_img) pred_img = Image.fromarray(pred_img)
return pred_img return pred_img
def run(self): def run_video(self, video):
vid = self.input base_name = os.path.basename(video).split('.')[0]
base_name = os.path.basename(vid).split('.')[0]
output_path = os.path.join(self.output, base_name) output_path = os.path.join(self.output, base_name)
pred_frame_path = os.path.join(output_path, 'frames_pred') pred_frame_path = os.path.join(output_path, 'frames_pred')
...@@ -71,15 +76,15 @@ class RealSRPredictor(): ...@@ -71,15 +76,15 @@ class RealSRPredictor():
if not os.path.exists(pred_frame_path): if not os.path.exists(pred_frame_path):
os.makedirs(pred_frame_path) os.makedirs(pred_frame_path)
cap = cv2.VideoCapture(vid) cap = cv2.VideoCapture(video)
fps = cap.get(cv2.CAP_PROP_FPS) fps = cap.get(cv2.CAP_PROP_FPS)
out_path = video2frames(vid, output_path) out_path = video2frames(video, output_path)
frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
for frame in tqdm(frames): for frame in tqdm(frames):
pred_img = self.run_single(frame) pred_img = self.run_image(frame)
frame_name = os.path.basename(frame) frame_name = os.path.basename(frame)
pred_img.save(os.path.join(pred_frame_path, frame_name)) pred_img.save(os.path.join(pred_frame_path, frame_name))
...@@ -92,14 +97,14 @@ class RealSRPredictor(): ...@@ -92,14 +97,14 @@ class RealSRPredictor():
return frame_pattern_combined, vid_out_path return frame_pattern_combined, vid_out_path
def run(self, input):
if self.is_video(input):
return self.run_video(input)
else:
pred_img = self.run_image(input)
if __name__ == '__main__': if self.output:
paddle.disable_static() base_name = os.path.basename(input)
args = parser.parse_args() pred_img.save(os.path.join(self.output, base_name + '.png'))
predictor = RealSRPredictor(args.input,
args.output,
weight_path=args.weight_path)
frames_path, temp_video_path = predictor.run()
print('output video path:', temp_video_path) return pred_img
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册